Initial commit
This commit is contained in:
@@ -0,0 +1,104 @@
|
||||
From 4a0584f7c05641143151ebdc1be1163bebf9d35d Mon Sep 17 00:00:00 2001
|
||||
From: Las <las@protonmail.ch>
|
||||
Date: Sun, 3 Jan 2021 18:35:37 +0000
|
||||
Subject: [PATCH] Compile transupp.c as part of the library
|
||||
|
||||
The exported symbols are made weak to not conflict with users
|
||||
of the library that already vendor this functionality.
|
||||
---
|
||||
CMakeLists.txt | 4 ++--
|
||||
transupp.c | 14 +++++++-------
|
||||
2 files changed, 9 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 0ca6f98..a9a0fae 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -533,7 +533,7 @@ set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
|
||||
jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c
|
||||
jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c
|
||||
jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c
|
||||
- jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)
|
||||
+ jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c transupp.c)
|
||||
|
||||
if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
|
||||
set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)
|
||||
@@ -1489,7 +1489,7 @@ install(EXPORT ${CMAKE_PROJECT_NAME}Targets
|
||||
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h
|
||||
- ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h
|
||||
+ ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h ${CMAKE_CURRENT_SOURCE_DIR}/transupp.h
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
include(cmakescripts/BuildPackages.cmake)
|
||||
diff --git a/transupp.c b/transupp.c
|
||||
index 6e86077..2da49a7 100644
|
||||
--- a/transupp.c
|
||||
+++ b/transupp.c
|
||||
@@ -1386,7 +1386,7 @@ jt_read_integer(const char **strptr, JDIMENSION *result)
|
||||
* This code is loosely based on XParseGeometry from the X11 distribution.
|
||||
*/
|
||||
|
||||
-GLOBAL(boolean)
|
||||
+GLOBAL(boolean) __attribute__((weak))
|
||||
jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec)
|
||||
{
|
||||
info->crop = FALSE;
|
||||
@@ -1486,7 +1486,7 @@ trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height)
|
||||
* and transformation is not perfect. Otherwise returns TRUE.
|
||||
*/
|
||||
|
||||
-GLOBAL(boolean)
|
||||
+GLOBAL(boolean) __attribute__((weak))
|
||||
jtransform_request_workspace(j_decompress_ptr srcinfo,
|
||||
jpeg_transform_info *info)
|
||||
{
|
||||
@@ -2033,7 +2033,7 @@ adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width,
|
||||
* to jpeg_write_coefficients().
|
||||
*/
|
||||
|
||||
-GLOBAL(jvirt_barray_ptr *)
|
||||
+GLOBAL(jvirt_barray_ptr *) __attribute__((weak))
|
||||
jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
jvirt_barray_ptr *src_coef_arrays,
|
||||
jpeg_transform_info *info)
|
||||
@@ -2152,7 +2152,7 @@ jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
* Note that some transformations will modify the source data arrays!
|
||||
*/
|
||||
|
||||
-GLOBAL(void)
|
||||
+GLOBAL(void) __attribute__((weak))
|
||||
jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
jvirt_barray_ptr *src_coef_arrays,
|
||||
jpeg_transform_info *info)
|
||||
@@ -2264,7 +2264,7 @@ jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
* (may use custom action then)
|
||||
*/
|
||||
|
||||
-GLOBAL(boolean)
|
||||
+GLOBAL(boolean) __attribute__((weak))
|
||||
jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
|
||||
int MCU_width, int MCU_height,
|
||||
JXFORM_CODE transform)
|
||||
@@ -2303,7 +2303,7 @@ jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
|
||||
* This must be called before jpeg_read_header() to have the desired effect.
|
||||
*/
|
||||
|
||||
-GLOBAL(void)
|
||||
+GLOBAL(void) __attribute__((weak))
|
||||
jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
|
||||
{
|
||||
#ifdef SAVE_MARKERS_SUPPORTED
|
||||
@@ -2331,7 +2331,7 @@ jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
|
||||
* JFIF APP0 or Adobe APP14 markers if selected.
|
||||
*/
|
||||
|
||||
-GLOBAL(void)
|
||||
+GLOBAL(void) __attribute__((weak))
|
||||
jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
JCOPY_OPTION option)
|
||||
{
|
||||
--
|
||||
2.29.2
|
||||
|
||||
90
pkgs/rocm-modules/amdsmi/default.nix
Normal file
90
pkgs/rocm-modules/amdsmi/default.nix
Normal file
@@ -0,0 +1,90 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
fetchpatch,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
pkg-config,
|
||||
libdrm,
|
||||
wrapPython,
|
||||
autoPatchelfHook,
|
||||
}:
|
||||
|
||||
let
|
||||
esmi_ib_src = fetchFromGitHub {
|
||||
owner = "amd";
|
||||
repo = "esmi_ib_library";
|
||||
rev = "esmi_pkg_ver-4.1.2";
|
||||
hash = "sha256-wj3krY/6AdmnoNOSqN9EE/Yxbx++0AW2vu7dovQrQ9I=";
|
||||
};
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "amdsmi";
|
||||
version = "6.4.1";
|
||||
src = fetchFromGitHub {
|
||||
owner = "rocm";
|
||||
repo = "amdsmi";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-K6FVvieJnOCfbyNNwOWs3t836ihtvELJvcCjXQoHuRY=";
|
||||
};
|
||||
|
||||
postPatch = ''
|
||||
substituteInPlace goamdsmi_shim/CMakeLists.txt \
|
||||
--replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \
|
||||
--replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#'
|
||||
|
||||
# Manually unpack esmi_ib_src and add amd_hsmp.h so execute-process git clone doesn't run
|
||||
cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library
|
||||
mkdir -p ./esmi_ib_library/include/asm
|
||||
cp ./include/amd_smi/impl/amd_hsmp.h ./esmi_ib_library/include/asm/amd_hsmp.h
|
||||
'';
|
||||
|
||||
patches = [
|
||||
# Fix ld.lld undefined reference: drmGetVersion
|
||||
# (fetchpatch {
|
||||
# url = "https://github.com/ROCm/amdsmi/commit/c3864bf6171970d86dc50fd23f06377736823997.patch";
|
||||
# hash = "sha256-zRG1tBD8sIQCWdKfCbXC/Z/6d6NTrRYvRpddPWdM4j8=";
|
||||
# })
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
pkg-config
|
||||
wrapPython
|
||||
autoPatchelfHook
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
libdrm
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
];
|
||||
|
||||
postInstall = ''
|
||||
wrapPythonProgramsIn $out
|
||||
rm $out/bin/amd-smi
|
||||
ln -sf $out/libexec/amdsmi_cli/amdsmi_cli.py $out/bin/amd-smi
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
# Package metadata shown by nixpkgs tooling (search, maintainer pings, platform checks).
meta = with lib; {
  description = "System management interface for AMD GPUs supported by ROCm";
  # Point at the amdsmi repository this derivation fetches, not rocm_smi_lib.
  homepage = "https://github.com/ROCm/amdsmi";
  license = with licenses; [ mit ];
  maintainers = with maintainers; [ lovesegfault ];
  teams = [ teams.rocm ];
  # Restricted to x86_64 Linux.
  platforms = [ "x86_64-linux" ];
};
|
||||
})
|
||||
230
pkgs/rocm-modules/aotriton/default.nix
Normal file
230
pkgs/rocm-modules/aotriton/default.nix
Normal file
@@ -0,0 +1,230 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
clr,
|
||||
rocblas,
|
||||
rocsolver,
|
||||
gtest,
|
||||
msgpack,
|
||||
libxml2,
|
||||
python3,
|
||||
python3Packages,
|
||||
openmp,
|
||||
hipblas-common,
|
||||
hipblas,
|
||||
nlohmann_json,
|
||||
triton-llvm,
|
||||
rocmlir,
|
||||
lapack-reference,
|
||||
ninja,
|
||||
ncurses,
|
||||
libffi,
|
||||
zlib,
|
||||
zstd,
|
||||
xz,
|
||||
pkg-config,
|
||||
buildTests ? false,
|
||||
buildBenchmarks ? false,
|
||||
buildSamples ? false,
|
||||
gpuTargets ? [
|
||||
# aotriton GPU support list:
|
||||
# https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py
|
||||
"gfx90a"
|
||||
"gfx942"
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
],
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (
|
||||
finalAttrs:
|
||||
let
|
||||
py = python3.withPackages (ps: [
|
||||
ps.pyyaml
|
||||
ps.distutils
|
||||
ps.setuptools
|
||||
ps.packaging
|
||||
ps.numpy
|
||||
ps.wheel
|
||||
ps.filelock
|
||||
ps.iniconfig
|
||||
ps.pluggy
|
||||
ps.pybind11
|
||||
]);
|
||||
gpuTargets' = lib.concatStringsSep ";" gpuTargets;
|
||||
compiler = "amdclang++";
|
||||
cFlags = "-O3 -DNDEBUG";
|
||||
cxxFlags = "${cFlags} -Wno-c++11-narrowing";
|
||||
triton-llvm' = triton-llvm;
|
||||
in
|
||||
{
|
||||
pname = "aotriton";
|
||||
version = "0.9.2b";
|
||||
|
||||
# Upstream sources, fetched at the revision named exactly like `version`,
# including git submodules.
src = fetchFromGitHub {
  repo = "aotriton";
  owner = "ROCm";
  rev = finalAttrs.version;
  fetchSubmodules = true;
  hash = "sha256-1Cf0olD3zRg9JESD6s/WaGifm3kfD12VUvjTZHpmGAE=";
};
|
||||
env.CXX = compiler;
|
||||
env.ROCM_PATH = "${clr}";
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"benchmark"
|
||||
]
|
||||
++ lib.optionals buildSamples [
|
||||
"sample"
|
||||
];
|
||||
|
||||
# Need an empty cuda.h for this to compile
|
||||
# Better than pulling in unfree cuda headers
|
||||
postPatch = ''
|
||||
touch third_party/triton/third_party/nvidia/include/cuda.h
|
||||
'';
|
||||
|
||||
doCheck = false;
|
||||
doInstallCheck = false;
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
pkg-config
|
||||
py
|
||||
clr
|
||||
ninja
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
[
|
||||
rocblas
|
||||
rocsolver
|
||||
hipblas-common
|
||||
hipblas
|
||||
openmp
|
||||
libffi
|
||||
ncurses
|
||||
xz
|
||||
nlohmann_json
|
||||
rocmlir
|
||||
|
||||
msgpack
|
||||
libxml2
|
||||
python3Packages.msgpack
|
||||
zlib
|
||||
zstd
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
gtest
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
lapack-reference
|
||||
];
|
||||
|
||||
env.TRITON_OFFLINE_BUILD = 1;
|
||||
env.LLVM_SYSPATH = "${triton-llvm'}";
|
||||
env.JSON_SYSPATH = nlohmann_json;
|
||||
env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir";
|
||||
# Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files
#
# Runs from the unpacked source root before the cmake configure step:
#  - creates an empty cuda.h stub and puts its directory on the include path,
#  - rewrites <cuda.h>/<cupti.h> includes to quoted form,
#  - injects cache settings into the CMakeLists files,
#  - patches setup.py and the generated linker script,
#  - sets release flags, the Ninja generator and a writable HOME.
preConfigure = ''
  # -p: do not fail when the directory is already present in the source tree
  mkdir -p third_party/triton/third_party/nvidia/backend/include
  touch third_party/triton/third_party/nvidia/backend/include/cuda.h
  # Derive the include path from the current source root instead of assuming
  # the build directory is /build/source.
  export CXXFLAGS="-I$PWD/third_party/triton/third_party/nvidia/backend/include"
  find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
  find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +

  sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt
  sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt
  sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
  sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
  substituteInPlace third_party/triton/python/setup.py \
    --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \
    --replace-fail 'system == "Linux"' 'False'
  # Fix 'ld: error: unable to insert .comment after .comment'
  substituteInPlace v2python/ld_script.py \
    --replace-fail 'INSERT AFTER .comment;' ""

  cmakeFlagsArray+=(
    '-DCMAKE_C_FLAGS_RELEASE=${cFlags}'
    '-DCMAKE_CXX_FLAGS_RELEASE=${cxxFlags}'
  )
  prependToVar cmakeFlags "-GNinja"
  # Writable HOME inside the build directory, wherever the builder placed it
  export HOME="$NIX_BUILD_TOP/tmp-home"
  mkdir -p "$HOME"
'';
|
||||
|
||||
# Excerpt from README:
|
||||
# Note: do not run ninja separately, due to the limit of the current build system,
|
||||
# ninja install will run the whole build process unconditionally.
|
||||
dontBuild = true;
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
ninja -v install
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-Wno-dev"
|
||||
"-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
||||
"-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
|
||||
"-DCMAKE_CXX_COMPILER=${compiler}"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DAMDGPU_TARGETS=${gpuTargets'}"
|
||||
"-DGPU_TARGETS=${gpuTargets'}"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DBUILD_CLIENTS_TESTS=ON"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"-DBUILD_CLIENTS_BENCHMARKS=ON"
|
||||
]
|
||||
++ lib.optionals buildSamples [
|
||||
"-DBUILD_CLIENTS_SAMPLES=ON"
|
||||
];
|
||||
|
||||
# After installation, move optional client binaries from $out/bin into their
# dedicated outputs (test / benchmark / sample); when any of those outputs is
# enabled, $out/bin is then removed with rmdir, which only succeeds if it is empty.
# NOTE(review): the hipblas-test / hipblas-bench binary names look inherited from
# another package's expression — confirm aotriton really installs files with these
# names before enabling buildTests / buildBenchmarks.
postInstall =
|
||||
lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv $out/bin/hipblas-test $test/bin
|
||||
''
|
||||
+ lib.optionalString buildBenchmarks ''
|
||||
mkdir -p $benchmark/bin
|
||||
mv $out/bin/hipblas-bench $benchmark/bin
|
||||
''
|
||||
+ lib.optionalString buildSamples ''
|
||||
mkdir -p $sample/bin
|
||||
mv $out/bin/example-* $sample/bin
|
||||
''
|
||||
+ lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
|
||||
rmdir $out/bin
|
||||
'';
|
||||
# Package metadata shown by nixpkgs tooling.
meta = with lib; {
  # No trailing whitespace in the description string.
  description = "ROCm Ahead of Time (AOT) Triton Math Library";
  homepage = "https://github.com/ROCm/aotriton";
  license = with licenses; [ mit ];
  teams = [ teams.rocm ];
  platforms = platforms.linux;
};
|
||||
}
|
||||
)
|
||||
40
pkgs/rocm-modules/clr/cmake-find-x11-libgl.patch
Normal file
40
pkgs/rocm-modules/clr/cmake-find-x11-libgl.patch
Normal file
@@ -0,0 +1,40 @@
|
||||
diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake
|
||||
index 3f233b72f..67bdc62ee 100644
|
||||
--- a/rocclr/cmake/ROCclr.cmake
|
||||
+++ b/rocclr/cmake/ROCclr.cmake
|
||||
@@ -44,6 +44,19 @@ find_package(Threads REQUIRED)
|
||||
|
||||
find_package(AMD_OPENCL)
|
||||
|
||||
+# Find X11 package
|
||||
+find_package(X11 REQUIRED)
|
||||
+if(NOT X11_FOUND)
|
||||
+ message(FATAL_ERROR "X11 libraries not found")
|
||||
+endif()
|
||||
+
|
||||
+# Find OpenGL package
|
||||
+find_package(OpenGL REQUIRED)
|
||||
+if(NOT OpenGL_FOUND)
|
||||
+ message(FATAL_ERROR "OpenGL not found")
|
||||
+endif()
|
||||
+
|
||||
+
|
||||
add_library(rocclr STATIC)
|
||||
|
||||
include(ROCclrCompilerOptions)
|
||||
@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC
|
||||
${ROCCLR_SRC_DIR}/device
|
||||
${ROCCLR_SRC_DIR}/elf
|
||||
${ROCCLR_SRC_DIR}/include
|
||||
+ ${X11_INCLUDE_DIR}
|
||||
+ ${OPENGL_INCLUDE_DIR}
|
||||
${AMD_OPENCL_INCLUDE_DIRS})
|
||||
|
||||
-target_link_libraries(rocclr PUBLIC Threads::Threads)
|
||||
+target_link_libraries(rocclr PUBLIC
|
||||
+ Threads::Threads
|
||||
+ ${X11_LIBRARIES}
|
||||
+ ${OPENGL_LIBRARIES})
|
||||
# IPC on Windows is not supported
|
||||
if(UNIX)
|
||||
target_link_libraries(rocclr PUBLIC rt)
|
||||
287
pkgs/rocm-modules/clr/default.nix
Normal file
287
pkgs/rocm-modules/clr/default.nix
Normal file
@@ -0,0 +1,287 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
callPackage,
|
||||
fetchFromGitHub,
|
||||
fetchpatch,
|
||||
rocmUpdateScript,
|
||||
makeWrapper,
|
||||
cmake,
|
||||
perl,
|
||||
hip-common,
|
||||
hipcc,
|
||||
rocm-device-libs,
|
||||
rocm-comgr,
|
||||
rocm-runtime,
|
||||
rocm-core,
|
||||
roctracer,
|
||||
rocminfo,
|
||||
rocm-smi,
|
||||
numactl,
|
||||
libffi,
|
||||
zstd,
|
||||
zlib,
|
||||
libGL,
|
||||
libxml2,
|
||||
libX11,
|
||||
python3Packages,
|
||||
rocm-merged-llvm,
|
||||
khronos-ocl-icd-loader,
|
||||
gcc-unwrapped,
|
||||
writeShellScriptBin,
|
||||
localGpuTargets ? null,
|
||||
}:
|
||||
|
||||
let
|
||||
inherit (rocm-core) ROCM_LIBPATCH_VERSION;
|
||||
hipClang = rocm-merged-llvm;
|
||||
hipClangPath = "${hipClang}/bin";
|
||||
# Arguments handed to wrapProgram for the hipcc/hipconfig entry points
# (joined with spaces in postInstall). `$out` is expanded by the shell at
# install time; the `${...}` references are store paths substituted by Nix.
wrapperArgs = [
  "--prefix PATH : $out/bin"
  # LD_LIBRARY_PATH entries must be directories that contain shared objects,
  # so reference the runtime's lib directory rather than its store root.
  "--prefix LD_LIBRARY_PATH : ${lib.makeLibraryPath [ rocm-runtime ]}"
  "--set HIP_PLATFORM amd"
  "--set HIP_PATH $out"
  "--set HIP_CLANG_PATH ${hipClangPath}"
  "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode"
  "--set HSA_PATH ${rocm-runtime}"
  "--set ROCM_PATH $out"
];
|
||||
amdclang = writeShellScriptBin "amdclang" ''
|
||||
exec clang "$@"
|
||||
'';
|
||||
amdclangxx = writeShellScriptBin "amdclang++" ''
|
||||
exec clang++ "$@"
|
||||
'';
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "clr";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs = [
|
||||
"out"
|
||||
"icd"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "clr";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-MA9MS/HF3j5iqpFuatHQJZ+nNkcGgzCvpkiNO6CjoPg=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
makeWrapper
|
||||
cmake
|
||||
perl
|
||||
python3Packages.python
|
||||
python3Packages.cppheaderparser
|
||||
amdclang
|
||||
amdclangxx
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
numactl
|
||||
libGL
|
||||
libxml2
|
||||
libX11
|
||||
khronos-ocl-icd-loader
|
||||
hipClang
|
||||
libffi
|
||||
zstd
|
||||
zlib
|
||||
];
|
||||
|
||||
propagatedBuildInputs = [
|
||||
rocm-core
|
||||
rocm-device-libs
|
||||
rocm-comgr
|
||||
rocm-runtime
|
||||
rocminfo
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries
|
||||
"-DCLR_BUILD_HIP=ON"
|
||||
"-DCLR_BUILD_OCL=ON"
|
||||
"-DHIP_COMMON_DIR=${hip-common}"
|
||||
"-DHIPCC_BIN_DIR=${hipcc}/bin"
|
||||
"-DHIP_PLATFORM=amd"
|
||||
"-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext"
|
||||
"-DROCM_PATH=${rocminfo}"
|
||||
"-DBUILD_ICD=ON"
|
||||
"-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds?
|
||||
"-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}"
|
||||
|
||||
# Temporarily set variables to work around upstream CMakeLists issue
|
||||
# Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
];
|
||||
|
||||
env.LLVM_DIR = "";
|
||||
|
||||
patches = [
|
||||
./cmake-find-x11-libgl.patch
|
||||
|
||||
# (fetchpatch {
|
||||
# # Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99
|
||||
# sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA=";
|
||||
# url = "https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch";
|
||||
# })
|
||||
# (fetchpatch {
|
||||
# # improve rocclr isa compatibility check
|
||||
# sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs=";
|
||||
# url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch";
|
||||
# })
|
||||
# (fetchpatch {
|
||||
# # [PATCH] Improve hipamd compat check
|
||||
# sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8=";
|
||||
# url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch";
|
||||
# })
|
||||
# (fetchpatch {
|
||||
# # [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition
|
||||
# # Fixes crash due to UB in KernelBlitManager::setArgument
|
||||
# sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I=";
|
||||
# url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch";
|
||||
# })
|
||||
# (fetchpatch {
|
||||
# # [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues
|
||||
# sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4=";
|
||||
# url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch";
|
||||
# })
|
||||
];
|
||||
|
||||
postPatch = ''
|
||||
patchShebangs hipamd/*.sh
|
||||
patchShebangs hipamd/src
|
||||
|
||||
# We're not on Windows so these are never installed to hipcc...
|
||||
substituteInPlace hipamd/CMakeLists.txt \
|
||||
--replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \
|
||||
--replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" ""
|
||||
|
||||
substituteInPlace hipamd/src/hip_embed_pch.sh \
|
||||
--replace-fail "\''$LLVM_DIR/bin/clang" "${hipClangPath}/clang"
|
||||
|
||||
substituteInPlace opencl/khronos/icd/loader/icd_platform.h \
|
||||
--replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \
|
||||
'#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";'
|
||||
|
||||
# new unbundler has better error messages, defaulting it on
|
||||
substituteInPlace rocclr/utils/flags.hpp \
|
||||
--replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true"
|
||||
'';
|
||||
|
||||
postInstall = ''
|
||||
chmod +x $out/bin/*
|
||||
patchShebangs $out/bin
|
||||
|
||||
cp ${amdclang}/bin/* $out/bin/
|
||||
cp ${amdclangxx}/bin/* $out/bin/
|
||||
|
||||
for prog in hip{cc,config}{,.pl}; do
|
||||
wrapProgram $out/bin/$prog ${lib.concatStringsSep " " wrapperArgs}
|
||||
done
|
||||
|
||||
mkdir -p $out/nix-support/
|
||||
echo '
|
||||
export HIP_PATH="${placeholder "out"}"
|
||||
export HIP_PLATFORM=amd
|
||||
export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
|
||||
export NIX_CC_USE_RESPONSE_FILE=0
|
||||
export HIP_CLANG_PATH="${hipClangPath}"
|
||||
export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}"
|
||||
export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook
|
||||
|
||||
# Just link rocminfo, it's easier
|
||||
ln -s ${rocminfo}/bin/* $out/bin
|
||||
ln -s ${rocm-core}/include/* $out/include/
|
||||
|
||||
# Replace rocm-opencl-icd functionality
|
||||
mkdir -p $icd/etc/OpenCL/vendors
|
||||
echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd
|
||||
|
||||
# add version info to output (downstream rocmPackages look for this)
|
||||
ln -s ${rocm-core}/.info/ $out/.info
|
||||
|
||||
ln -s ${hipClang} $out/llvm
|
||||
'';
|
||||
|
||||
disallowedRequisites = [
|
||||
gcc-unwrapped
|
||||
];
|
||||
|
||||
passthru =
|
||||
{
|
||||
# All known and valid general GPU targets
|
||||
# We cannot use this for each ROCm library, as each defines their own supported targets
|
||||
# See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
|
||||
# Generic targets are not yet available in rocm-6.3.1 llvm
|
||||
gpuTargets = lib.forEach [
|
||||
# "9-generic"
|
||||
"900" # MI25, Vega 56/64
|
||||
"906" # MI50/60, Radeon VII
|
||||
"908" # MI100
|
||||
"90a" # MI210 / MI250
|
||||
# "9-4-generic"
|
||||
# 940/1 - never released publicly, maybe HPE cray specific MI3xx?
|
||||
"942" # MI300
|
||||
# "10-1-generic"
|
||||
"1010"
|
||||
"1012"
|
||||
# "10-3-generic"
|
||||
"1030" # W6800, various Radeon cards
|
||||
# "11-generic"
|
||||
"1100"
|
||||
"1101"
|
||||
"1102"
|
||||
"1201"
|
||||
] (target: "gfx${target}");
|
||||
|
||||
inherit hipClangPath;
|
||||
|
||||
updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
page = "tags?per_page=4";
|
||||
};
|
||||
|
||||
impureTests = {
|
||||
rocm-smi = callPackage ./test-rocm-smi.nix {
|
||||
inherit rocm-smi;
|
||||
clr = finalAttrs.finalPackage;
|
||||
};
|
||||
opencl-example = callPackage ./test-opencl-example.nix {
|
||||
clr = finalAttrs.finalPackage;
|
||||
};
|
||||
};
|
||||
|
||||
selectGpuTargets =
|
||||
{
|
||||
supported ? [ ],
|
||||
}:
|
||||
supported;
|
||||
gpuArchSuffix = "";
|
||||
}
|
||||
// lib.optionalAttrs (localGpuTargets != null) {
|
||||
inherit localGpuTargets;
|
||||
gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets);
|
||||
selectGpuTargets =
|
||||
{
|
||||
supported ? [ ],
|
||||
}:
|
||||
if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "AMD Common Language Runtime for hipamd, opencl, and rocclr";
|
||||
homepage = "https://github.com/ROCm/clr";
|
||||
license = with licenses; [ mit ];
|
||||
maintainers = with maintainers; [ lovesegfault ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
88
pkgs/rocm-modules/clr/test-opencl-example.nix
Normal file
88
pkgs/rocm-modules/clr/test-opencl-example.nix
Normal file
@@ -0,0 +1,88 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
makeImpureTest,
|
||||
fetchFromGitHub,
|
||||
clr,
|
||||
cmake,
|
||||
pkg-config,
|
||||
glew,
|
||||
libglut,
|
||||
opencl-headers,
|
||||
ocl-icd,
|
||||
}:
|
||||
|
||||
let
|
||||
|
||||
examples = stdenv.mkDerivation {
|
||||
pname = "amd-app-samples";
|
||||
version = "2018-06-10";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "OpenCL";
|
||||
repo = "AMD_APP_samples";
|
||||
rev = "54da6ca465634e78fc51fc25edf5840467ee2411";
|
||||
hash = "sha256-qARQpUiYsamHbko/I1gPZE9pUGJ+3396Vk2n7ERSftA=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
pkg-config
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
glew
|
||||
libglut
|
||||
opencl-headers
|
||||
ocl-icd
|
||||
];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
|
||||
mkdir -p $out/bin
|
||||
# Example path is bin/x86_64/Release/cl/Reduction/Reduction
|
||||
cp -r bin/*/*/*/*/* $out/bin/
|
||||
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
cmakeFlags = [ "-DBUILD_CPP_CL=OFF" ];
|
||||
|
||||
# Metadata for the sample-programs derivation; every value is looked up
# explicitly through `lib` instead of a `with lib;` scope.
meta = {
  description = "Samples from the AMD APP SDK (with OpenCRun support)";
  homepage = "https://github.com/OpenCL/AMD_APP_samples";
  license = lib.licenses.bsd2;
  platforms = lib.platforms.linux;
  teams = [ lib.teams.rocm ];
};
|
||||
};
|
||||
|
||||
in
|
||||
makeImpureTest {
|
||||
name = "opencl-example";
|
||||
testedPackage = "rocmPackages_6.clr";
|
||||
|
||||
sandboxPaths = [
|
||||
"/sys"
|
||||
"/dev/dri"
|
||||
"/dev/kfd"
|
||||
];
|
||||
|
||||
nativeBuildInputs = [ examples ];
|
||||
|
||||
OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors";
|
||||
|
||||
testScript = ''
|
||||
# Examples load resources from current directory
|
||||
cd ${examples}/bin
|
||||
echo OCL_ICD_VENDORS=$OCL_ICD_VENDORS
|
||||
pwd
|
||||
|
||||
HelloWorld | grep HelloWorld
|
||||
'';
|
||||
|
||||
# Metadata of the impure test: owned by the ROCm team.
meta = {
  teams = [ lib.teams.rocm ];
};
|
||||
}
|
||||
27
pkgs/rocm-modules/clr/test-rocm-smi.nix
Normal file
27
pkgs/rocm-modules/clr/test-rocm-smi.nix
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
lib,
|
||||
makeImpureTest,
|
||||
clinfo,
|
||||
clr,
|
||||
rocm-smi,
|
||||
}:
|
||||
|
||||
makeImpureTest {
|
||||
name = "rocm-smi";
|
||||
testedPackage = "rocmPackages_6.clr";
|
||||
nativeBuildInputs = [
|
||||
clinfo
|
||||
rocm-smi
|
||||
];
|
||||
OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors";
|
||||
|
||||
testScript = ''
|
||||
# Test fails if the number of platforms is 0
|
||||
clinfo | grep -E 'Number of platforms * [1-9]'
|
||||
rocm-smi | grep -A1 GPU
|
||||
'';
|
||||
|
||||
# Metadata of the impure test: owned by the ROCm team.
meta = {
  teams = [ lib.teams.rocm ];
};
|
||||
}
|
||||
167
pkgs/rocm-modules/composable_kernel/base.nix
Normal file
167
pkgs/rocm-modules/composable_kernel/base.nix
Normal file
@@ -0,0 +1,167 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocm-merged-llvm,
|
||||
clr,
|
||||
rocminfo,
|
||||
hipify,
|
||||
gitMinimal,
|
||||
gtest,
|
||||
zstd,
|
||||
buildTests ? false,
|
||||
buildExamples ? false,
|
||||
gpuTargets ? (
|
||||
clr.localGpuTargets or [
|
||||
"gfx1010"
|
||||
"gfx1012"
|
||||
"gfx1030"
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
"gfx1102"
|
||||
"gfx1201"
|
||||
]
|
||||
),
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
preBuild = ''
|
||||
echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
|
||||
exit 1
|
||||
'';
|
||||
|
||||
pname = "composable_kernel_base";
|
||||
# Picked this version over 6.3 because much easier to get to build
|
||||
# and it matches the version torch 2.6 wants
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildExamples [
|
||||
"example"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "composable_kernel";
|
||||
rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
|
||||
hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
# Deliberately not using ninja
|
||||
# because we're jankily composing build outputs from multiple drvs
|
||||
# ninja won't believe they're up to date
|
||||
gitMinimal
|
||||
cmake
|
||||
rocminfo
|
||||
clr
|
||||
hipify
|
||||
zstd
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
rocm-cmake
|
||||
clr
|
||||
zstd
|
||||
];
|
||||
|
||||
strictDeps = true;
|
||||
enableParallelBuilding = true;
|
||||
env.ROCM_PATH = clr;
|
||||
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
|
||||
# "-DDL_KERNELS=ON" # Not needed, slow to build
|
||||
# CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
|
||||
# but migraphx requires an incompatible fork of CK and fails anyway
|
||||
# "-DCK_USE_CODEGEN=ON"
|
||||
# It might be worth skipping fp64 in future with this:
|
||||
# "-DDTYPES=fp32;fp16;fp8;bf16;int8"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DBUILD_DEV=OFF"
|
||||
"-DROCM_PATH=${clr}"
|
||||
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
|
||||
|
||||
# FP8 can build for 908/90a but very slow build
|
||||
# and produces unusably slow kernels that are huge
|
||||
"-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
# We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
|
||||
# per readme this is required if archs are dissimilar
|
||||
# In rocm-6.3.x not setting any arch flag worked
|
||||
# but setting dissimilar arches always failed
|
||||
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
|
||||
];
|
||||
|
||||
# No flags to build selectively it seems...
|
||||
postPatch =
|
||||
# Reduce configure time by preventing thousands of clang-tidy targets being added
|
||||
# We will never call them
|
||||
# Never build profiler
|
||||
''
|
||||
substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
|
||||
--replace-fail clang_tidy_check '#clang_tidy_check'
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace-fail "add_subdirectory(profiler)" ""
|
||||
''
|
||||
# Optionally remove tests
|
||||
+ lib.optionalString (!buildTests) ''
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace-fail "add_subdirectory(test)" ""
|
||||
substituteInPlace codegen/CMakeLists.txt \
|
||||
--replace-fail "include(ROCMTest)" ""
|
||||
''
|
||||
# Optionally remove examples
|
||||
+ lib.optionalString (!buildExamples) ''
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace-fail "add_subdirectory(example)" ""
|
||||
'';
|
||||
|
||||
postInstall =
|
||||
lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv $out/bin/test_* $test/bin
|
||||
''
|
||||
+ lib.optionalString buildExamples ''
|
||||
mkdir -p $example/bin
|
||||
mv $out/bin/example_* $example/bin
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
passthru.anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets;
|
||||
|
||||
meta = with lib; {
|
||||
description = "Performance portable programming model for machine learning tensor operators";
|
||||
homepage = "https://github.com/ROCm/composable_kernel";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
broken = true;
|
||||
};
|
||||
})
|
||||
161
pkgs/rocm-modules/composable_kernel/base.nix.cmp
Normal file
161
pkgs/rocm-modules/composable_kernel/base.nix.cmp
Normal file
@@ -0,0 +1,161 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocm-merged-llvm,
|
||||
clr,
|
||||
rocminfo,
|
||||
hipify,
|
||||
gitMinimal,
|
||||
gtest,
|
||||
zstd,
|
||||
buildTests ? false,
|
||||
buildExamples ? false,
|
||||
gpuTargets ? (
|
||||
clr.localGpuTargets or [
|
||||
"gfx1201"
|
||||
]
|
||||
),
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
preBuild = ''
|
||||
echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
|
||||
exit 1
|
||||
'';
|
||||
|
||||
pname = "composable_kernel_base";
|
||||
# Picked this version over 6.3 because much easier to get to build
|
||||
# and it matches the version torch 2.6 wants
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildExamples [
|
||||
"example"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "composable_kernel";
|
||||
rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
|
||||
hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
# Deliberately not using ninja
|
||||
# because we're jankily composing build outputs from multiple drvs
|
||||
# ninja won't believe they're up to date
|
||||
gitMinimal
|
||||
cmake
|
||||
rocminfo
|
||||
clr
|
||||
hipify
|
||||
zstd
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
rocm-cmake
|
||||
clr
|
||||
zstd
|
||||
];
|
||||
|
||||
strictDeps = true;
|
||||
enableParallelBuilding = true;
|
||||
env.ROCM_PATH = clr;
|
||||
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
|
||||
# "-DDL_KERNELS=ON" # Not needed, slow to build
|
||||
# CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
|
||||
# but migraphx requires an incompatible fork of CK and fails anyway
|
||||
# "-DCK_USE_CODEGEN=ON"
|
||||
# It might be worth skipping fp64 in future with this:
|
||||
# "-DDTYPES=fp32;fp16;fp8;bf16;int8"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DBUILD_DEV=OFF"
|
||||
"-DROCM_PATH=${clr}"
|
||||
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
|
||||
|
||||
# FP8 can build for 908/90a but very slow build
|
||||
# and produces unusably slow kernels that are huge
|
||||
"-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
# We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
|
||||
# per readme this is required if archs are dissimilar
|
||||
# In rocm-6.3.x not setting any arch flag worked
|
||||
# but setting dissimilar arches always failed
|
||||
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
|
||||
];
|
||||
|
||||
# No flags to build selectively it seems...
|
||||
postPatch =
|
||||
# Reduce configure time by preventing thousands of clang-tidy targets being added
|
||||
# We will never call them
|
||||
# Never build profiler
|
||||
''
|
||||
substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
|
||||
--replace-fail clang_tidy_check '#clang_tidy_check'
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace-fail "add_subdirectory(profiler)" ""
|
||||
''
|
||||
# Optionally remove tests
|
||||
+ lib.optionalString (!buildTests) ''
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace-fail "add_subdirectory(test)" ""
|
||||
substituteInPlace codegen/CMakeLists.txt \
|
||||
--replace-fail "include(ROCMTest)" ""
|
||||
''
|
||||
# Optionally remove examples
|
||||
+ lib.optionalString (!buildExamples) ''
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace-fail "add_subdirectory(example)" ""
|
||||
'';
|
||||
|
||||
postInstall =
|
||||
lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv $out/bin/test_* $test/bin
|
||||
''
|
||||
+ lib.optionalString buildExamples ''
|
||||
mkdir -p $example/bin
|
||||
mv $out/bin/example_* $example/bin
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
passthru.anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets;
|
||||
|
||||
meta = with lib; {
|
||||
description = "Performance portable programming model for machine learning tensor operators";
|
||||
homepage = "https://github.com/ROCm/composable_kernel";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
broken = true;
|
||||
};
|
||||
})
|
||||
43
pkgs/rocm-modules/composable_kernel/ck4inductor.nix
Normal file
43
pkgs/rocm-modules/composable_kernel/ck4inductor.nix
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
buildPythonPackage,
|
||||
python,
|
||||
composable_kernel,
|
||||
lib,
|
||||
setuptools,
|
||||
setuptools-scm,
|
||||
rocm-merged-llvm,
|
||||
}:
|
||||
buildPythonPackage {
|
||||
pyproject = true;
|
||||
pname = "ck4inductor";
|
||||
build-system = [
|
||||
setuptools
|
||||
setuptools-scm
|
||||
];
|
||||
version = "6.4.0";
|
||||
inherit (composable_kernel) src;
|
||||
# Modules that must be importable from the built package; each entry is listed once
pythonImportsCheck = [
|
||||
"ck4inductor"
|
||||
"ck4inductor.universal_gemm.gen_instances"
|
||||
"ck4inductor.universal_gemm.op"
|
||||
];
|
||||
propagatedBuildInputs = [
|
||||
# At runtime will fail to compile anything with ck4inductor without this
|
||||
# can't easily use in checks phase because most of the compiler machinery is in torch
|
||||
rocm-merged-llvm
|
||||
];
|
||||
checkPhase = ''
|
||||
if [ ! -d "$out/${python.sitePackages}/ck4inductor" ]; then
|
||||
echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor"
|
||||
exit 1
|
||||
fi
|
||||
'';
|
||||
meta = with lib; {
|
||||
description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations";
|
||||
homepage = "https://github.com/ROCm/composable_kernel";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
}
|
||||
249
pkgs/rocm-modules/composable_kernel/default.nix
Normal file
249
pkgs/rocm-modules/composable_kernel/default.nix
Normal file
@@ -0,0 +1,249 @@
|
||||
{
|
||||
lib,
|
||||
clr,
|
||||
composable_kernel_base,
|
||||
}:
|
||||
|
||||
let
|
||||
parts = {
|
||||
_mha = {
|
||||
# mha takes ~3hrs on 64 cores on an EPYC milan system at ~2.5GHz
|
||||
# big-parallel builders are one gen newer and clocked ~30% higher but only 24 cores
|
||||
# Should be <10h timeout but might be cutting it close
|
||||
# TODO: work out how to split this into smaller chunks instead of all 3k mha instances together
|
||||
# mha_0,1,2, search ninja target file for the individual instances, split by the index?
|
||||
# TODO: can we prune the generated instances down to only what in practice are used with popular models
|
||||
# when using flash-attention + MHA kernels?
|
||||
targets = [
|
||||
"device_mha_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
|
||||
};
|
||||
gemm_multiply_multiply = {
|
||||
targets = [
|
||||
"device_gemm_multiply_multiply_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
|
||||
};
|
||||
grouped_conv = {
|
||||
targets = [
|
||||
"device_grouped_conv1d_bwd_weight_instance"
|
||||
"device_grouped_conv2d_bwd_data_instance"
|
||||
"device_grouped_conv2d_bwd_weight_instance"
|
||||
"device_grouped_conv1d_fwd_instance"
|
||||
"device_grouped_conv2d_fwd_instance"
|
||||
"device_grouped_conv2d_fwd_dynamic_op_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
grouped_conv_bwd_3d = {
|
||||
targets = [
|
||||
"device_grouped_conv3d_bwd_data_instance"
|
||||
"device_grouped_conv3d_bwd_data_bilinear_instance"
|
||||
"device_grouped_conv3d_bwd_data_scale_instance"
|
||||
"device_grouped_conv3d_bwd_weight_instance"
|
||||
"device_grouped_conv3d_bwd_weight_bilinear_instance"
|
||||
"device_grouped_conv3d_bwd_weight_scale_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
grouped_conv_fwd_3d = {
|
||||
targets = [
|
||||
"device_grouped_conv3d_fwd_instance"
|
||||
"device_grouped_conv3d_fwd_bilinear_instance"
|
||||
"device_grouped_conv3d_fwd_convinvscale_instance"
|
||||
"device_grouped_conv3d_fwd_convscale_instance"
|
||||
"device_grouped_conv3d_fwd_convscale_add_instance"
|
||||
"device_grouped_conv3d_fwd_convscale_relu_instance"
|
||||
"device_grouped_conv3d_fwd_dynamic_op_instance"
|
||||
"device_grouped_conv3d_fwd_scale_instance"
|
||||
"device_grouped_conv3d_fwd_scaleadd_ab_instance"
|
||||
"device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
batched_gemm = {
|
||||
targets = [
|
||||
"device_batched_gemm_instance"
|
||||
"device_batched_gemm_add_relu_gemm_add_instance"
|
||||
"device_batched_gemm_bias_permute_instance"
|
||||
"device_batched_gemm_gemm_instance"
|
||||
"device_batched_gemm_reduce_instance"
|
||||
"device_batched_gemm_softmax_gemm_instance"
|
||||
"device_batched_gemm_softmax_gemm_permute_instance"
|
||||
"device_grouped_gemm_instance"
|
||||
"device_grouped_gemm_bias_instance"
|
||||
"device_grouped_gemm_fastgelu_instance"
|
||||
"device_grouped_gemm_fixed_nk_instance"
|
||||
"device_grouped_gemm_fixed_nk_multi_abd_instance"
|
||||
"device_grouped_gemm_tile_loop_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
gemm_universal = {
|
||||
targets = [
|
||||
"device_gemm_universal_instance"
|
||||
"device_gemm_universal_batched_instance"
|
||||
"device_gemm_universal_reduce_instance"
|
||||
"device_gemm_universal_streamk_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
|
||||
};
|
||||
gemm_other = {
|
||||
targets = [
|
||||
"device_gemm_instance"
|
||||
"device_gemm_ab_scale_instance"
|
||||
"device_gemm_add_instance"
|
||||
"device_gemm_add_add_fastgelu_instance"
|
||||
"device_gemm_add_fastgelu_instance"
|
||||
"device_gemm_add_multiply_instance"
|
||||
"device_gemm_add_relu_instance"
|
||||
"device_gemm_add_relu_add_layernorm_instance"
|
||||
"device_gemm_add_silu_instance"
|
||||
"device_gemm_bias_add_reduce_instance"
|
||||
"device_gemm_bilinear_instance"
|
||||
"device_gemm_fastgelu_instance"
|
||||
"device_gemm_multi_abd_instance"
|
||||
"device_gemm_multiply_add_instance"
|
||||
"device_gemm_reduce_instance"
|
||||
"device_gemm_splitk_instance"
|
||||
"device_gemm_streamk_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
conv = {
|
||||
targets = [
|
||||
"device_conv1d_bwd_data_instance"
|
||||
"device_conv2d_bwd_data_instance"
|
||||
"device_conv2d_fwd_instance"
|
||||
"device_conv2d_fwd_bias_relu_instance"
|
||||
"device_conv2d_fwd_bias_relu_add_instance"
|
||||
"device_conv3d_bwd_data_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
pool = {
|
||||
targets = [
|
||||
"device_avg_pool2d_bwd_instance"
|
||||
"device_avg_pool3d_bwd_instance"
|
||||
"device_pool2d_fwd_instance"
|
||||
"device_pool3d_fwd_instance"
|
||||
"device_max_pool_bwd_instance"
|
||||
];
|
||||
};
|
||||
other1 = {
|
||||
targets = [
|
||||
"device_batchnorm_instance"
|
||||
"device_contraction_bilinear_instance"
|
||||
"device_contraction_scale_instance"
|
||||
"device_elementwise_instance"
|
||||
"device_elementwise_normalization_instance"
|
||||
"device_normalization_bwd_data_instance"
|
||||
"device_normalization_bwd_gamma_beta_instance"
|
||||
"device_normalization_fwd_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
other2 = {
|
||||
targets = [
|
||||
"device_column_to_image_instance"
|
||||
"device_image_to_column_instance"
|
||||
"device_permute_scale_instance"
|
||||
"device_quantization_instance"
|
||||
"device_reduce_instance"
|
||||
"device_softmax_instance"
|
||||
"device_transpose_instance"
|
||||
];
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
};
|
||||
};
|
||||
tensorOpBuilder =
|
||||
{
|
||||
part,
|
||||
targets,
|
||||
extraCmakeFlags ? [ ],
|
||||
requiredSystemFeatures ? [ ],
|
||||
}:
|
||||
composable_kernel_base.overrideAttrs (old: {
|
||||
inherit requiredSystemFeatures;
|
||||
pname = "composable_kernel${clr.gpuArchSuffix}-${part}";
|
||||
makeTargets = targets;
|
||||
preBuild = ''
|
||||
echo "Building ${part}"
|
||||
makeFlagsArray+=($makeTargets)
|
||||
substituteInPlace Makefile \
|
||||
--replace-fail '.NOTPARALLEL:' ""
|
||||
'';
|
||||
|
||||
# Compile parallelism adjusted based on available RAM
|
||||
# Never uses less than NIX_BUILD_CORES/3, never uses more than NIX_BUILD_CORES
|
||||
# CK uses an unusually high amount of memory per core in the build step
|
||||
# Nix/nixpkgs doesn't really have any infra to tell it that this build is unusually memory hungry
|
||||
# So, bodge. Otherwise you end up having to build all of ROCm with a low core limit when
|
||||
# it's only this package that has trouble.
|
||||
preConfigure =
|
||||
old.preConfigure or ""
|
||||
+ ''
|
||||
MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
|
||||
MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
|
||||
APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE))
|
||||
MAX_CORES=$((1 + APPX_GB/3))
|
||||
MAX_CORES=$((MAX_CORES < NIX_BUILD_CORES/3 ? NIX_BUILD_CORES/3 : MAX_CORES))
|
||||
export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? MAX_CORES : NIX_BUILD_CORES))"
|
||||
echo "Picked new core limit NIX_BUILD_CORES=$NIX_BUILD_CORES based on available mem: $APPX_GB GB"
|
||||
cmakeFlagsArray+=(
|
||||
"-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES"
|
||||
)
|
||||
'';
|
||||
cmakeFlags = old.cmakeFlags ++ extraCmakeFlags;
|
||||
# Early exit after build phase with success, skips fixups etc
|
||||
# Will get copied back into /build of the final CK
|
||||
postBuild = ''
|
||||
find . -name "*.o" -type f | while read -r file; do
|
||||
mkdir -p "$out/$(dirname "$file")"
|
||||
cp --reflink=auto "$file" "$out/$file"
|
||||
done
|
||||
exit 0
|
||||
'';
|
||||
meta = old.meta // {
|
||||
broken = false;
|
||||
};
|
||||
});
|
||||
# One derivation per entry of `parts`: the entry's own attributes are handed to
# tensorOpBuilder together with the entry's attribute name as `part`
composable_kernel_parts = builtins.mapAttrs (
|
||||
name: spec: tensorOpBuilder (spec // { part = name; })
|
||||
) parts;
|
||||
in
|
||||
|
||||
composable_kernel_base.overrideAttrs (
|
||||
finalAttrs: old: {
|
||||
pname = "composable_kernel${clr.gpuArchSuffix}";
|
||||
parts_dirs = builtins.attrValues composable_kernel_parts;
|
||||
disallowedReferences = builtins.attrValues composable_kernel_parts;
|
||||
preBuild = ''
|
||||
for dir in $parts_dirs; do
|
||||
find "$dir" -type f -name "*.o" | while read -r file; do
|
||||
# Extract the relative path by removing the output directory prefix
|
||||
rel_path="''${file#"$dir/"}"
|
||||
|
||||
# Create parent directory if it doesn't exist
|
||||
mkdir -p "$(dirname "$rel_path")"
|
||||
|
||||
# Copy the file back to its original location, give it a future timestamp
|
||||
# so make treats it as up to date
|
||||
cp --reflink=auto --no-preserve=all "$file" "$rel_path"
|
||||
touch -d "now +10 hours" "$rel_path"
|
||||
done
|
||||
done
|
||||
'';
|
||||
passthru = old.passthru // {
|
||||
parts = composable_kernel_parts;
|
||||
};
|
||||
meta = old.meta // {
|
||||
# Builds which don't target any gfx9 cause cmake errors in dependent projects
|
||||
broken = !finalAttrs.passthru.anyGfx9Target;
|
||||
};
|
||||
}
|
||||
)
|
||||
513
pkgs/rocm-modules/default.nix
Normal file
513
pkgs/rocm-modules/default.nix
Normal file
@@ -0,0 +1,513 @@
|
||||
{
|
||||
lib,
|
||||
config,
|
||||
callPackage,
|
||||
newScope,
|
||||
recurseIntoAttrs,
|
||||
symlinkJoin,
|
||||
fetchFromGitHub,
|
||||
ffmpeg_4,
|
||||
boost179,
|
||||
opencv,
|
||||
libjpeg_turbo,
|
||||
python3Packages,
|
||||
triton-llvm,
|
||||
openmpi,
|
||||
rocmGpuArches ? [ ],
|
||||
}:
|
||||
|
||||
let
|
||||
outer = lib.makeScope newScope (
|
||||
self:
|
||||
let
|
||||
inherit (self) llvm;
|
||||
pyPackages = python3Packages;
|
||||
openmpi-orig = openmpi;
|
||||
in
|
||||
{
|
||||
inherit rocmGpuArches;
|
||||
buildTests = false;
|
||||
buildBenchmarks = false;
|
||||
stdenv = llvm.rocmClangStdenv;
|
||||
|
||||
rocmPath = self.callPackage ./rocm-path { };
|
||||
rocmUpdateScript = self.callPackage ./update.nix { };
|
||||
|
||||
## ROCm ##
|
||||
llvm = recurseIntoAttrs (
|
||||
callPackage ./llvm/default.nix {
|
||||
inherit (self) rocm-device-libs rocm-runtime;
|
||||
}
|
||||
);
|
||||
inherit (self.llvm) rocm-merged-llvm clang openmp;
|
||||
|
||||
rocm-core = self.callPackage ./rocm-core { };
|
||||
amdsmi = pyPackages.callPackage ./amdsmi {
|
||||
inherit (self) rocmUpdateScript;
|
||||
};
|
||||
|
||||
rocm-cmake = self.callPackage ./rocm-cmake { };
|
||||
|
||||
rocm-smi = pyPackages.callPackage ./rocm-smi {
|
||||
inherit (self) rocmUpdateScript;
|
||||
};
|
||||
|
||||
rocm-device-libs = self.callPackage ./rocm-device-libs {
|
||||
inherit (llvm) rocm-merged-llvm;
|
||||
};
|
||||
|
||||
rocm-runtime = self.callPackage ./rocm-runtime {
|
||||
inherit (llvm) rocm-merged-llvm;
|
||||
};
|
||||
|
||||
rocm-comgr = self.callPackage ./rocm-comgr {
|
||||
inherit (llvm) rocm-merged-llvm;
|
||||
};
|
||||
|
||||
rocminfo = self.callPackage ./rocminfo { };
|
||||
|
||||
# Unfree
|
||||
hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { };
|
||||
|
||||
rdc = self.callPackage ./rdc { };
|
||||
|
||||
rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { };
|
||||
|
||||
hip-common = self.callPackage ./hip-common { };
|
||||
|
||||
# Eventually will be in the LLVM repo
|
||||
hipcc = self.callPackage ./hipcc {
|
||||
inherit (llvm) rocm-merged-llvm;
|
||||
};
|
||||
|
||||
# Replaces hip, opencl-runtime, and rocclr
|
||||
clr = self.callPackage ./clr { };
|
||||
|
||||
aotriton = self.callPackage ./aotriton { };
|
||||
|
||||
hipify = self.callPackage ./hipify {
|
||||
inherit (llvm)
|
||||
clang
|
||||
rocm-merged-llvm
|
||||
;
|
||||
};
|
||||
|
||||
# hsakmt was merged into rocm-runtime
|
||||
hsakmt = self.rocm-runtime;
|
||||
|
||||
rocprofiler = self.callPackage ./rocprofiler {
|
||||
inherit (llvm) clang;
|
||||
};
|
||||
rocprofiler-register = self.callPackage ./rocprofiler-register {
|
||||
inherit (llvm) clang;
|
||||
};
|
||||
|
||||
# Needs GCC
|
||||
roctracer = self.callPackage ./roctracer { };
|
||||
|
||||
rocgdb = self.callPackage ./rocgdb { };
|
||||
|
||||
rocdbgapi = self.callPackage ./rocdbgapi { };
|
||||
|
||||
rocr-debug-agent = self.callPackage ./rocr-debug-agent { };
|
||||
|
||||
rocprim = self.callPackage ./rocprim { };
|
||||
|
||||
rocsparse = self.callPackage ./rocsparse { };
|
||||
|
||||
rocthrust = self.callPackage ./rocthrust { };
|
||||
|
||||
rocrand = self.callPackage ./rocrand { };
|
||||
|
||||
hiprand = self.callPackage ./hiprand { };
|
||||
|
||||
rocfft = self.callPackage ./rocfft { };
|
||||
|
||||
mscclpp = self.callPackage ./mscclpp { };
|
||||
|
||||
rccl = self.callPackage ./rccl { };
|
||||
|
||||
# RCCL with sanitizers and tests
|
||||
# Can't have with sanitizer build as dep of other packages without
|
||||
# runtime crashes due to ASAN not loading first
|
||||
rccl-tests = self.callPackage ./rccl {
|
||||
buildTests = true;
|
||||
};
|
||||
|
||||
hipcub = self.callPackage ./hipcub { };
|
||||
|
||||
hipsparse = self.callPackage ./hipsparse { };
|
||||
|
||||
hipfort = self.callPackage ./hipfort { };
|
||||
|
||||
hipfft = self.callPackage ./hipfft { };
|
||||
|
||||
hiprt = self.callPackage ./hiprt { };
|
||||
|
||||
tensile = pyPackages.callPackage ./tensile {
|
||||
inherit (self)
|
||||
rocmUpdateScript
|
||||
clr
|
||||
;
|
||||
};
|
||||
|
||||
rocblas = self.callPackage ./rocblas {
|
||||
buildTests = true;
|
||||
buildBenchmarks = true;
|
||||
inherit (self) roctracer;
|
||||
};
|
||||
|
||||
rocsolver = self.callPackage ./rocsolver { };
|
||||
|
||||
rocwmma = self.callPackage ./rocwmma { };
|
||||
|
||||
rocalution = self.callPackage ./rocalution { };
|
||||
|
||||
rocmlir-rock = self.callPackage ./rocmlir {
|
||||
buildRockCompiler = true;
|
||||
};
|
||||
rocmlir = self.rocmlir-rock;
|
||||
|
||||
hipsolver = self.callPackage ./hipsolver { };
|
||||
|
||||
hipblas-common = self.callPackage ./hipblas-common { };
|
||||
|
||||
hipblas = self.callPackage ./hipblas { };
|
||||
|
||||
hipblaslt = self.callPackage ./hipblaslt { };
|
||||
|
||||
# hipTensor - Only supports GFX9
|
||||
|
||||
composable_kernel_base = self.callPackage ./composable_kernel/base.nix { };
|
||||
composable_kernel = self.callPackage ./composable_kernel { };
|
||||
|
||||
ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix {
|
||||
inherit (self) composable_kernel;
|
||||
inherit (llvm) rocm-merged-llvm;
|
||||
};
|
||||
|
||||
half = self.callPackage ./half { };
|
||||
|
||||
miopen = self.callPackage ./miopen {
|
||||
boost = boost179.override { enableStatic = true; };
|
||||
};
|
||||
|
||||
miopen-hip = self.miopen;
|
||||
|
||||
migraphx = self.callPackage ./migraphx { };
|
||||
|
||||
rpp = self.callPackage ./rpp { };
|
||||
|
||||
rpp-hip = self.rpp.override {
|
||||
useOpenCL = false;
|
||||
useCPU = false;
|
||||
};
|
||||
|
||||
rpp-opencl = self.rpp.override {
|
||||
useOpenCL = true;
|
||||
useCPU = false;
|
||||
};
|
||||
|
||||
rpp-cpu = self.rpp.override {
|
||||
useOpenCL = false;
|
||||
useCPU = true;
|
||||
};
|
||||
|
||||
mivisionx = self.callPackage ./mivisionx {
|
||||
opencv = opencv.override { enablePython = true; };
|
||||
# TODO: Remove this pin in ROCm 6.4+
|
||||
# FFMPEG support was improved in https://github.com/ROCm/MIVisionX/pull/1460
|
||||
ffmpeg = ffmpeg_4;
|
||||
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
|
||||
# See: https://github.com/ROCm/MIVisionX/issues/1051
|
||||
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
|
||||
version = "2.0.6.1";
|
||||
src = fetchFromGitHub {
|
||||
owner = "rrawther";
|
||||
repo = "libjpeg-turbo";
|
||||
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
|
||||
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
|
||||
};
|
||||
# overwrite all patches, since patches for newer version do not apply
|
||||
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
|
||||
};
|
||||
};
|
||||
|
||||
mivisionx-hip = self.mivisionx.override {
|
||||
rpp = self.rpp-hip;
|
||||
useOpenCL = false;
|
||||
useCPU = false;
|
||||
};
|
||||
|
||||
mivisionx-cpu = self.mivisionx.override {
|
||||
rpp = self.rpp-cpu;
|
||||
useOpenCL = false;
|
||||
useCPU = true;
|
||||
};
|
||||
|
||||
# Even if config.rocmSupport is false we need rocmSupport true
|
||||
# version of ucc/ucx in openmpi in this package set
|
||||
openmpi = openmpi-orig.override (
|
||||
prev:
|
||||
let
|
||||
ucx = prev.ucx.override {
|
||||
enableCuda = false;
|
||||
enableRocm = true;
|
||||
};
|
||||
in
|
||||
{
|
||||
inherit ucx;
|
||||
ucc = prev.ucc.override {
|
||||
enableCuda = false;
|
||||
inherit ucx;
|
||||
};
|
||||
}
|
||||
);
|
||||
mpi = self.openmpi;
|
||||
|
||||
triton-llvm = triton-llvm.overrideAttrs {
|
||||
src = fetchFromGitHub {
|
||||
owner = "llvm";
|
||||
repo = "llvm-project";
|
||||
# make sure this matches triton llvm rel branch hash for now
|
||||
# https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt
|
||||
rev = "86b69c31642e98f8357df62c09d118ad1da4e16a";
|
||||
hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE=";
|
||||
};
|
||||
pname = "triton-llvm-rocm";
|
||||
patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase
|
||||
};
|
||||
|
||||
triton = pyPackages.callPackage ./triton { rocmPackages = self; };
|
||||
|
||||
## Meta ##
|
||||
# Emulate common ROCm meta layout
|
||||
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations
|
||||
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
|
||||
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
|
||||
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
|
||||
meta = with self; rec {
|
||||
rocm-developer-tools = symlinkJoin {
|
||||
name = "rocm-developer-tools-meta";
|
||||
paths = [
|
||||
hsa-amd-aqlprofile-bin
|
||||
rocm-core
|
||||
rocr-debug-agent
|
||||
roctracer
|
||||
rocdbgapi
|
||||
rocprofiler
|
||||
rocgdb
|
||||
rocm-language-runtime
|
||||
];
|
||||
};
|
||||
rocm-ml-sdk = symlinkJoin {
|
||||
name = "rocm-ml-sdk-meta";
|
||||
paths = [
|
||||
rocm-core
|
||||
miopen-hip
|
||||
rocm-hip-sdk
|
||||
rocm-ml-libraries
|
||||
];
|
||||
};
|
||||
rocm-ml-libraries = symlinkJoin {
|
||||
name = "rocm-ml-libraries-meta";
|
||||
paths = [
|
||||
llvm.clang
|
||||
llvm.mlir
|
||||
llvm.openmp
|
||||
rocm-core
|
||||
miopen-hip
|
||||
rocm-hip-libraries
|
||||
];
|
||||
};
|
||||
rocm-hip-sdk = symlinkJoin {
|
||||
name = "rocm-hip-sdk-meta";
|
||||
paths = [
|
||||
rocprim
|
||||
rocalution
|
||||
hipfft
|
||||
rocm-core
|
||||
hipcub
|
||||
hipblas
|
||||
hipblaslt
|
||||
rocrand
|
||||
rocfft
|
||||
hiprt
|
||||
rocsparse
|
||||
rccl
|
||||
rocthrust
|
||||
rocblas
|
||||
hipsparse
|
||||
hipfort
|
||||
rocwmma
|
||||
hipsolver
|
||||
rocsolver
|
||||
rocm-hip-libraries
|
||||
rocm-hip-runtime-devel
|
||||
];
|
||||
};
|
||||
rocm-hip-libraries = symlinkJoin {
|
||||
name = "rocm-hip-libraries-meta";
|
||||
paths = [
|
||||
rocblas
|
||||
hipfort
|
||||
rocm-core
|
||||
rocsolver
|
||||
rocalution
|
||||
rocrand
|
||||
hipblas
|
||||
hipblaslt
|
||||
rocfft
|
||||
hipfft
|
||||
hiprt
|
||||
rccl
|
||||
rocsparse
|
||||
hipsparse
|
||||
hipsolver
|
||||
rocm-hip-runtime
|
||||
];
|
||||
};
|
||||
rocm-openmp-sdk = symlinkJoin {
|
||||
name = "rocm-openmp-sdk-meta";
|
||||
paths = [
|
||||
rocm-core
|
||||
llvm.clang
|
||||
llvm.mlir
|
||||
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
|
||||
rocm-language-runtime
|
||||
];
|
||||
};
|
||||
rocm-opencl-sdk = symlinkJoin {
|
||||
name = "rocm-opencl-sdk-meta";
|
||||
paths = [
|
||||
rocm-core
|
||||
rocm-runtime
|
||||
clr
|
||||
clr.icd
|
||||
rocm-opencl-runtime
|
||||
];
|
||||
};
|
||||
rocm-opencl-runtime = symlinkJoin {
|
||||
name = "rocm-opencl-runtime-meta";
|
||||
paths = [
|
||||
rocm-core
|
||||
clr
|
||||
clr.icd
|
||||
rocm-language-runtime
|
||||
];
|
||||
};
|
||||
rocm-hip-runtime-devel = symlinkJoin {
|
||||
name = "rocm-hip-runtime-devel-meta";
|
||||
paths = [
|
||||
clr
|
||||
rocm-core
|
||||
hipify
|
||||
rocm-cmake
|
||||
llvm.clang
|
||||
llvm.mlir
|
||||
llvm.openmp
|
||||
rocm-runtime
|
||||
rocm-hip-runtime
|
||||
];
|
||||
};
|
||||
rocm-hip-runtime = symlinkJoin {
|
||||
name = "rocm-hip-runtime-meta";
|
||||
paths = [
|
||||
rocm-core
|
||||
rocminfo
|
||||
clr
|
||||
rocm-language-runtime
|
||||
];
|
||||
};
|
||||
rocm-language-runtime = symlinkJoin {
|
||||
name = "rocm-language-runtime-meta";
|
||||
paths = [
|
||||
rocm-runtime
|
||||
rocm-core
|
||||
rocm-comgr
|
||||
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
|
||||
];
|
||||
};
|
||||
rocm-all = symlinkJoin {
|
||||
name = "rocm-all-meta";
|
||||
paths = [
|
||||
rocm-developer-tools
|
||||
rocm-ml-sdk
|
||||
rocm-ml-libraries
|
||||
rocm-hip-sdk
|
||||
rocm-hip-libraries
|
||||
rocm-openmp-sdk
|
||||
rocm-opencl-sdk
|
||||
rocm-opencl-runtime
|
||||
rocm-hip-runtime-devel
|
||||
rocm-hip-runtime
|
||||
rocm-language-runtime
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
rocm-tests = self.callPackage ./rocm-tests {
|
||||
rocmPackages = self;
|
||||
};
|
||||
}
|
||||
// lib.optionalAttrs config.allowAliases {
|
||||
rocm-thunk = throw ''
|
||||
'rocm-thunk' has been removed. It's now part of the ROCm runtime.
|
||||
''; # Added 2025-3-16
|
||||
|
||||
clang-ocl = throw ''
|
||||
'clang-ocl' has been deprecated upstream. Use ROCm's clang directly.
|
||||
''; # Added 2025-3-16
|
||||
|
||||
miopengemm = throw ''
|
||||
'miopengemm' has been deprecated.
|
||||
''; # Added 2024-3-3
|
||||
|
||||
miopen-opencl = throw ''
|
||||
'miopen-opencl' has been deprecated.
|
||||
''; # Added 2024-3-3
|
||||
|
||||
mivisionx-opencl = throw ''
|
||||
'mivisionx-opencl' has been deprecated.
|
||||
Other versions of mivisionx are still available.
|
||||
''; # Added 2024-3-24
|
||||
}
|
||||
);
|
||||
# Given a list of GPU arch names, returns `outer` with its scope overridden so that
# `clr` is rebuilt with `localGpuTargets` set to that list
scopeForArches =
|
||||
arches:
|
||||
outer.overrideScope (
|
||||
_final: prev: {
|
||||
clr = prev.clr.override {
|
||||
localGpuTargets = arches;
|
||||
};
|
||||
}
|
||||
);
|
||||
in
|
||||
outer
|
||||
// builtins.listToAttrs (
|
||||
builtins.map (arch: {
|
||||
name = arch;
|
||||
value = scopeForArches [ arch ];
|
||||
}) outer.clr.gpuTargets
|
||||
)
|
||||
// {
|
||||
gfx9 = scopeForArches [
|
||||
"gfx906"
|
||||
"gfx908"
|
||||
"gfx90a"
|
||||
"gfx942"
|
||||
];
|
||||
gfx10 = scopeForArches [
|
||||
"gfx1010"
|
||||
"gfx1030"
|
||||
];
|
||||
gfx11 = scopeForArches [
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
"gfx1102"
|
||||
];
|
||||
|
||||
gfx12 = scopeForArches [
|
||||
"gfx1201"
|
||||
];
|
||||
}
|
||||
39
pkgs/rocm-modules/half/default.nix
Normal file
39
pkgs/rocm-modules/half/default.nix
Normal file
@@ -0,0 +1,39 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "half";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "half";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
];
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "C++ library for half precision floating point arithmetics";
|
||||
homepage = "https://github.com/ROCm/half";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.unix;
|
||||
};
|
||||
})
|
||||
45
pkgs/rocm-modules/hip-common/default.nix
Normal file
45
pkgs/rocm-modules/hip-common/default.nix
Normal file
@@ -0,0 +1,45 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "hip-common";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "HIP";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-2Iekju0t12c6iiqb104j+Lh53FvZwyqYtST12RkkuKc=";
|
||||
};
|
||||
|
||||
dontConfigure = true;
|
||||
dontBuild = true;
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
|
||||
mkdir -p $out
|
||||
mv * $out
|
||||
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "C++ Heterogeneous-Compute Interface for Portability";
|
||||
homepage = "https://github.com/ROCm/HIP";
|
||||
license = with licenses; [ mit ];
|
||||
maintainers = with maintainers; [ lovesegfault ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
42
pkgs/rocm-modules/hipblas-common/default.nix
Normal file
42
pkgs/rocm-modules/hipblas-common/default.nix
Normal file
@@ -0,0 +1,42 @@
|
||||
# hipblas-common: common files shared by hipBLAS and hipBLASLt,
# built with CMake from ROCm/hipBLAS-common.
{
  lib,
  stdenv,
  cmake,
  fetchFromGitHub,
  rocm-cmake,
  rocmUpdateScript,
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblas-common";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipBLAS-common";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-eTwoAXH2HGdSAOLTZHJUFHF+c2wWHixqeMqr60KxJrc=";
  };

  nativeBuildInputs = [
    cmake
  ];

  # NOTE(review): rocm-cmake is listed under buildInputs rather than
  # nativeBuildInputs while strictDeps is enabled - presumably so that CMake
  # can locate its modules; confirm before moving it.
  buildInputs = [
    rocm-cmake
  ];

  strictDeps = true;

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };
  meta = with lib; {
    description = "Common files shared by hipBLAS and hipBLASLt";
    # Points at the repository this derivation is actually fetched from.
    homepage = "https://github.com/ROCm/hipBLAS-common";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||
140
pkgs/rocm-modules/hipblas/default.nix
Normal file
140
pkgs/rocm-modules/hipblas/default.nix
Normal file
@@ -0,0 +1,140 @@
|
||||
# hipblas: ROCm BLAS marshalling library.
# Test, benchmark and sample clients are opt-in; each enabled client kind
# gets its own derivation output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  hipblas-common,
  rocblas,
  rocsolver,
  rocsparse,
  rocprim,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
}:

# Can also use cuBLAS
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblas";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-lQv8Ik6+0ldqyeJ05CSGB0309nIpzlRL3CRYeQxVfd0=";
  };

  patches = [
    # https://github.com/ROCm/hipBLAS/pull/952 (currently disabled)
    # (fetchpatch {
    #   name = "transitively-depend-hipblas-common.patch";
    #   url = "https://github.com/ROCm/hipBLAS/commit/54220fdaebf0fb4fd0921ee9e418ace5b143ec8f.patch";
    #   hash = "sha256-MFEhv8Bkrd2zD0FFIDg9oJzO7ztdyMAF+R9oYA0rmwQ=";
    # })
  ];

  # Drop the hard requirement on Git from the library's CMake file.
  postPatch = ''
    substituteInPlace library/CMakeLists.txt \
      --replace-fail "find_package(Git REQUIRED)" ""
  '';

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  propagatedBuildInputs = [ hipblas-common ];

  buildInputs =
    [
      rocblas
      rocprim
      rocsparse
      rocsolver
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optional (buildTests || buildBenchmarks) lapack-reference;

  cmakeFlags =
    [
      "-DCMAKE_BUILD_TYPE=Release"
      "-DCMAKE_CXX_COMPILER=${lib.getExe' clr "hipcc"}"
      # Upstream is migrating to amdclang++, it is likely this will be correct in next version bump
      #"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}"
    ]
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Move each enabled client into its own output, then remove the bin
  # directory from $out (rmdir fails if anything is left behind).
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipblas-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipblas-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv $out/bin/example-* $sample/bin
    '')
    (lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm BLAS marshalling library";
    homepage = "https://github.com/ROCm/hipBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
235
pkgs/rocm-modules/hipblaslt/default.nix
Normal file
235
pkgs/rocm-modules/hipblaslt/default.nix
Normal file
@@ -0,0 +1,235 @@
|
||||
# hipblaslt: general matrix-matrix operations library with a flexible API.
# Kernels are generated through a TensileLite build of `tensile` that uses
# this package's own source tree. When none of the requested GPU targets is
# a gfx9*/gfx11* arch, the package is built without Tensile (empty library).
{
  lib,
  stdenv,
  fetchpatch,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  gtest,
  msgpack,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  tensile,
  lapack-reference,
  ncurses,
  libffi,
  zlib,
  zstd,
  rocmUpdateScript,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA
  # WMMA on gfx1100 may be broken
  # MFMA on MI100 may be broken
  # MI200/MI300 known to work
  gpuTargets ? (
    clr.localGpuTargets or [
      # "gfx908" FIXME: confirm MFMA on MI100 works
      "gfx90a"
      "gfx942"
      # "gfx1100" FIXME: confirm WMMA targets work
    ]
  ),
}:

stdenv.mkDerivation (
  finalAttrs:
  let
    # True when at least one requested target is a gfx9* or gfx11* arch.
    supportsTargetArches =
      (builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets)
      || (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets);
    # tensile in TensileLite mode, built from this package's tensilelite/ subdirectory.
    tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs {
      inherit (finalAttrs) src;
      sourceRoot = "${finalAttrs.src.name}/tensilelite";
    };
    # Python environment handed to the build via VIRTUALENV_PYTHON_EXENAME.
    py = python3.withPackages (ps: [
      ps.pyyaml
      ps.setuptools
      ps.packaging
    ]);
    # Semicolon-separated target list; empty string when no target is supported.
    gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets);
    compiler = "amdclang++";
    cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need patched to include this properly
  in
  {
    pname = "hipblaslt${clr.gpuArchSuffix}";
    version = "6.4.1";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "hipBLASLt";
      rev = "rocm-${finalAttrs.version}";
      hash = "sha256-ojNa3jt5285gsPwo4icATJD9JdxmbJBjfCF4A1ttCQ4=";
    };
    env.CXX = compiler;
    env.CFLAGS = cFlags;
    env.CXXFLAGS = cFlags;
    env.ROCM_PATH = "${clr}";
    env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
    env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
    # Some tensile scripts look for this as an env var rather than a cmake flag
    env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++";
    requiredSystemFeatures = [ "big-parallel" ];

    outputs =
      [
        "out"
      ]
      ++ lib.optionals buildTests [
        "test"
      ]
      ++ lib.optionals buildBenchmarks [
        "benchmark"
      ]
      ++ lib.optionals buildSamples [
        "sample"
      ];

    postPatch = ''
      mkdir -p build/Tensile/library
      # git isn't needed and we have no .git
      substituteInPlace cmake/Dependencies.cmake \
        --replace-fail "find_package(Git REQUIRED)" ""
      substituteInPlace CMakeLists.txt \
        --replace-fail "include(virtualenv)" "" \
        --replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \
        --replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \
        --replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \
        --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
      # FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand
      # Working around for now by disabling the ExtOp libs
      substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \
        --replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries'
      substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \
        --replace-fail '${"\${rocm_path}"}/bin/' ""
    '';

    # Apply patches to allow building without a target arch if we need to do that
    patches = lib.optionals (!supportsTargetArches) [
      # Add ability to build without specifying any arch.
      # NOTE(review): both URLs track a moving branch (refs/heads/master), so
      # the pinned hashes can go stale; consider pinning to a commit.
      (fetchpatch {
        hash = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch";
      })
      # Followup to above patch for 6.3.x
      (fetchpatch {
        hash = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch";
      })
    ];

    doCheck = false;
    doInstallCheck = false;

    nativeBuildInputs = [
      cmake
      rocm-cmake
      py
      clr
      gfortran
      # need make to get streaming console output so nix knows build is still running
      # so deliberately not using ninja
    ];

    buildInputs =
      [
        hipblas-common
        tensile'
        openmp
        libffi
        ncurses

        # Tensile deps - not optional, building without tensile isn't actually supported
        msgpack # FIXME: not included in cmake!
        libxml2
        python3Packages.msgpack
        python3Packages.joblib
        zlib
        zstd
      ]
      ++ lib.optionals buildTests [
        gtest
      ]
      ++ lib.optionals (buildTests || buildBenchmarks) [
        lapack-reference
      ];

    cmakeFlags =
      [
        "-Wno-dev"
        "-DCMAKE_BUILD_TYPE=Release"
        "-DCMAKE_VERBOSE_MAKEFILE=ON"
        "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
        "-DTENSILE_USE_HIP=ON"
        "-DTENSILE_BUILD_CLIENT=OFF"
        "-DTENSILE_USE_FLOAT16_BUILTIN=ON"
        "-DCMAKE_CXX_COMPILER=${compiler}"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
        "-DHIPBLASLT_ENABLE_MARKER=Off"
        # FIXME what are the implications of hardcoding this?
        "-DTensile_CODE_OBJECT_VERSION=V5"
        "-DTensile_COMPILER=${compiler}"
        "-DAMDGPU_TARGETS=${gpuTargets'}"
        "-DGPU_TARGETS=${gpuTargets'}"
        "-DTensile_LIBRARY_FORMAT=msgpack"
      ]
      ++ lib.optionals (!supportsTargetArches) [
        "-DBUILD_WITH_TENSILE=OFF"
      ]
      ++ lib.optionals buildTests [
        "-DBUILD_CLIENTS_TESTS=ON"
      ]
      ++ lib.optionals buildBenchmarks [
        "-DBUILD_CLIENTS_BENCHMARKS=ON"
      ]
      ++ lib.optionals buildSamples [
        "-DBUILD_CLIENTS_SAMPLES=ON"
      ];

    # Move the optional client binaries into their dedicated outputs.
    # The test/benchmark executables of this project are named
    # hipblaslt-test / hipblaslt-bench (not hipblas-*).
    postInstall =
      lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblaslt-test $test/bin
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        mv $out/bin/hipblaslt-bench $benchmark/bin
      ''
      # NOTE(review): the example-* glob mirrors the hipBLAS packaging; verify
      # that hipBLASLt samples are installed under this name before enabling.
      + lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''
      + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
        rmdir $out/bin
      '';
    # If this is false there are no kernels in the output lib
    # and it's useless at runtime
    # so if it's an optional dep it's best to not depend on it
    # Some packages like torch need hipblaslt to compile
    # and are fine ignoring it at runtime if it's not supported
    # so we have to support building an empty hipblaslt
    passthru.supportsTargetArches = supportsTargetArches;
    passthru.updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    passthru.tensilelite = tensile';
    meta = with lib; {
      description = "hipBLASLt is a library that provides general matrix-matrix operations with a flexible API";
      homepage = "https://github.com/ROCm/hipBLASlt";
      license = with licenses; [ mit ];
      teams = [ teams.rocm ];
      platforms = platforms.linux;
    };
  }
)
|
||||
@@ -0,0 +1,39 @@
|
||||
From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001
|
||||
From: Luna Nova <git@lunnova.dev>
|
||||
Date: Fri, 11 Oct 2024 02:56:22 -0700
|
||||
Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived
|
||||
platforms
|
||||
|
||||
Fixes UB when hipBinUtilPtr_ is used.
|
||||
---
|
||||
amd/hipcc/src/hipBin_amd.h | 1 -
|
||||
amd/hipcc/src/hipBin_nvidia.h | 1 -
|
||||
2 files changed, 2 deletions(-)
|
||||
|
||||
diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h
|
||||
index 0a782d1beab9..36cd625ae8bc 100644
|
||||
--- a/src/hipBin_amd.h
|
||||
+++ b/src/hipBin_amd.h
|
||||
@@ -42,7 +42,6 @@ THE SOFTWARE.
|
||||
|
||||
class HipBinAmd : public HipBinBase {
|
||||
private:
|
||||
- HipBinUtil* hipBinUtilPtr_;
|
||||
string hipClangPath_ = "";
|
||||
string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_;
|
||||
PlatformInfo platformInfoAMD_;
|
||||
diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h
|
||||
index ff142cc1cea2..09b7b80979c7 100644
|
||||
--- a/src/hipBin_nvidia.h
|
||||
+++ b/src/hipBin_nvidia.h
|
||||
@@ -31,7 +31,6 @@ THE SOFTWARE.
|
||||
|
||||
class HipBinNvidia : public HipBinBase {
|
||||
private:
|
||||
- HipBinUtil* hipBinUtilPtr_;
|
||||
string cudaPath_ = "";
|
||||
PlatformInfo platformInfoNV_;
|
||||
string hipCFlags_, hipCXXFlags_, hipLdFlags_;
|
||||
--
|
||||
2.46.0
|
||||
|
||||
47
pkgs/rocm-modules/hipcc/default.nix
Normal file
47
pkgs/rocm-modules/hipcc/default.nix
Normal file
@@ -0,0 +1,47 @@
|
||||
# hipcc: compiler driver utility that calls clang or nvcc.
# Built from the amd/hipcc subdirectory of the ROCm LLVM source tree.
{
  lib,
  stdenv,
  rocm-merged-llvm,
  cmake,
  lsb-release,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hipcc";
  # In-tree with ROCm LLVM
  inherit (rocm-merged-llvm) version;
  src = rocm-merged-llvm.llvm-src;
  sourceRoot = "${finalAttrs.src.name}/amd/hipcc";

  nativeBuildInputs = [ cmake ];

  buildInputs = [ rocm-merged-llvm ];

  patches = [
    # https://github.com/ROCm/llvm-project/pull/183
    # Fixes always-invoked UB in hipcc
    ./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch
  ];

  # Point the hard-coded lsb_release path at the Nix store copy.
  postPatch = ''
    substituteInPlace src/hipBin_amd.h \
      --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release"
  '';

  cmakeFlags = [ "-DCMAKE_BUILD_TYPE=Release" ];

  # Replace $out/hip/bin with a symlink to $out/bin.
  postInstall = ''
    rm -r $out/hip/bin
    ln -s $out/bin $out/hip/bin
  '';

  meta = {
    description = "Compiler driver utility that calls clang or nvcc";
    homepage = "https://github.com/ROCm/HIPCC";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
102
pkgs/rocm-modules/hipcub/default.nix
Normal file
102
pkgs/rocm-modules/hipcub/default.nix
Normal file
@@ -0,0 +1,102 @@
|
||||
# hipcub: thin wrapper library on top of rocPRIM or CUB.
# Tests and benchmarks are opt-in and land in separate outputs.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

# CUB can also be used as a backend instead of rocPRIM.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipcub";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipCUB";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-pwCAsRx5XyuCvppTmZ4VG83iYl9ilAQCZds4oKINhSI=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs =
    [ rocprim ]
    ++ lib.optional buildTests gtest
    ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    # Only restrict the GPU targets when the caller asked for specific ones.
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  # Relocate client binaries into their outputs, then drop the bin directory
  # from $out (rmdir fails if anything is left behind).
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Thin wrapper library on top of rocPRIM or CUB";
    homepage = "https://github.com/ROCm/hipCUB";
    license = [ lib.licenses.bsd3 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
125
pkgs/rocm-modules/hipfft/default.nix
Normal file
125
pkgs/rocm-modules/hipfft/default.nix
Normal file
@@ -0,0 +1,125 @@
|
||||
# hipfft: FFT marshalling library.
# Tests, the rider benchmark and samples are opt-in; each enabled client kind
# gets its own output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  git,
  rocfft,
  gtest,
  boost,
  fftw,
  fftwFloat,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

# Can also use cuFFT
stdenv.mkDerivation (finalAttrs: {
  pname = "hipfft";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipFFT";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-VA9OC/TvvQyFCVVox/9ihuE2W0Ia87O0R3YsLM4Jzuk=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    clr
    git
    cmake
    rocm-cmake
  ];

  buildInputs =
    [ rocfft ]
    # Client-only dependencies, pulled in as soon as any client is built.
    ++ lib.optionals (buildTests || buildBenchmarks || buildSamples) [
      gtest
      boost
      fftw
      fftwFloat
      openmp
    ];

  cmakeFlags =
    [
      "-DCMAKE_C_COMPILER=hipcc"
      "-DCMAKE_CXX_COMPILER=hipcc"
      "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
      "-DHIP_ROOT_DIR=${clr}"
      "-DHIP_PATH=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_RIDER=ON"
    ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Tests and the rider are taken from $out/bin; samples are taken from the
  # build tree (clients/staging) and have their rpath shrunk to store paths.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipfft-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipfft-rider $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/hipfft_* $sample/bin
      patchelf $sample/bin/hipfft_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "FFT marshalling library";
    homepage = "https://github.com/ROCm/hipFFT";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
66
pkgs/rocm-modules/hipfort/default.nix
Normal file
66
pkgs/rocm-modules/hipfort/default.nix
Normal file
@@ -0,0 +1,66 @@
|
||||
# hipfort: Fortran interfaces for ROCm libraries, compiled with gfortran.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  gfortran,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hipfort";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipfort";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-a2YPGAWP+gF2EykpKmkG/fEIW6blm2ChOybmLAHQQdw=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    gfortran
  ];

  cmakeFlags = [
    "-DHIPFORT_COMPILER=${gfortran}/bin/gfortran"
    "-DHIPFORT_AR=${gfortran.cc}/bin/gcc-ar"
    "-DHIPFORT_RANLIB=${gfortran.cc}/bin/gcc-ranlib"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Replace absolute /bin/<tool> references with PATH lookups.
  # --replace-warn keeps the matching semantics of the deprecated plain
  # --replace (a missing pattern only warns) without the deprecation notice.
  postPatch = ''
    patchShebangs bin

    substituteInPlace bin/hipfc bin/mymcpu \
      --replace-warn "/bin/cat" "cat"

    substituteInPlace bin/CMakeLists.txt \
      --replace-warn "/bin/mkdir" "mkdir" \
      --replace-warn "/bin/cp" "cp" \
      --replace-warn "/bin/sed" "sed" \
      --replace-warn "/bin/chmod" "chmod" \
      --replace-warn "/bin/ln" "ln"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Fortran interfaces for ROCm libraries";
    homepage = "https://github.com/ROCm/hipfort";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||
64
pkgs/rocm-modules/hipify/default.nix
Normal file
64
pkgs/rocm-modules/hipify/default.nix
Normal file
@@ -0,0 +1,64 @@
|
||||
# hipify: tools that convert CUDA sources to portable C++ (HIP) code.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  libxml2,
  rocm-merged-llvm,
  zlib,
  zstd,
  perl,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hipify";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "HIPIFY";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-uj25WmGCpwouS1yzW9Oil5Vyrbyj5yRITvWF9WaGozM=";
  };

  nativeBuildInputs = [
    cmake
  ];

  buildInputs = [
    libxml2
    rocm-merged-llvm
    zlib
    zstd
    perl
  ];

  # Use the clang from the Nix store instead of ${LLVM_TOOLS_BINARY_DIR}/clang.
  # --replace-warn keeps the matching semantics of the deprecated plain
  # --replace (a missing pattern only warns) without the deprecation notice.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-warn "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang"
    chmod +x bin/*
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  # Make the installed scripts executable and rewrite their shebangs.
  postInstall = ''
    chmod +x $out/bin/*
    chmod +x $out/libexec/*
    patchShebangs $out/bin/
    patchShebangs $out/libexec/
  '';

  meta = with lib; {
    description = "Convert CUDA to Portable C++ Code";
    homepage = "https://github.com/ROCm/HIPIFY";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||
79
pkgs/rocm-modules/hiprand/default.nix
Normal file
79
pkgs/rocm-modules/hiprand/default.nix
Normal file
@@ -0,0 +1,79 @@
|
||||
# hiprand: HIP wrapper for rocRAND and cuRAND.
# Tests are opt-in and are moved to a dedicated "test" output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  rocrand,
  gtest,
  buildTests ? false,
  gpuTargets ? [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hiprand";
  version = "6.4.1";

  outputs = [ "out" ] ++ lib.optional buildTests "test";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipRAND";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-ISl4bVW/JvT81gJ/10JlKliv7ds5WtP2f/Dnc9qvh9Q=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [ rocrand ] ++ lib.optional buildTests gtest;

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    # Only restrict the GPU targets when the caller asked for specific ones.
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON";

  postInstall = lib.optionalString buildTests ''
    mkdir -p $test/bin
    mv $out/bin/test_* $test/bin
    rm -r $out/bin/hipRAND
    # Fail if bin/ isn't actually empty
    rmdir $out/bin
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "HIP wrapper for rocRAND and cuRAND";
    homepage = "https://github.com/ROCm/hipRAND";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
60
pkgs/rocm-modules/hiprt/default.nix
Normal file
60
pkgs/rocm-modules/hiprt/default.nix
Normal file
@@ -0,0 +1,60 @@
|
||||
# hiprt: HIPRT ray tracing library from GPUOpen, built with CMake against clr.
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  clr,
  gcc,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hiprt";
  version = "2.5.a21e075.3";

  src = fetchFromGitHub {
    owner = "GPUOpen-LibrariesAndSDKs";
    repo = "HIPRT";
    tag = finalAttrs.version;
    # SRI hashes belong in `hash`; `sha256` is the legacy attribute name.
    hash = "sha256-3yGhwIsFHlFMCEzuYnXuXNzs99m7f2LTkYaTGs0GEcI=";
  };

  # Rebuild the bundled easy-encryption helper from source so the prebuilt
  # binary shipped in the repository is not used.
  postPatch = ''
    g++ contrib/easy-encryption/cl.cpp -o contrib/easy-encryption/bin/linux/ee64 #replacing prebuilt binary
  '';

  nativeBuildInputs = [
    gcc # required for replacing easy-encryption binary
    cmake
    clr
  ];
  buildInputs = [
    # TODO: do we need anything here?
  ];

  # Flags are written as single `-DNAME=VALUE` tokens so they do not depend
  # on shell word-splitting to separate `-D` from its argument.
  cmakeFlags = [
    #TODO: mostly copied from the Arch package, verify these:
    "-DCMAKE_BUILD_TYPE=Release"
    "-DHIP_PATH=${clr}"
    "-DBAKE_KERNEL=OFF"
    "-DBAKE_COMPILED_KERNEL=OFF"
    "-DBITCODE=ON"
    "-DPRECOMPILE=ON"
    "-DNO_UNITTEST=ON"
    "-DFORCE_DISABLE_CUDA=ON"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  meta = {
    homepage = "https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT";
    description = "Ray tracing library for HIP";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      mksafavi
    ];
    platforms = lib.platforms.linux;
  };
})
|
||||
119
pkgs/rocm-modules/hipsolver/default.nix
Normal file
119
pkgs/rocm-modules/hipsolver/default.nix
Normal file
@@ -0,0 +1,119 @@
|
||||
# hipsolver: ROCm SOLVER marshalling library.
# Tests, benchmarks and samples are opt-in; each enabled client kind gets its
# own output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  rocblas,
  rocsolver,
  rocsparse,
  suitesparse,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
}:

# Can also use cuSOLVER
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsolver";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSOLVER";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-4ig8/P7JQCx3WB+PRHlhSlRhzdbnDo8QrFnWxsxJdwk=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs =
    [
      rocblas
      rocsolver
      rocsparse
      suitesparse
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optional (buildTests || buildBenchmarks) lapack-reference;

  cmakeFlags =
    [
      "-DCMAKE_CXX_COMPILER=hipcc"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master
    ]
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Tests and benchmarks are taken from $out/bin; samples are taken from the
  # build tree (clients/staging) and have their rpath shrunk to store paths.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipsolver-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipsolver-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example-* $sample/bin
      patchelf $sample/bin/example-* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm SOLVER marshalling library";
    homepage = "https://github.com/ROCm/hipSOLVER";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
152
pkgs/rocm-modules/hipsparse/default.nix
Normal file
152
pkgs/rocm-modules/hipsparse/default.nix
Normal file
@@ -0,0 +1,152 @@
|
||||
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocsparse,
  clr,
  gfortran,
  git,
  gtest,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

let
  # The rocSPARSE test matrices consumed by the test client:
  # rocsparse.passthru.matrices.matrix-01 through matrix-19.
  # Only forced when buildTests is enabled (see postPatch).
  testMatrices = map (i: rocsparse.passthru.matrices."matrix-${lib.fixedWidthNumber 2 i}") (
    lib.range 1 19
  );
in
# This can also use cuSPARSE as a backend instead of rocSPARSE
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsparse";
  version = "6.4.1";

  # Test binaries and samples are split into their own outputs so that the
  # default output only carries the library.
  outputs =
    [ "out" ]
    ++ lib.optionals buildTests [ "test" ]
    ++ lib.optionals buildSamples [ "sample" ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSPARSE";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-pRR/3t7YXgtPQwGFb5lA6DI2OTF6AnDcfkydRIEod2Q=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs =
    [
      rocsparse
      git
    ]
    ++ lib.optionals (buildTests || buildBenchmarks) [
      gtest
    ]
    ++ lib.optionals (buildTests || buildSamples) [
      openmp
    ];

  cmakeFlags =
    [
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
      (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
      (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples)
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ];

  # We have to manually generate the matrices
  # CMAKE_MATRICES_DIR seems to be reset in clients/tests/CMakeLists.txt
  #
  # The matrices directory is addressed through $NIX_BUILD_TOP/$sourceRoot
  # rather than a literal /build/source, so the path stays valid when the
  # build directory is not /build (e.g. sandbox disabled).
  postPatch = lib.optionalString buildTests ''
    mkdir -p matrices

    ${lib.concatMapStringsSep "\n" (m: "ln -s ${m}/*.mtx matrices") testMatrices}

    # Not used by the original cmake, causes an error
    rm matrices/*_b.mtx

    echo "deps/convert.cpp -> deps/mtx2csr"
    hipcc deps/convert.cpp -O3 -o deps/mtx2csr

    # Convert every linked .mtx into the .bin form and drop the symlink.
    for mtx in matrices/*.mtx; do
      mat="$(basename "$mtx" .mtx)"
      echo "mtx2csr: $mat.mtx -> $mat.bin"
      deps/mtx2csr "matrices/$mat.mtx" "matrices/$mat.bin"
      unlink "matrices/$mat.mtx"
    done

    # --replace-fail: a silent miss here would leave the tests looking for
    # matrices in the wrong directory.
    substituteInPlace clients/tests/CMakeLists.txt \
      --replace-fail "\''${PROJECT_BINARY_DIR}/matrices" "$NIX_BUILD_TOP/$sourceRoot/matrices"
  '';

  postInstall =
    lib.optionalString buildTests ''
      mkdir -p "$test/bin"
      mv "$out/bin/hipsparse-test" "$test/bin"
      mv "$NIX_BUILD_TOP/$sourceRoot/matrices" "$test"
      rmdir "$out/bin"
    ''
    + lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example_* $sample/bin
      patchelf --set-rpath $out/lib:${
        lib.makeLibraryPath (
          finalAttrs.buildInputs
          ++ [
            clr
            gfortran.cc
          ]
        )
      } $sample/bin/example_*
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = {
    description = "ROCm SPARSE marshalling library";
    homepage = "https://github.com/ROCm/hipSPARSE";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
53
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/default.nix
Normal file
53
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/default.nix
Normal file
@@ -0,0 +1,53 @@
|
||||
{
  lib,
  stdenv,
  fetchurl,
  callPackage,
  dpkg,
  rocm-core,
}:

# Repackages the prebuilt hsa-amd-aqlprofile .deb published on repo.radeon.com.
stdenv.mkDerivation (finalAttrs: {
  pname = "hsa-amd-aqlprofile-bin";
  version = "6.3.3";

  src =
    let
      inherit (finalAttrs) version;
      patch = rocm-core.ROCM_LIBPATCH_VERSION;
      majorMinor = lib.versions.majorMinor version;
      # The apt pool directory drops a trailing ".0" patch component
      # (X.Y.0 is published under X.Y).
      poolVersion = if majorMinor + ".0" == version then majorMinor else version;
      incremental = "74";
      osRelease = "22.04";
    in
    fetchurl {
      url = "https://repo.radeon.com/rocm/apt/${poolVersion}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${patch}-${incremental}~${osRelease}_amd64.deb";
      hash = "sha256-Lo6gU9ywkujtsKvnOAwL3L8qQNPwjjm0Pm4OyzoUYao=";
    };

  nativeBuildInputs = [ dpkg ];
  dontPatch = true;
  dontConfigure = true;
  dontBuild = true;

  installPhase = ''
    runHook preInstall

    mkdir -p "$out"
    cp -a opt/rocm-${finalAttrs.version}*/* "$out"
    chmod +x "$out"/lib/libhsa-amd-aqlprofile64.so.1.*
    chmod +x "$out/lib/hsa-amd-aqlprofile/librocprofv2_att.so"

    runHook postInstall
  '';

  passthru.updateScript = (callPackage ./update.nix { }) { inherit (finalAttrs) version; };

  meta = {
    description = "AQLPROFILE library for AMD HSA runtime API extension support";
    homepage = "https://rocm.docs.amd.com/en/latest/";
    # Shipped as a prebuilt vendor binary, not built from source.
    sourceProvenance = [ lib.sourceTypes.binaryNativeCode ];
    license = [ lib.licenses.unfree ];
    teams = [ lib.teams.rocm ];
    # NOTE(review): the fetched package is the amd64 .deb, so this is
    # presumably only usable on x86_64-linux -- confirm before narrowing.
    platforms = lib.platforms.linux;
  };
})
|
||||
61
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/update.nix
Normal file
61
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/update.nix
Normal file
@@ -0,0 +1,61 @@
|
||||
{
  lib,
  writeScript,
}:

# Takes the currently packaged version and returns the update script (as a
# single-element list) for rocmPackages_6.hsa-amd-aqlprofile-bin.
{ version }:

let
  prefix = "hsa-amd-aqlprofile";
  # The version components joined with a literal "0" between them.
  extVersion = lib.concatStringsSep "0" (lib.versions.splitVersion version);
  major = lib.versions.major version;
  minor = lib.versions.minor version;
  patch = lib.versions.patch version;

  updateScript = writeScript "update.sh" ''
    #!/usr/bin/env nix-shell
    #!nix-shell -i bash -p curl common-updater-scripts
    apt="https://repo.radeon.com/rocm/apt"
    pool="pool/main/h/${prefix}/"
    url="$apt/latest/$pool"
    res="$(curl -sL "$url")"
    deb="${prefix}$(echo "$res" | grep -o -P "(?<=href=\"${prefix}).*(?=\">)" | tail -1)"
    patch="${patch}"

    # Try up to 10 patch versions
    for i in {1..10}; do
      ((patch++))
      extVersion="$(echo "$deb" | grep -o -P "(?<=\.....).*(?=\..*-)")"

      if (( ''${#extVersion} == 6 )) && (( $extVersion <= ${extVersion} )); then
        url="https://repo.radeon.com/rocm/apt/${major}.${minor}.$patch/pool/main/h/${prefix}/"
        res="$(curl -sL "$url")"
        deb="${prefix}$(echo "$res" | grep -o -P "(?<=href=\"${prefix}).*(?=\">)" | tail -1)"
      else
        break
      fi
    done

    extVersion="$(echo "$deb" | grep -o -P "(?<=\.....).*(?=\..*-)")"
    version="$(echo "$extVersion" | sed "s/0/./1" | sed "s/0/./1")"
    IFS='.' read -r -a version_arr <<< "$version"

    if (( ''${version_arr[0]} > 6 )); then
      echo "'rocmPackages_6.${prefix}-bin' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${prefix}-bin'." 1>&2
      exit 1
    fi

    if (( ''${#extVersion} == 6 )); then
      repoVersion="$version"

      if (( ''${version:4:1} == 0 )); then
        repoVersion=''${version:0:3}
      fi

      update-source-version rocmPackages_6.${prefix}-bin "$version" "" "$apt/$repoVersion/$pool$deb" --ignore-same-hash
    fi
  '';
in
[ updateScript ]
|
||||
@@ -0,0 +1,70 @@
|
||||
diff --git a/cmake/modules/AddClang.cmake b/cmake/modules/AddClang.cmake
|
||||
index 75b0080f6..c895b884c 100644
|
||||
--- a/cmake/modules/AddClang.cmake
|
||||
+++ b/cmake/modules/AddClang.cmake
|
||||
@@ -119,8 +119,8 @@ macro(add_clang_library name)
|
||||
install(TARGETS ${lib}
|
||||
COMPONENT ${lib}
|
||||
${export_to_clangtargets}
|
||||
- LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
|
||||
- ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}
|
||||
+ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}"
|
||||
+ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}"
|
||||
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
|
||||
|
||||
if (NOT LLVM_ENABLE_IDE)
|
||||
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
|
||||
index e6ae4e19e..5ef01aea2 100644
|
||||
--- a/lib/Headers/CMakeLists.txt
|
||||
+++ b/lib/Headers/CMakeLists.txt
|
||||
@@ -337,6 +337,7 @@ set(llvm_libc_wrapper_files
|
||||
|
||||
include(GetClangResourceDir)
|
||||
get_clang_resource_dir(output_dir PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include)
|
||||
+set(header_install_dir ${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION_MAJOR}/include)
|
||||
set(out_files)
|
||||
set(generated_files)
|
||||
|
||||
diff --git a/tools/libclang/CMakeLists.txt b/tools/libclang/CMakeLists.txt
|
||||
index b5b6d2807..6b592d255 100644
|
||||
--- a/tools/libclang/CMakeLists.txt
|
||||
+++ b/tools/libclang/CMakeLists.txt
|
||||
@@ -246,7 +246,7 @@ foreach(PythonVersion ${CLANG_PYTHON_BINDINGS_VERSIONS})
|
||||
COMPONENT
|
||||
libclang-python-bindings
|
||||
DESTINATION
|
||||
- "lib${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages")
|
||||
+ "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages")
|
||||
endforeach()
|
||||
if(NOT LLVM_ENABLE_IDE)
|
||||
add_custom_target(libclang-python-bindings)
|
||||
diff --git a/tools/scan-build-py/CMakeLists.txt b/tools/scan-build-py/CMakeLists.txt
|
||||
index 3aca22c0b..3115353e3 100644
|
||||
--- a/tools/scan-build-py/CMakeLists.txt
|
||||
+++ b/tools/scan-build-py/CMakeLists.txt
|
||||
@@ -88,7 +88,7 @@ foreach(lib ${LibScanbuild})
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib})
|
||||
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib})
|
||||
install(FILES lib/libscanbuild/${lib}
|
||||
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild
|
||||
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild"
|
||||
COMPONENT scan-build-py)
|
||||
endforeach()
|
||||
|
||||
@@ -106,7 +106,7 @@ foreach(resource ${LibScanbuildResources})
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource})
|
||||
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource})
|
||||
install(FILES lib/libscanbuild/resources/${resource}
|
||||
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild/resources
|
||||
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild/resources"
|
||||
COMPONENT scan-build-py)
|
||||
endforeach()
|
||||
|
||||
@@ -122,7 +122,7 @@ foreach(lib ${LibEar})
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib})
|
||||
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libear/${lib})
|
||||
install(FILES lib/libear/${lib}
|
||||
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libear
|
||||
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libear"
|
||||
COMPONENT scan-build-py)
|
||||
endforeach()
|
||||
@@ -0,0 +1,23 @@
|
||||
diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp
|
||||
index 57368104c914..71c57f72078e 100644
|
||||
--- a/lib/Driver/ToolChains/Linux.cpp
|
||||
+++ b/lib/Driver/ToolChains/Linux.cpp
|
||||
@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
|
||||
return;
|
||||
|
||||
// LOCAL_INCLUDE_DIR
|
||||
+ if (!SysRoot.empty())
|
||||
addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include"));
|
||||
// TOOL_INCLUDE_DIR
|
||||
AddMultilibIncludeArgs(DriverArgs, CC1Args);
|
||||
@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
|
||||
// Add an include of '/include' directly. This isn't provided by default by
|
||||
// system GCCs, but is often used with cross-compiling GCCs, and harmless to
|
||||
// add even when Clang is acting as-if it were a system compiler.
|
||||
+ if (!SysRoot.empty())
|
||||
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include"));
|
||||
|
||||
+ if (!SysRoot.empty())
|
||||
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include"));
|
||||
|
||||
if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl())
|
||||
40
pkgs/rocm-modules/llvm/clang-log-jobs.diff
Normal file
40
pkgs/rocm-modules/llvm/clang-log-jobs.diff
Normal file
@@ -0,0 +1,40 @@
|
||||
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
|
||||
index 06f5e7e7e335..8407d664886a 100644
|
||||
--- a/lib/Driver/Compilation.cpp
|
||||
+++ b/lib/Driver/Compilation.cpp
|
||||
@@ -340,6 +340,9 @@ private:
|
||||
void Compilation::ExecuteJobs(const JobList &Jobs,
|
||||
FailingCommandList &FailingCommands,
|
||||
bool LogOnly) const {
|
||||
+ // If >1 job, log as each job finishes so can see progress while building many offloads
|
||||
+ const bool logJobs = Jobs.size() > 1;
|
||||
+ auto start_time = std::chrono::steady_clock::now();
|
||||
// According to UNIX standard, driver need to continue compiling all the
|
||||
// inputs on the command line even one of them failed.
|
||||
// In all but CLMode, execute all the jobs unless the necessary inputs for the
|
||||
@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs,
|
||||
|
||||
JS.setJobState(Next, JobScheduler::JS_RUN);
|
||||
auto Work = [&, Next]() {
|
||||
+ auto job_start_time = std::chrono::steady_clock::now();
|
||||
const Command *FailingCommand = nullptr;
|
||||
if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) {
|
||||
FailingCommands.push_back(std::make_pair(Res, FailingCommand));
|
||||
JS.setJobState(Next, JobScheduler::JS_FAIL);
|
||||
} else {
|
||||
+ if (logJobs && Next) {
|
||||
+ auto now = std::chrono::steady_clock::now();
|
||||
+ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count();
|
||||
+ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count();
|
||||
+ if (duration > 10 && job_duration > 0) {
|
||||
+ if (Next->getOutputFilenames().empty())
|
||||
+ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n";
|
||||
+ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true);
|
||||
+ else
|
||||
+ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n";
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
JS.setJobState(Next, JobScheduler::JS_DONE);
|
||||
}
|
||||
};
|
||||
570
pkgs/rocm-modules/llvm/default.nix
Normal file
570
pkgs/rocm-modules/llvm/default.nix
Normal file
@@ -0,0 +1,570 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
llvmPackages_19,
|
||||
overrideCC,
|
||||
rocm-device-libs,
|
||||
rocm-runtime,
|
||||
fetchFromGitHub,
|
||||
runCommand,
|
||||
symlinkJoin,
|
||||
rdfind,
|
||||
wrapBintoolsWith,
|
||||
emptyDirectory,
|
||||
zstd,
|
||||
zlib,
|
||||
gcc-unwrapped,
|
||||
glibc,
|
||||
replaceVars,
|
||||
libffi,
|
||||
libxml2,
|
||||
removeReferencesTo,
|
||||
fetchpatch,
|
||||
writeShellScript,
|
||||
makeWrapper,
|
||||
# Build compilers and stdenv suitable for profiling
|
||||
# using compressed line tables (-g1 -gz) and
|
||||
# frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer)
|
||||
# TODO: Should also apply to downstream packages which use rocmClangStdenv
|
||||
profilableStdenv ? false,
|
||||
}:
|
||||
|
||||
let
|
||||
llvmPackagesNoBintools = llvmPackages_19.override {
|
||||
bootBintools = null;
|
||||
bootBintoolsNoLibc = null;
|
||||
};
|
||||
useLibcxx = false; # whether rocm stdenv uses libcxx (clang c++ stdlib) instead of gcc stdlibc++
|
||||
|
||||
llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM;
|
||||
llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv (
|
||||
llvmPackagesNoBintools.libstdcxxClang.override {
|
||||
inherit (llvmPackages_19) bintools;
|
||||
}
|
||||
);
|
||||
stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv;
|
||||
gcc-include = runCommand "gcc-include" { } ''
|
||||
mkdir -p $out
|
||||
ln -s ${gcc-unwrapped}/include/ $out/
|
||||
ln -s ${gcc-unwrapped}/lib/ $out/
|
||||
'';
|
||||
|
||||
# Store paths of the compiler used to bootstrap ROCm LLVM; appended to
# disallowedReferences of the toolchain derivations so they cannot leak in.
disallowedRefsForToolchain = [
  stdenv.cc
  stdenv.cc.cc
  stdenv.cc.bintools
  gcc-unwrapped
  stdenvToBuildRocmLlvm
];
# A prefix for use as the GCC prefix when building rocmcxx
gcc-prefix =
  let
    gccPrefixPaths = [
      gcc-unwrapped
      gcc-unwrapped.lib
      glibc.dev
    ];
  in
  symlinkJoin {
    name = "gcc-prefix";
    paths = gccPrefixPaths ++ [
      glibc
    ];
    disallowedRequisites = gccPrefixPaths;
    postBuild = ''
      rm -rf $out/{bin,libexec,nix-support,lib64,share,etc}
      rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h

      # Replace the symlink tree with real copies. The scratch directory lives
      # under $NIX_BUILD_TOP instead of a literal /build so this also works
      # when the build directory is somewhere else (e.g. sandbox disabled).
      mkdir "$NIX_BUILD_TOP/tmpout"
      mv $out/* "$NIX_BUILD_TOP/tmpout"
      cp -Lr --no-preserve=mode "$NIX_BUILD_TOP"/tmpout/* $out/
      set -x
      # Flatten include/c++/<version>/ into include/c++/
      versionedIncludePath="$(echo $out/include/c++/*/)"
      mv $versionedIncludePath/* $out/include/c++/
      rm -rf $versionedIncludePath/

      find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} +

      ln -s $out $out/x86_64-unknown-linux-gnu
    '';
  };
|
||||
version = "6.4.1";
|
||||
# major version of this should be the clang version ROCm forked from
|
||||
rocmLlvmVersion = "19.0.0-${llvmSrc.rev}";
|
||||
# Given a derivation, return it together with its `lib` and `dev` outputs,
# leaving out whichever of those two the derivation does not have.
usefulOutputs =
  drv:
  let
    candidates = [
      drv
      (drv.lib or null)
      (drv.dev or null)
    ];
  in
  builtins.filter (output: output != null) candidates;
# usefulOutputs applied to every element of a list, results concatenated.
listUsefulOutputs = builtins.concatMap usefulOutputs;
|
||||
# llvmSrc = fetchFromGitHub {
|
||||
# # Performance improvements cherry-picked on top of rocm-6.3.x
|
||||
# # most importantly, amdgpu-early-alwaysinline memory usage fix
|
||||
# owner = "LunNova";
|
||||
# repo = "llvm-project-rocm";
|
||||
# rev = "4182046534deb851753f0d962146e5176f648893";
|
||||
# hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c=";
|
||||
# };
|
||||
llvmSrc = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "llvm-project";
|
||||
# rev = "873e9660026931bbd2cbce41475090039f81f8c7";
|
||||
# hash = "sha256-7B4NQ1LBN3btHjrh9Ht2S+BYYqhKNPAwMoP18qYJw4E=";
|
||||
# hash = "sha256-cIkKG5rB34G+AqonOS76acnhynmI29PIW7TuE0SQzO4=";
|
||||
# hash = "sha256-DtbPZ75KqfPqPsGwmT1sUyeI1HyICDDs3SxwQQl72BM=";
|
||||
rev = "rocm-${version}";
|
||||
# hash = "sha256-h4mD6gu0Gt4zYme7qtlm9QpsqWZ6XoH+XKd3hsQly1I=";
|
||||
# hash = "sha256-jJsmPainHOd4BJ0bQbf1M3Kd4+aLbx3ENxtuzJ9+lLY=";
|
||||
# hash = "sha256-5n3EQby17JEgr3kh1pUNuo/La4hUxMf10O7CckVMS5U=";
|
||||
hash = "sha256-84+ZsKjIhXip2yLU5jpoV53+ejxy2dzgamVU6AcAngU=";
|
||||
# hash = "sha256-4b1d9a2c7f0e8c3f5b6d8c1e4f0b2c5f3a6b7c8d9e0f1a2b3c4d5e6f7g8h9i0j";
|
||||
};
|
||||
llvmSrcFixed = llvmSrc;
|
||||
llvmMajorVersion = lib.versions.major rocmLlvmVersion;
|
||||
# An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree
|
||||
# optionally using LLVM libcxx
|
||||
llvmPackagesRocm = llvmPackages_19.override (_old: {
|
||||
stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv;
|
||||
|
||||
# not setting gitRelease = because that causes patch selection logic to use git patches
|
||||
# ROCm LLVM is closer to 18 official
|
||||
# gitRelease = {
|
||||
# rev-version = rocmLlvmVersion;
|
||||
# };
|
||||
# gitRelease = null;
|
||||
# officialRelease = null;
|
||||
officialRelease = { }; # Set but empty because we're overriding everything from it.
|
||||
version = rocmLlvmVersion;
|
||||
src = llvmSrcFixed;
|
||||
monorepoSrc = llvmSrcFixed;
|
||||
doCheck = false;
|
||||
});
|
||||
sysrootCompiler =
|
||||
cc: name: paths:
|
||||
let
|
||||
linked = symlinkJoin { inherit name paths; };
|
||||
in
|
||||
runCommand name { } ''
|
||||
set -x
|
||||
mkdir -p $out/
|
||||
cp --reflink=auto -rL ${linked}/* $out/
|
||||
chmod -R +rw $out
|
||||
mkdir -p $out/usr
|
||||
ln -s $out/ $out/usr/local
|
||||
mkdir -p $out/nix-support/
|
||||
rm -rf $out/lib64 # we don't need mixed 32 bit
|
||||
echo 'export CC=clang' >> $out/nix-support/setup-hook
|
||||
echo 'export CXX=clang++' >> $out/nix-support/setup-hook
|
||||
mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/
|
||||
ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/
|
||||
file $out/bin/.clang-wrapped
|
||||
file $out/bin/.clang++-wrapped
|
||||
find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} +
|
||||
find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} +
|
||||
|
||||
file $out/bin/.clang-wrapped
|
||||
file $out/bin/.clang++-wrapped
|
||||
# our /include now has more than clang expects, so this specific dir still needs to point to cc.dev
|
||||
# FIXME: could copy into a different subdir?
|
||||
sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake
|
||||
# ${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space
|
||||
'';
|
||||
# Predicate over a patch path: true when its file name ends in one of the
# two patch names listed below.
findClangNostdlibincPatch =
  x:
  let
    fileName = builtins.baseNameOf x;
  in
  builtins.any (suffix: lib.strings.hasSuffix suffix fileName) [
    "add-nostdlibinc-flag.patch"
    "clang-at-least-16-LLVMgold-path.patch"
  ];
|
||||
# CMake flag selecting the LLVM backends to build: AMDGPU plus the backend
# matching the CPU of llvmStdenv's target platform. Fails with an explicit
# message for CPUs that have no entry in the table.
llvmTargetsFlag =
  let
    cpuName = llvmStdenv.targetPlatform.parsed.cpu.name;
    hostBackends = {
      "x86_64" = "X86";
      "aarch64" = "AArch64";
    };
  in
  "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${
    hostBackends.${cpuName}
      or (throw "rocm llvm: no LLVM host backend is known for CPU '${cpuName}'")
  }";
|
||||
# -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too
|
||||
# FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code,
|
||||
# instead of containing object code for multiple target architectures. This allows users to defer the choice of whether to
|
||||
# use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time.
|
||||
|
||||
tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x);
|
||||
addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang;
|
||||
llvmExtraCflags =
|
||||
"-O3 -DNDEBUG -march=skylake -mtune=znver3"
|
||||
+ (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto")
|
||||
+ (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects")
|
||||
+ (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1");
|
||||
in
|
||||
rec {
|
||||
inherit llvmSrc;
|
||||
inherit (llvmPackagesRocm) libunwind;
|
||||
inherit (llvmPackagesRocm) libcxx;
|
||||
# Pass through original attrs for debugging where non-overridden llvm/clang is getting used
|
||||
# llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig
|
||||
# clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.clang-orig
|
||||
llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: {
|
||||
patches = old.patches ++ [
|
||||
./rocm-llvm-6.4-llvm-gold-plugin-fix-ModuleName.patch
|
||||
];
|
||||
# patches = builtins.filter (
|
||||
# x:
|
||||
# (
|
||||
# !(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x))
|
||||
# && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x))
|
||||
# )
|
||||
# ) old.patches;
|
||||
dontStrip = profilableStdenv;
|
||||
nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ];
|
||||
buildInputs = old.buildInputs ++ [
|
||||
zstd
|
||||
zlib
|
||||
];
|
||||
env.NIX_BUILD_ID_STYLE = "fast";
|
||||
postPatch = ''
|
||||
${old.postPatch or ""}
|
||||
patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh
|
||||
'';
|
||||
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
|
||||
cmakeFlags =
|
||||
(builtins.filter tablegenUsage old.cmakeFlags)
|
||||
++ [
|
||||
llvmTargetsFlag
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
|
||||
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
|
||||
"-DLLVM_ENABLE_THREADS=ON"
|
||||
"-DLLVM_ENABLE_LTO=Thin"
|
||||
"-DLLVM_USE_LINKER=lld"
|
||||
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
|
||||
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
|
||||
]
|
||||
++ lib.optionals addGccLtoCmakeFlags [
|
||||
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
|
||||
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
|
||||
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
|
||||
]
|
||||
++ lib.optionals useLibcxx [
|
||||
"-DLLVM_ENABLE_LTO=Thin"
|
||||
"-DLLVM_USE_LINKER=lld"
|
||||
"-DLLVM_ENABLE_LIBCXX=ON"
|
||||
];
|
||||
preConfigure = ''
|
||||
${old.preConfigure or ""}
|
||||
cmakeFlagsArray+=(
|
||||
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||
)
|
||||
'';
|
||||
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
|
||||
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
|
||||
postFixup = ''
|
||||
${old.postFixup or ""}
|
||||
remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a"
|
||||
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
|
||||
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} +
|
||||
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
|
||||
'';
|
||||
doCheck = false;
|
||||
});
|
||||
lld =
|
||||
(llvmPackagesRocm.lld.override {
|
||||
libllvm = llvm;
|
||||
ninja = emptyDirectory;
|
||||
}).overrideAttrs
|
||||
(old: {
|
||||
patches = builtins.filter (
|
||||
x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x))
|
||||
) old.patches;
|
||||
dontStrip = profilableStdenv;
|
||||
nativeBuildInputs = old.nativeBuildInputs ++ [
|
||||
llvmPackagesNoBintools.lld
|
||||
removeReferencesTo
|
||||
];
|
||||
buildInputs = old.buildInputs ++ [
|
||||
zstd
|
||||
zlib
|
||||
];
|
||||
env.NIX_BUILD_ID_STYLE = "fast";
|
||||
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
|
||||
cmakeFlags =
|
||||
(builtins.filter tablegenUsage old.cmakeFlags)
|
||||
++ [
|
||||
llvmTargetsFlag
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
|
||||
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
|
||||
"-DLLVM_ENABLE_THREADS=ON"
|
||||
"-DLLVM_ENABLE_LTO=Thin"
|
||||
"-DLLVM_USE_LINKER=lld"
|
||||
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
|
||||
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
|
||||
]
|
||||
++ lib.optionals addGccLtoCmakeFlags [
|
||||
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
|
||||
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
|
||||
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
|
||||
]
|
||||
++ lib.optionals useLibcxx [
|
||||
"-DLLVM_ENABLE_LIBCXX=ON"
|
||||
];
|
||||
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
|
||||
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
|
||||
postFixup = ''
|
||||
${old.postFixup or ""}
|
||||
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
|
||||
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
|
||||
'';
|
||||
preConfigure = ''
|
||||
${old.preConfigure or ""}
|
||||
cmakeFlagsArray+=(
|
||||
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||
)
|
||||
'';
|
||||
});
|
||||
clang-unwrapped =
|
||||
(
|
||||
(llvmPackagesRocm.clang-unwrapped.override {
|
||||
libllvm = llvm;
|
||||
ninja = emptyDirectory;
|
||||
}).overrideAttrs
|
||||
(
|
||||
old:
|
||||
let
|
||||
# filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches;
|
||||
filteredPatches = builtins.filter (
|
||||
x:
|
||||
(
|
||||
!(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x))
|
||||
# && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x))
|
||||
&& !(findClangNostdlibincPatch x)
|
||||
)
|
||||
) old.patches;
|
||||
in
|
||||
{
|
||||
meta.platforms = [
|
||||
"x86_64-linux"
|
||||
];
|
||||
pname = "${old.pname}-rocm";
|
||||
patches = filteredPatches ++ [
|
||||
./96cbfymn788ssbhmay4sy7h268qg81fl-gnu-install-dirs.patch
|
||||
# Never add FHS include paths
|
||||
./clang-bodge-ignore-systemwide-incls.diff
|
||||
# Prevents builds timing out if a single compiler invocation is very slow but
|
||||
# per-arch jobs are completing by ensuring there's terminal output
|
||||
./clang-log-jobs.diff
|
||||
(fetchpatch {
|
||||
# [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler
|
||||
sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8=";
|
||||
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch";
|
||||
relative = "clang";
|
||||
})
|
||||
# FIXME: Needed due to https://github.com/NixOS/nixpkgs/issues/375431
|
||||
# Once we can switch to overrideScope this can be removed
|
||||
# (replaceVars ./../../../compilers/llvm/common/clang/clang-at-least-16-LLVMgold-path.patch {
|
||||
# libllvmLibdir = "${llvm.lib}/lib";
|
||||
# })
|
||||
];
|
||||
nativeBuildInputs = old.nativeBuildInputs ++ [
|
||||
llvmPackagesNoBintools.lld
|
||||
removeReferencesTo
|
||||
];
|
||||
buildInputs = old.buildInputs ++ [
|
||||
zstd
|
||||
zlib
|
||||
];
|
||||
dontStrip = profilableStdenv;
|
||||
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
|
||||
env = (old.env or { }) // {
|
||||
NIX_BUILD_ID_STYLE = "fast";
|
||||
};
|
||||
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
|
||||
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
|
||||
requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ];
|
||||
# https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11
|
||||
cmakeFlags =
|
||||
(builtins.filter tablegenUsage old.cmakeFlags)
|
||||
++ [
|
||||
llvmTargetsFlag
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
|
||||
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
|
||||
"-DLLVM_ENABLE_THREADS=ON"
|
||||
"-DLLVM_ENABLE_LTO=Thin"
|
||||
"-DLLVM_USE_LINKER=lld"
|
||||
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
|
||||
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
|
||||
]
|
||||
++ lib.optionals addGccLtoCmakeFlags [
|
||||
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
|
||||
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
|
||||
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
|
||||
]
|
||||
++ lib.optionals useLibcxx [
|
||||
"-DLLVM_ENABLE_LTO=Thin"
|
||||
"-DLLVM_ENABLE_LIBCXX=ON"
|
||||
"-DLLVM_USE_LINKER=lld"
|
||||
"-DCLANG_DEFAULT_RTLIB=compiler-rt"
|
||||
];
|
||||
# ++ lib.optionals (!useLibcxx) [
|
||||
# # FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX?
|
||||
# "-DGCC_INSTALL_PREFIX=${gcc-prefix}"
|
||||
# ];
|
||||
postFixup =
|
||||
(old.postFixup or "")
|
||||
+ ''
|
||||
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
|
||||
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
|
||||
'';
|
||||
preConfigure =
|
||||
(old.preConfigure or "")
|
||||
+ ''
|
||||
cmakeFlagsArray+=(
|
||||
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||
)
|
||||
'';
|
||||
postInstall =
|
||||
(old.postInstall or "")
|
||||
+ ''
|
||||
echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang.cfg
|
||||
echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang++.cfg
|
||||
'';
|
||||
}
|
||||
)
|
||||
)
|
||||
// {
|
||||
libllvm = llvm;
|
||||
};
|
||||
# A clang that understands standard include searching in a GNU sysroot, puts the
# GPU libs in the include path in the right order, and expects its libc to be in
# the sysroot.
rocmcxx =
  let
    # C++ standard library (and, for the libstdc++ case, the GCC headers and glibc)
    # that gets merged into the sysroot; exactly one of the two sets is used.
    stdlibOutputs =
      if useLibcxx then
        [ libcxx ]
      else
        [
          gcc-include
          glibc
          glibc.dev
        ];

    # Compiler, binutils and runtime come first, followed by the stdlib pieces.
    wrappedCompiler = sysrootCompiler clang-unwrapped "rocmcxx" (
      listUsefulOutputs (
        [
          clang-unwrapped
          bintools
          compiler-rt
        ]
        ++ stdlibOutputs
      )
    );
  in
  # Extra attributes layered on top of the sysrootCompiler result.
  wrappedCompiler
  // {
    version = llvmMajorVersion;
    cc = rocmcxx;
    libllvm = llvm;
    isClang = true;
    isGNU = false;
  };
|
||||
clang-tools = llvmPackagesRocm.clang-tools.override {
|
||||
inherit clang-unwrapped clang;
|
||||
};
|
||||
compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: {
|
||||
patches = old.patches ++ [
|
||||
(fetchpatch {
|
||||
name = "avoid-overload-ambiguity-for-interceptors.patch";
|
||||
url = "https://github.com/ROCm/llvm-project/commit/155b7a12820ec45095988b6aa6e057afaf2bc892.patch";
|
||||
hash = "sha256-pgpN1q1vIQrPXHPxNSZ6zfgV2EflHO5Amzl+2BDjXbs=";
|
||||
relative = "compiler-rt";
|
||||
})
|
||||
];
|
||||
});
|
||||
compiler-rt = compiler-rt-libc;
|
||||
bintools = wrapBintoolsWith {
|
||||
bintools = llvmPackagesRocm.bintools-unwrapped.override {
|
||||
inherit lld llvm;
|
||||
};
|
||||
};
|
||||
|
||||
clang = rocmcxx;
|
||||
|
||||
# Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects
|
||||
rocm-merged-llvm = symlinkJoin {
|
||||
name = "rocm-llvm-merge";
|
||||
paths =
|
||||
[
|
||||
llvm
|
||||
llvm.dev
|
||||
lld
|
||||
lld.lib
|
||||
lld.dev
|
||||
libunwind
|
||||
libunwind.dev
|
||||
compiler-rt
|
||||
compiler-rt.dev
|
||||
rocmcxx
|
||||
]
|
||||
++ lib.optionals useLibcxx [
|
||||
libcxx
|
||||
libcxx.out
|
||||
libcxx.dev
|
||||
];
|
||||
postBuild = builtins.unsafeDiscardStringContext ''
|
||||
found_files=$(find $out -name '*.cmake')
|
||||
if [ -z "$found_files" ]; then
|
||||
>&2 echo "Error: No CMake files found in $out"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do
|
||||
if grep "$target" $found_files; then
|
||||
>&2 echo "Unexpected ref to $target (clang-unwrapped) found"
|
||||
# exit 1
|
||||
# # FIXME: enable this to reduce closure size
|
||||
fi
|
||||
done
|
||||
'';
|
||||
inherit version;
|
||||
llvm-src = llvmSrc;
|
||||
};
|
||||
|
||||
rocmClangStdenv = overrideCC (
|
||||
if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv
|
||||
) clang;
|
||||
|
||||
# Projects
|
||||
# OpenMP runtime built with the ROCm clang stdenv against the merged ROCm LLVM tree.
openmp =
  (llvmPackagesRocm.openmp.override {
    stdenv = rocmClangStdenv;
    llvm = rocm-merged-llvm;
    targetLlvm = rocm-merged-llvm;
    clang-unwrapped = clang;
  }).overrideAttrs
    (old: {
      disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
      nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ];
      # `or [ ]` keeps this override working even if the base derivation stops
      # defining the attribute, matching the other list attributes in this block.
      cmakeFlags =
        (old.cmakeFlags or [ ])
        ++ [
          "-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
          # OMPD support is broken in ROCm 6.3. Haven't investigated why.
          "-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE"
          "-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE"
        ]
        ++ lib.optionals addGccLtoCmakeFlags [
          "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
          "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
        ];
      # Merge into the inherited `env` instead of replacing it: writing
      # `env.LLVM = ...` inside overrideAttrs defines a brand-new `env` attrset
      # and would silently drop any variables set by the base derivation.
      env = (old.env or { }) // {
        LLVM = "${rocm-merged-llvm}";
        LLVM_DIR = "${rocm-merged-llvm}";
      };
      buildInputs = (old.buildInputs or [ ]) ++ [
        rocm-device-libs
        rocm-runtime
        zlib
        zstd
        libxml2
        libffi
      ];
    });
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
From 6c2872afcd9ae8e313621eb6cb7f407e89097304 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Sun, 13 Apr 2025 07:41:34 -0700
|
||||
Subject: [PATCH] rocm-llvm: gold-plugin: fix ModuleName
|
||||
|
||||
---
|
||||
llvm/tools/gold/gold-plugin.cpp | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
|
||||
index 0d4ca5299689..dd577206408c 100644
|
||||
--- a/tools/gold/gold-plugin.cpp
|
||||
+++ b/tools/gold/gold-plugin.cpp
|
||||
@@ -1100,10 +1100,8 @@ static std::vector<std::pair<SmallString<128>, bool>> runLTO() {
|
||||
};
|
||||
|
||||
auto AddBuffer = [&](size_t Task, const Twine &moduleName,
|
||||
- std::unique_ptr<MemoryBuffer> MB) {
|
||||
- auto Stream = *AddStream(Task, ModuleName);
|
||||
- Stream->OS << MB->getBuffer();
|
||||
- check(Stream->commit(), "Failed to commit cache");
|
||||
+ std::unique_ptr<MemoryBuffer> MB) {
|
||||
+ *AddStream(Task, moduleName)->OS << MB->getBuffer();
|
||||
};
|
||||
|
||||
FileCache Cache;
|
||||
--
|
||||
2.48.1
|
||||
194
pkgs/rocm-modules/migraphx/default.nix
Normal file
194
pkgs/rocm-modules/migraphx/default.nix
Normal file
@@ -0,0 +1,194 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
pkg-config,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
clr,
|
||||
openmp,
|
||||
rocblas,
|
||||
hipblas-common,
|
||||
hipblas,
|
||||
hipblaslt,
|
||||
rocmlir,
|
||||
miopen,
|
||||
protobuf,
|
||||
abseil-cpp,
|
||||
half,
|
||||
nlohmann_json,
|
||||
msgpack,
|
||||
sqlite,
|
||||
oneDNN_2,
|
||||
blaze,
|
||||
texliveSmall,
|
||||
doxygen,
|
||||
sphinx,
|
||||
docutils,
|
||||
ghostscript,
|
||||
python3Packages,
|
||||
buildDocs ? false,
|
||||
buildTests ? false,
|
||||
gpuTargets ? clr.gpuTargets,
|
||||
}:
|
||||
|
||||
let
|
||||
latex = lib.optionalAttrs buildDocs (
|
||||
texliveSmall.withPackages (
|
||||
ps: with ps; [
|
||||
latexmk
|
||||
tex-gyre
|
||||
fncychap
|
||||
wrapfig
|
||||
capt-of
|
||||
framed
|
||||
needspace
|
||||
tabulary
|
||||
varwidth
|
||||
titlesec
|
||||
epstopdf
|
||||
]
|
||||
)
|
||||
);
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "migraphx";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
"doc"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "AMDMIGraphX";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-PytBEnLBHyp0JzkSLbLRHslqGBk4mabKC62JZoXwHxE=";
|
||||
};
|
||||
|
||||
nativeBuildInputs =
|
||||
[
|
||||
pkg-config
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
python3Packages.python
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
latex
|
||||
doxygen
|
||||
sphinx
|
||||
docutils
|
||||
ghostscript
|
||||
python3Packages.sphinx-rtd-theme
|
||||
python3Packages.breathe
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
openmp
|
||||
rocblas
|
||||
hipblas-common
|
||||
hipblas
|
||||
hipblaslt
|
||||
rocmlir
|
||||
miopen
|
||||
protobuf
|
||||
half
|
||||
nlohmann_json
|
||||
msgpack
|
||||
sqlite
|
||||
oneDNN_2
|
||||
blaze
|
||||
python3Packages.pybind11
|
||||
python3Packages.onnx
|
||||
];
|
||||
|
||||
LDFLAGS = "-Wl,--allow-shlib-undefined";
|
||||
|
||||
# CMake cache entries passed to the MIGraphX configure step.
cmakeFlags = [
  "-DMIGRAPHX_ENABLE_GPU=ON"
  "-DMIGRAPHX_ENABLE_CPU=ON"
  "-DMIGRAPHX_ENABLE_FPGA=ON"
  "-DMIGRAPHX_ENABLE_MLIR=OFF" # LLVM or rocMLIR mismatch?
  "-DCMAKE_C_COMPILER=amdclang"
  "-DCMAKE_CXX_COMPILER=amdclang++"
  "-DCMAKE_VERBOSE_MAKEFILE=ON"
  "-DEMBED_USE=CArrays" # Fixes error with lld
  # Was "-DDMIGRAPHX_ENABLE_PYTHON=ON": the doubled "D" defined an unused
  # variable named DMIGRAPHX_ENABLE_PYTHON instead of setting the option.
  "-DMIGRAPHX_ENABLE_PYTHON=ON"
  "-DROCM_PATH=${clr}"
  "-DHIP_ROOT_DIR=${clr}"
  # migraphx relies on an incompatible fork of composable_kernel
  # migraphx relies on miopen which relies on current composable_kernel
  # impossible to build with this ON; we can't link both of them even if we package both
  "-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF"
  "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
  "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
  "-DOpenMP_omp_LIBRARY=${openmp}/lib"
  # Manually define CMAKE_INSTALL_<DIR>
  # See: https://github.com/NixOS/nixpkgs/pull/197838
  "-DCMAKE_INSTALL_BINDIR=bin"
  "-DCMAKE_INSTALL_LIBDIR=lib"
  "-DCMAKE_INSTALL_INCLUDEDIR=include"
  "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
];
|
||||
|
||||
# Source tweaks applied after unpacking:
#  - extend CXXFLAGS with include paths for rocmlir, half, abseil-cpp and hipblas-common
#  - fix script shebangs under tools/
#  - drop the clang-tidy error setting, and the doc/test subdirectories when
#    buildDocs/buildTests are disabled
# `--replace-warn` is the non-deprecated spelling of the old bare `--replace`:
# it only warns (does not fail) when the pattern is absent.
postPatch =
  ''
    export CXXFLAGS+=" -w -isystem${rocmlir}/include/rocmlir -I${half}/include -I${abseil-cpp}/include -I${hipblas-common}/include"
    patchShebangs tools

    # `error: '__clang_hip_runtime_wrapper.h' file not found [clang-diagnostic-error]`
    substituteInPlace CMakeLists.txt \
      --replace-warn "set(MIGRAPHX_TIDY_ERRORS ALL)" ""
  ''
  + lib.optionalString (!buildDocs) ''
    substituteInPlace CMakeLists.txt \
      --replace-warn "add_subdirectory(doc)" ""
  ''
  + lib.optionalString (!buildTests) ''
    substituteInPlace CMakeLists.txt \
      --replace-warn "add_subdirectory(test)" ""
  '';
|
||||
|
||||
# Unfortunately, it seems like we have to call make on this manually
|
||||
preInstall = lib.optionalString buildDocs ''
|
||||
export HOME=$(mktemp -d)
|
||||
make -j$NIX_BUILD_CORES doc
|
||||
cd ../doc/pdf
|
||||
make -j$NIX_BUILD_CORES
|
||||
cd -
|
||||
'';
|
||||
|
||||
postInstall =
|
||||
lib.optionalString buildDocs ''
|
||||
mv ../doc/html $out/share/doc/migraphx
|
||||
mv ../doc/pdf/MIGraphX.pdf $out/share/doc/migraphx
|
||||
''
|
||||
+ lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv bin/test_* $test/bin
|
||||
patchelf $test/bin/test_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
|
||||
'';
|
||||
|
||||
# Update script keyed on this package's name and the GitHub owner/repo of `src`.
passthru.updateScript = rocmUpdateScript {
  name = finalAttrs.pname;
  inherit (finalAttrs.src) owner repo;
};

# Package metadata; attribute paths are spelled out instead of using `with lib;`.
meta = {
  description = "AMD's graph optimization engine";
  homepage = "https://github.com/ROCm/AMDMIGraphX";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
|
||||
})
|
||||
324
pkgs/rocm-modules/miopen/default.nix
Normal file
324
pkgs/rocm-modules/miopen/default.nix
Normal file
@@ -0,0 +1,324 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
fetchpatch,
|
||||
rocmUpdateScript,
|
||||
runCommand,
|
||||
pkg-config,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocblas,
|
||||
rocmlir,
|
||||
rocrand,
|
||||
rocm-runtime,
|
||||
rocm-merged-llvm,
|
||||
hipblas-common,
|
||||
hipblas,
|
||||
hipblaslt,
|
||||
clr,
|
||||
composable_kernel,
|
||||
frugally-deep,
|
||||
rocm-docs-core,
|
||||
half,
|
||||
boost,
|
||||
sqlite,
|
||||
bzip2,
|
||||
lbzip2,
|
||||
nlohmann_json,
|
||||
texliveSmall,
|
||||
doxygen,
|
||||
sphinx,
|
||||
zlib,
|
||||
gtest,
|
||||
rocm-comgr,
|
||||
roctracer,
|
||||
python3Packages,
|
||||
# FIXME: should be able to use all clr targets
|
||||
gpuTargets ? [
|
||||
"gfx900"
|
||||
"gfx906"
|
||||
"gfx908"
|
||||
"gfx90a"
|
||||
"gfx942"
|
||||
"gfx1030"
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
"gfx1102"
|
||||
"gfx1201"
|
||||
], # clr.gpuTargets
|
||||
buildDocs ? false, # Needs internet because of rocm-docs-core
|
||||
buildTests ? false,
|
||||
withComposableKernel ? composable_kernel.anyGfx9Target,
|
||||
}:
|
||||
|
||||
let
|
||||
# FIXME: cmake files need to be patched to include this properly
|
||||
cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "MIOpen";
|
||||
rev = "rocm-${version}";
|
||||
hash = "sha256-DEcVj2vOwIYYyNKEKFqZ0fb9o+/QRpwiSksxwnmgEMc=";
|
||||
fetchLFS = true;
|
||||
fetchSubmodules = true;
|
||||
# WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream
|
||||
leaveDotGit = true;
|
||||
postFetch = ''
|
||||
export HOME=$(mktemp -d)
|
||||
cd $out
|
||||
set -x
|
||||
git remote add origin $url
|
||||
git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version}
|
||||
git clean -fdx
|
||||
git switch -c rocm-${version} refs/tags/rocm-${version}
|
||||
git config lfs.fetchexclude "none"
|
||||
rm .lfsconfig
|
||||
git lfs install
|
||||
git lfs track "*.kdb.bz2"
|
||||
GIT_TRACE=1 git lfs fetch --include="src/kernels/**"
|
||||
GIT_TRACE=1 git lfs pull --include="src/kernels/**"
|
||||
git lfs checkout
|
||||
|
||||
rm -rf .git
|
||||
'';
|
||||
};
|
||||
|
||||
latex = lib.optionalAttrs buildDocs (
|
||||
texliveSmall.withPackages (
|
||||
ps: with ps; [
|
||||
latexmk
|
||||
tex-gyre
|
||||
fncychap
|
||||
wrapfig
|
||||
capt-of
|
||||
framed
|
||||
needspace
|
||||
tabulary
|
||||
varwidth
|
||||
titlesec
|
||||
]
|
||||
)
|
||||
);
|
||||
|
||||
# Decompress the kernel database src/kernels/<arch>.kdb.bz2 from the fetched
# source into a standalone store path named "miopen-<arch>.kdb".
# One helper replaces six copy-pasted derivations so the command cannot drift
# between architectures.
unpackKdb =
  arch:
  runCommand "miopen-${arch}.kdb" { preferLocalBuild = true; } ''
    ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/${arch}.kdb.bz2 > $out
  '';

gfx900 = unpackKdb "gfx900";

gfx906 = unpackKdb "gfx906";

gfx908 = unpackKdb "gfx908";

gfx90a = unpackKdb "gfx90a";

gfx1030 = unpackKdb "gfx1030";

gfx1201 = unpackKdb "gfx1201";
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
inherit version src;
|
||||
pname = "miopen";
|
||||
|
||||
env.CFLAGS = cFlags;
|
||||
env.CXXFLAGS = cFlags;
|
||||
|
||||
# Find zstd and add to target. Mainly for torch.
|
||||
patches = [
|
||||
./skip-preexisting-dbs.patch
|
||||
./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448
|
||||
(fetchpatch {
|
||||
url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
|
||||
hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
|
||||
})
|
||||
(fetchpatch {
|
||||
url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
|
||||
hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
|
||||
})
|
||||
# FIXME: We need to rebase or drop this arch compat patch
|
||||
# https://github.com/ROCm/MIOpen/issues/3540 suggests that
|
||||
# arch compat patching doesn't work correctly for gfx1031
|
||||
# (fetchpatch {
|
||||
# name = "Extend-MIOpen-ISA-compatibility.patch";
|
||||
# url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
|
||||
# hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
|
||||
# })
|
||||
];
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
"doc"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
];
|
||||
enableParallelBuilding = true;
|
||||
env.ROCM_PATH = clr;
|
||||
env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];
|
||||
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
|
||||
|
||||
nativeBuildInputs = [
|
||||
pkg-config
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
[
|
||||
hipblas
|
||||
hipblas-common
|
||||
rocblas
|
||||
rocmlir
|
||||
half
|
||||
boost
|
||||
sqlite
|
||||
bzip2
|
||||
nlohmann_json
|
||||
frugally-deep
|
||||
roctracer
|
||||
rocrand
|
||||
hipblaslt
|
||||
]
|
||||
++ lib.optionals withComposableKernel [
|
||||
composable_kernel
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
latex
|
||||
doxygen
|
||||
sphinx
|
||||
rocm-docs-core
|
||||
python3Packages.sphinx-rtd-theme
|
||||
python3Packages.breathe
|
||||
python3Packages.myst-parser
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
gtest
|
||||
zlib
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
"-DMIOPEN_USE_SQLITE_PERFDB=ON"
|
||||
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
||||
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
|
||||
# needs to stream to stdout so bzcat rather than bunzip2
|
||||
"-DUNZIPPER=${bzip2}/bin/bzcat"
|
||||
|
||||
"-DCMAKE_C_COMPILER=amdclang"
|
||||
"-DCMAKE_CXX_COMPILER=amdclang++"
|
||||
"-DROCM_PATH=${clr}"
|
||||
"-DHIP_ROOT_DIR=${clr}"
|
||||
(lib.cmakeBool "MIOPEN_USE_ROCBLAS" true)
|
||||
(lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true)
|
||||
(lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel)
|
||||
(lib.cmakeBool "MIOPEN_USE_HIPRTC" true)
|
||||
(lib.cmakeBool "MIOPEN_USE_COMGR" true)
|
||||
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DMIOPEN_BACKEND=HIP"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DBUILD_TESTS=ON"
|
||||
"-DMIOPEN_TEST_ALL=ON"
|
||||
];
|
||||
|
||||
postPatch = ''
|
||||
substituteInPlace cmake/ClangTidy.cmake \
|
||||
--replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy)
|
||||
endmacro()
|
||||
macro(enable_clang_tidy_unused)' \
|
||||
--replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET)
|
||||
return()'
|
||||
|
||||
patchShebangs test src/composable_kernel fin utils install_deps.cmake
|
||||
|
||||
ln -sf ${gfx900} src/kernels/gfx900.kdb
|
||||
ln -sf ${gfx906} src/kernels/gfx906.kdb
|
||||
ln -sf ${gfx908} src/kernels/gfx908.kdb
|
||||
ln -sf ${gfx90a} src/kernels/gfx90a.kdb
|
||||
ln -sf ${gfx1030} src/kernels/gfx1030.kdb
|
||||
ln -sf ${gfx1201} src/kernels/gfx1201.kdb
|
||||
mkdir -p build/share/miopen/db/
|
||||
ln -sf ${gfx900} build/share/miopen/db/gfx900.kdb
|
||||
ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb
|
||||
ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb
|
||||
ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb
|
||||
ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb
|
||||
ln -sf ${gfx1201} build/share/miopen/db/gfx1201.kdb
|
||||
'';
|
||||
|
||||
# Unfortunately, it seems like we have to call make on these manually
|
||||
postBuild =
|
||||
lib.optionalString buildDocs ''
|
||||
python -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
|
||||
''
|
||||
+ lib.optionalString buildTests ''
|
||||
make -j$NIX_BUILD_CORES check
|
||||
'';
|
||||
|
||||
postInstall =
|
||||
''
|
||||
rm $out/bin/install_precompiled_kernels.sh
|
||||
ln -sf ${gfx900} $out/share/miopen/db/gfx900.kdb
|
||||
ln -sf ${gfx906} $out/share/miopen/db/gfx906.kdb
|
||||
ln -sf ${gfx908} $out/share/miopen/db/gfx908.kdb
|
||||
ln -sf ${gfx90a} $out/share/miopen/db/gfx90a.kdb
|
||||
ln -sf ${gfx1030} $out/share/miopen/db/gfx1030.kdb
|
||||
ln -sf ${gfx1201} $out/share/miopen/db/gfx1201.kdb
|
||||
''
|
||||
+ lib.optionalString buildDocs ''
|
||||
mv ../doc/html $out/share/doc/miopen-hip
|
||||
''
|
||||
+ lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv bin/test_* $test/bin
|
||||
patchelf --set-rpath $out/lib:${
|
||||
lib.makeLibraryPath (
|
||||
finalAttrs.buildInputs
|
||||
++ [
|
||||
clr
|
||||
rocm-comgr
|
||||
]
|
||||
)
|
||||
} $test/bin/*
|
||||
'';
|
||||
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
|
||||
# Update script keyed on this package's name and the GitHub owner/repo of `src`.
passthru.updateScript = rocmUpdateScript {
  name = finalAttrs.pname;
  inherit (finalAttrs.src) owner repo;
};

# Package metadata; attribute paths are spelled out instead of using `with lib;`.
meta = {
  description = "Machine intelligence library for ROCm";
  homepage = "https://github.com/ROCm/MIOpen";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
|
||||
})
|
||||
31
pkgs/rocm-modules/miopen/fix-isnan.patch
Normal file
31
pkgs/rocm-modules/miopen/fix-isnan.patch
Normal file
@@ -0,0 +1,31 @@
|
||||
From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001
|
||||
From: "Sv. Lockal" <lockalsash@gmail.com>
|
||||
Date: Tue, 24 Dec 2024 14:43:10 +0000
|
||||
Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems
|
||||
|
||||
Closes #3441
|
||||
---
|
||||
driver/reducecalculation_driver.hpp | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp
|
||||
index 8226b3c953..2001969509 100644
|
||||
--- a/driver/reducecalculation_driver.hpp
|
||||
+++ b/driver/reducecalculation_driver.hpp
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "random.hpp"
|
||||
#include <algorithm>
|
||||
#include <cfloat>
|
||||
+#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <miopen/miopen.h>
|
||||
@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc,
|
||||
for(size_t i = 0; i < reduce_size; ++i)
|
||||
{
|
||||
Tcheck val = static_cast<Tcheck>(input[input_idx]);
|
||||
- if(nanPropagation && isnan(val))
|
||||
+ if(nanPropagation && std::isnan(val))
|
||||
{
|
||||
val = 0.0f;
|
||||
}
|
||||
22
pkgs/rocm-modules/miopen/skip-preexisting-dbs.patch
Normal file
22
pkgs/rocm-modules/miopen/skip-preexisting-dbs.patch
Normal file
@@ -0,0 +1,22 @@
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index d0ffaf983..0b9ed0952 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -554,7 +554,7 @@ endif()
|
||||
function(unpack_db db_bzip2_file)
|
||||
get_filename_component(__fname ${db_bzip2_file} NAME_WLE)
|
||||
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}
|
||||
- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
|
||||
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
|
||||
string(REPLACE "." "_" __tname ${__fname})
|
||||
add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname})
|
||||
|
||||
@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file)
|
||||
if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db")
|
||||
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt
|
||||
DEPENDS sqlite2txt generate_${__tname}
|
||||
- COMMAND $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
|
||||
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
|
||||
)
|
||||
add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt)
|
||||
add_dependencies(generate_kernels generate_${__tname}_txt)
|
||||
@@ -0,0 +1,25 @@
|
||||
From f0e66bd446d44df1d30faaad520613f5fb7f5916 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Schwaighofer <mschwaig@users.noreply.github.com>
|
||||
Date: Sat, 30 Mar 2024 15:36:52 +0100
|
||||
Subject: [PATCH] set __STDC_CONSTANT_MACROS to make rocAL compile
|
||||
|
||||
---
|
||||
CMakeLists.txt | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 42b139b6..509915f1 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -149,6 +149,8 @@ message("-- ${Cyan} -D MIGRAPHX=${MIGRAPHX} [Turn ON/OFF MIGraphX Module (de
|
||||
message("-- ${Cyan} -D BACKEND=${BACKEND} [Select MIVisionX Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}")
|
||||
message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}")
|
||||
|
||||
+add_definitions(-D__STDC_CONSTANT_MACROS)
|
||||
+
|
||||
add_subdirectory(amd_openvx)
|
||||
add_subdirectory(amd_openvx_extensions)
|
||||
add_subdirectory(utilities)
|
||||
--
|
||||
2.43.0
|
||||
|
||||
150
pkgs/rocm-modules/mivisionx/default.nix
Normal file
150
pkgs/rocm-modules/mivisionx/default.nix
Normal file
@@ -0,0 +1,150 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocm-device-libs,
|
||||
clr,
|
||||
pkg-config,
|
||||
rpp,
|
||||
rocblas,
|
||||
miopen,
|
||||
migraphx,
|
||||
openmp,
|
||||
protobuf,
|
||||
qtcreator,
|
||||
opencv,
|
||||
ffmpeg,
|
||||
boost,
|
||||
libjpeg_turbo,
|
||||
half,
|
||||
lmdb,
|
||||
rapidjson,
|
||||
rocm-docs-core,
|
||||
python3Packages,
|
||||
useOpenCL ? false,
|
||||
useCPU ? false,
|
||||
buildDocs ? false, # Needs internet
|
||||
gpuTargets ? [ ],
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
# Package name carries the selected backend as a suffix. The precedence mirrors
# the BACKEND selection in cmakeFlags: CPU wins, then OpenCL, otherwise HIP.
# The previous chain tested `(!useOpenCL && !useCPU)` twice, which made the
# "opencl" branch unreachable and named OpenCL builds "mivisionx-cpu".
pname =
  "mivisionx-"
  + (
    if useCPU then
      "cpu"
    else if useOpenCL then
      "opencl"
    else
      "hip"
  );
|
||||
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "MIVisionX";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-07MivgCYmKLnhGDjOYsFBfwIxEoQLYNoRbOo3MPpVzE=";
|
||||
};
|
||||
|
||||
patches = [
|
||||
./0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch
|
||||
];
|
||||
|
||||
nativeBuildInputs =
|
||||
[
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
pkg-config
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
rocm-docs-core
|
||||
python3Packages.python
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
miopen
|
||||
migraphx
|
||||
rpp
|
||||
rocblas
|
||||
openmp
|
||||
half
|
||||
protobuf
|
||||
qtcreator
|
||||
opencv
|
||||
ffmpeg
|
||||
boost
|
||||
libjpeg_turbo
|
||||
lmdb
|
||||
rapidjson
|
||||
python3Packages.pybind11
|
||||
python3Packages.numpy
|
||||
python3Packages.torchWithRocm
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DROCM_PATH=${clr}"
|
||||
"-DAMDRPP_PATH=${rpp}"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DCMAKE_INSTALL_PREFIX_PYTHON=lib"
|
||||
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
|
||||
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
|
||||
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
|
||||
# "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')`
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals (!useOpenCL && !useCPU) [
|
||||
"-DBACKEND=HIP"
|
||||
]
|
||||
++ lib.optionals (useOpenCL && !useCPU) [
|
||||
"-DBACKEND=OCL"
|
||||
]
|
||||
++ lib.optionals useCPU [
|
||||
"-DBACKEND=CPU"
|
||||
];
|
||||
|
||||
postPatch = ''
|
||||
# We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...`
|
||||
export CXXFLAGS+=" --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
|
||||
# Properly find miopen, fix ffmpeg version detection
|
||||
substituteInPlace amd_openvx_extensions/CMakeLists.txt \
|
||||
--replace-fail "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
|
||||
--replace-fail "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
|
||||
|
||||
# Properly find turbojpeg
|
||||
substituteInPlace cmake/FindTurboJpeg.cmake \
|
||||
--replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
|
||||
--replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
|
||||
'';
|
||||
|
||||
postBuild = lib.optionalString buildDocs ''
|
||||
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
|
||||
'';
|
||||
|
||||
# Update script keyed on this package's name and the GitHub owner/repo of `src`.
passthru.updateScript = rocmUpdateScript {
  name = finalAttrs.pname;
  inherit (finalAttrs.src) owner repo;
};

# Package metadata; attribute paths are spelled out instead of using `with lib;`.
meta = {
  description = "Set of comprehensive computer vision and machine intelligence libraries, utilities, and applications";
  homepage = "https://github.com/ROCm/MIVisionX";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
  # The OpenCL backend is marked as broken.
  broken = useOpenCL;
};
|
||||
})
|
||||
42
pkgs/rocm-modules/mscclpp/default.nix
Normal file
42
pkgs/rocm-modules/mscclpp/default.nix
Normal file
@@ -0,0 +1,42 @@
|
||||
# mscclpp built against ROCm (MSCCLPP_USE_ROCM=ON) with the NCCL apps enabled.
{
  fetchFromGitHub,
  stdenv,
  cmake,
  clr,
  numactl,
  nlohmann_json,
  # GPU architectures to build for. The default is the list that used to be
  # hard-coded in three separate places below.
  gpuTargets ? [
    "gfx908"
    "gfx90a"
    "gfx942"
    "gfx1030"
    "gfx1100"
  ],
}:
let
  # CMake list form ("a;b;c") shared by GPU_TARGETS and AMDGPU_TARGETS.
  cmakeGpuTargets = builtins.concatStringsSep ";" gpuTargets;
  # Space-separated form spliced into upstream's CMakeLists.txt.
  spaceSeparatedGpuTargets = builtins.concatStringsSep " " gpuTargets;
in
stdenv.mkDerivation {
  pname = "mscclpp";
  version = "unstable-2024-12-13";
  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "mscclpp";
    rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37";
    hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig=";
  };
  nativeBuildInputs = [
    cmake
  ];
  buildInputs = [
    clr
    numactl
  ];
  # Replace upstream's hard-coded architecture list with ours.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-fail "gfx90a gfx941 gfx942" "${spaceSeparatedGpuTargets}"
  '';
  cmakeFlags = [
    "-DMSCCLPP_BYPASS_GPU_CHECK=ON"
    "-DMSCCLPP_USE_ROCM=ON"
    "-DMSCCLPP_BUILD_TESTS=OFF"
    "-DGPU_TARGETS=${cmakeGpuTargets}"
    "-DAMDGPU_TARGETS=${cmakeGpuTargets}"
    "-DMSCCLPP_BUILD_APPS_NCCL=ON"
    "-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF"
    "-DFETCHCONTENT_QUIET=OFF"
    "-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS"
    # Point FetchContent at the nixpkgs copy of the nlohmann_json sources.
    "-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}"
  ];
  env.ROCM_PATH = clr;
}
|
||||
144
pkgs/rocm-modules/rccl/default.nix
Normal file
144
pkgs/rocm-modules/rccl/default.nix
Normal file
@@ -0,0 +1,144 @@
|
||||
# RCCL (ROCm communication collectives library), built with MSCCL kernels and
# MSCCL++ support enabled. Setting `buildTests` also turns on the sanitizers
# and adds a separate `test` output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  rocm-core,
  clr,
  mscclpp,
  perl,
  hipify,
  gtest,
  chrpath,
  rocprofiler,
  rocprofiler-register,
  autoPatchelfHook,
  buildTests ? false,
  gpuTargets ? (clr.localGpuTargets or [ ]),
}:

let
  # Both sanitizers are tied to the test build.
  useAsan = buildTests;
  useUbsan = buildTests;

  # Sanitizer flags (with a trailing space) or the empty string.
  san = lib.optionalString (useAsan || useUbsan) "-fno-gpu-sanitize -fsanitize=undefined ${lib.optionalString useAsan "-fsanitize=address -shared-libsan "}";

  # -O2 and -fno-strict-aliasing due to UB issues in RCCL (reported upstream).
  # The same flag string is used for both C and C++.
  compileFlags = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";

  # CMake-style (semicolon separated) GPU target list.
  targetList = lib.concatStringsSep ";" gpuTargets;
in
# Multi-node collective ops can't be properly tested or used here:
#  - https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support
#  - kfd_peerdirect support lives in the out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver
#  - infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros
stdenv.mkDerivation (finalAttrs: {
  pname = "rccl${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs = [ "out" ] ++ lib.optional buildTests "test";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rccl";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-6lQBpoJKszgvt+UpNEKdiw74s3ZhC4zpA4HP+F6u7X4=";
  };

  patches = [
    ./fix-mainline-support-and-ub.diff
    ./enable-mscclpp-on-all-gfx9.diff
    ./rccl-test-missing-iomanip.diff
  ];

  postPatch = ''
    patchShebangs src tools
  '';

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    perl
    hipify
    autoPatchelfHook # ASAN doesn't add rpath without this
  ];

  buildInputs = [
    rocm-smi
    gtest
    rocprofiler
    rocprofiler-register
    mscclpp
  ] ++ lib.optional buildTests chrpath;

  cmakeFlags =
    [
      "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DROCM_PATH=${clr}"
      "-DHIP_COMPILER=${clr}/bin/amdclang++"
      "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
      "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
      "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
      "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
      "-DENABLE_MSCCL_KERNEL=ON"
      "-DENABLE_MSCCLPP=ON"
      "-DMSCCLPP_ROOT=${mscclpp}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      # Different ROCm projects read different variable names, so set both.
      "-DAMDGPU_TARGETS=${targetList}"
      "-DGPU_TARGETS=${targetList}"
    ]
    ++ lib.optional buildTests "-DBUILD_TESTS=ON";

  env = {
    CFLAGS = compileFlags;
    CXXFLAGS = compileFlags;
    LDFLAGS = san;
  };

  postInstall =
    # With ASAN, record the sanitizer runtime as a NEEDED entry of librccl.so.
    lib.optionalString useAsan ''
      patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
    ''
    # Everything installed into bin/ moves to the `test` output.
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm communication collectives library";
    homepage = "https://github.com/ROCm/rccl";
    license = [
      lib.licenses.bsd2
      lib.licenses.bsd3
    ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
13
pkgs/rocm-modules/rccl/enable-mscclpp-on-all-gfx9.diff
Normal file
13
pkgs/rocm-modules/rccl/enable-mscclpp-on-all-gfx9.diff
Normal file
@@ -0,0 +1,13 @@
|
||||
diff --git a/src/init.cc b/src/init.cc
|
||||
index 738f756..1b0e4fc 100644
|
||||
--- a/src/init.cc
|
||||
+++ b/src/init.cc
|
||||
@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
|
||||
if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) {
|
||||
hipDeviceProp_t devProp;
|
||||
CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev));
|
||||
- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94");
|
||||
+ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9");
|
||||
if (comm->mscclppCompatible) {
|
||||
bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0);
|
||||
auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId];
|
||||
178
pkgs/rocm-modules/rccl/fix-mainline-support-and-ub.diff
Normal file
178
pkgs/rocm-modules/rccl/fix-mainline-support-and-ub.diff
Normal file
@@ -0,0 +1,178 @@
|
||||
diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h
|
||||
index 8c5f081..9922b79 100644
|
||||
--- a/src/include/bootstrap.h
|
||||
+++ b/src/include/bootstrap.h
|
||||
@@ -10,11 +10,13 @@
|
||||
#include "nccl.h"
|
||||
#include "comm.h"
|
||||
|
||||
+// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128]
|
||||
struct ncclBootstrapHandle {
|
||||
uint64_t magic;
|
||||
union ncclSocketAddress addr;
|
||||
};
|
||||
static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID");
|
||||
+static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB");
|
||||
|
||||
ncclResult_t bootstrapNetInit();
|
||||
ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv);
|
||||
diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc
|
||||
index b3063d5..464b80d 100644
|
||||
--- a/src/misc/rocmwrap.cc
|
||||
+++ b/src/misc/rocmwrap.cc
|
||||
@@ -131,9 +131,12 @@ static void initOnceFunc() {
|
||||
//format and store the kernel conf file location
|
||||
snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release);
|
||||
fp = fopen(kernel_conf_file, "r");
|
||||
- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file");
|
||||
+ if (fp == NULL) {
|
||||
+ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled");
|
||||
+ }
|
||||
//look for kernel_opt1 and kernel_opt2 in the conf file and check
|
||||
- while (fgets(buf, sizeof(buf), fp) != NULL) {
|
||||
+ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary.
|
||||
+ while (fp && fgets(buf, sizeof(buf), fp) != NULL) {
|
||||
if (strstr(buf, kernel_opt1) != NULL) {
|
||||
found_opt1 = 1;
|
||||
INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release);
|
||||
@@ -143,11 +146,12 @@ static void initOnceFunc() {
|
||||
INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release);
|
||||
}
|
||||
}
|
||||
- if (!found_opt1 || !found_opt2) {
|
||||
+ if (fp && (!found_opt1 || !found_opt2)) {
|
||||
dmaBufSupport = 0;
|
||||
INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release);
|
||||
INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support");
|
||||
}
|
||||
+ if (fp) fclose(fp);
|
||||
|
||||
if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled");
|
||||
else goto error;
|
||||
diff --git a/src/nccl.h.in b/src/nccl.h.in
|
||||
index 1d127b0..6296073 100644
|
||||
--- a/src/nccl.h.in
|
||||
+++ b/src/nccl.h.in
|
||||
@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t;
|
||||
#define NCCL_UNIQUE_ID_BYTES 128
|
||||
/*! @brief Opaque unique id used to initialize communicators
|
||||
@details The ncclUniqueId must be passed to all participating ranks */
|
||||
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
|
||||
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
|
||||
|
||||
/*! @defgroup rccl_result_code Result Codes
|
||||
@details The various result codes that RCCL API calls may return
|
||||
diff --git a/src/proxy.cc b/src/proxy.cc
|
||||
index 50e5437..51bb401 100644
|
||||
--- a/src/proxy.cc
|
||||
+++ b/src/proxy.cc
|
||||
@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool {
|
||||
|
||||
static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) {
|
||||
if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) {
|
||||
- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
|
||||
+ if (pool->pools) {
|
||||
+ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
|
||||
+ } else {
|
||||
+ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1));
|
||||
+ }
|
||||
NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE));
|
||||
pool->banks++;
|
||||
pool->offset = 0;
|
||||
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc
|
||||
index 6d77784..49762d3 100644
|
||||
--- a/src/transport/net_ib.cc
|
||||
+++ b/src/transport/net_ib.cc
|
||||
@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() {
|
||||
// Requires support from NIC driver modules
|
||||
// Use ONLY for debugging!
|
||||
moduleLoaded = 1;
|
||||
- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
|
||||
+ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
|
||||
}
|
||||
|
||||
if (moduleLoaded == -1) {
|
||||
@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() {
|
||||
// or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module)
|
||||
const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version",
|
||||
"/sys/kernel/memory_peers/amdkfd/version",
|
||||
- "/sys/memory_peers/amdkfd/version"};
|
||||
+ "/sys/memory_peers/amdkfd/version",
|
||||
+ NULL};
|
||||
int i = 0;
|
||||
|
||||
while (memory_peers_paths[i]) {
|
||||
if (access(memory_peers_paths[i], F_OK) == 0) {
|
||||
moduleLoaded = 1;
|
||||
- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]);
|
||||
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]);
|
||||
break;
|
||||
} else {
|
||||
moduleLoaded = 0;
|
||||
@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() {
|
||||
if (moduleLoaded == 0) {
|
||||
// Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms`
|
||||
// if your system uses native OS ib_peer module
|
||||
- char buf[256];
|
||||
- FILE *fp = NULL;
|
||||
- fp = fopen("/proc/kallsyms", "r");
|
||||
+ FILE *fp = fopen("/proc/kallsyms", "r");
|
||||
+ char *line = NULL;
|
||||
+ size_t len = 0;
|
||||
|
||||
if (fp == NULL) {
|
||||
- INFO(NCCL_INIT,"Could not open /proc/kallsyms");
|
||||
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client");
|
||||
} else {
|
||||
- while (fgets(buf, sizeof(buf), fp) != NULL) {
|
||||
- if (strstr(buf, "t ib_register_peer_memory_client") != NULL ||
|
||||
- strstr(buf, "T ib_register_peer_memory_client") != NULL) {
|
||||
+ while (getline(&line, &len, fp) > 0) {
|
||||
+ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) {
|
||||
moduleLoaded = 1;
|
||||
- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms");
|
||||
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
+ if (line) free(line);
|
||||
+ if (fp) fclose(fp);
|
||||
}
|
||||
#else
|
||||
// Check for the nv_peer_mem module being loaded
|
||||
@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() {
|
||||
#endif
|
||||
}
|
||||
if (moduleLoaded == 0) {
|
||||
- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
|
||||
+ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
|
||||
return ncclSystemError;
|
||||
}
|
||||
return ncclSuccess;
|
||||
diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h
|
||||
index 2c86c33..5801c61 100755
|
||||
--- a/tools/ib-test/include/nccl.h
|
||||
+++ b/tools/ib-test/include/nccl.h
|
||||
@@ -31,7 +31,7 @@ extern "C" {
|
||||
typedef struct ncclComm* ncclComm_t;
|
||||
|
||||
#define NCCL_UNIQUE_ID_BYTES 128
|
||||
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||
|
||||
/* Error type */
|
||||
typedef enum { ncclSuccess = 0,
|
||||
diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h
|
||||
index 729561b..4e4bdd9 100644
|
||||
--- a/tools/topo_expl/include/nccl.h
|
||||
+++ b/tools/topo_expl/include/nccl.h
|
||||
@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t;
|
||||
#define NCCL_COMM_NULL NULL
|
||||
|
||||
#define NCCL_UNIQUE_ID_BYTES 128
|
||||
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||
|
||||
/*! @brief Error type */
|
||||
typedef enum { ncclSuccess = 0,
|
||||
10
pkgs/rocm-modules/rccl/rccl-test-missing-iomanip.diff
Normal file
10
pkgs/rocm-modules/rccl/rccl-test-missing-iomanip.diff
Normal file
@@ -0,0 +1,10 @@
|
||||
--- a/test/common/TestBed.cpp
|
||||
+++ b/test/common/TestBed.cpp
|
||||
@@ -4,6 +4,7 @@
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
#include <unistd.h>
|
||||
+#include <iomanip>
|
||||
#include "TestBed.hpp"
|
||||
#include <rccl/rccl.h>
|
||||
|
||||
146
pkgs/rocm-modules/rdc/default.nix
Normal file
146
pkgs/rocm-modules/rdc/default.nix
Normal file
@@ -0,0 +1,146 @@
|
||||
# ROCm Data Center tool (rdc).
# `buildDocs` adds a `doc` output and the doxygen/graphviz/LaTeX toolchain;
# `buildTests` adds a `test` output and passes -DBUILD_TESTS=ON.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  amdsmi,
  rocm-smi,
  rocm-runtime,
  libcap,
  libdrm,
  grpc,
  protobuf,
  openssl,
  doxygen,
  graphviz,
  texliveSmall,
  gtest,
  buildDocs ? true,
  buildTests ? false,
}:

let
  # TeX Live environment for the documentation build. Nix is lazy and this is
  # only referenced under `lib.optionals buildDocs`, so it is never evaluated
  # when docs are disabled; no conditional wrapper is needed.
  latex = texliveSmall.withPackages (
    ps: with ps; [
      changepage
      latexmk
      varwidth
      multirow
      hanging
      adjustbox
      collectbox
      stackengine
      enumitem
      alphalph
      wasysym
      sectsty
      tocloft
      newunicodechar
      etoc
      helvetic
      wasy
      courier
    ]
  );
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rdc";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildDocs [
      "doc"
    ]
    ++ lib.optionals buildTests [
      "test"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rdc";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-HkGumwag7mDERHiWwZ7cRQz0tzH+vIovY1HgX2g69d4=";
  };

  nativeBuildInputs =
    [
      cmake
      protobuf
    ]
    ++ lib.optionals buildDocs [
      doxygen
      graphviz
      latex
    ];

  buildInputs =
    [
      amdsmi
      rocm-smi
      rocm-runtime
      libcap
      libdrm
      grpc
      openssl
    ]
    ++ lib.optionals buildTests [
      gtest
    ];

  # Set via `env.` like the sibling ROCm packages; the resulting environment
  # variable is the same as with a top-level attribute.
  env.CXXFLAGS = "-I${libcap.dev}/include";

  cmakeFlags =
    [
      "-DCMAKE_VERBOSE_MAKEFILE=OFF"
      "-DRDC_INSTALL_PREFIX=${placeholder "out"}"
      "-DBUILD_ROCRTEST=ON"
      "-DRSMI_INC_DIR=${rocm-smi}/include"
      "-DRSMI_LIB_DIR=${rocm-smi}/lib"
      "-DGRPC_ROOT=${grpc}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DCMAKE_INSTALL_LIBEXECDIR=libexec"
      "-DCMAKE_INSTALL_DOCDIR=doc"
    ]
    ++ lib.optionals buildTests [
      "-DBUILD_TESTS=ON"
    ];

  # Hard-code the distro name instead of letting CMake read /etc/os-release.
  # --replace-fail (rather than the deprecated --replace) makes the build stop
  # if upstream changes this line, instead of silently skipping the patch.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-fail "file(STRINGS /etc/os-release LINUX_DISTRO LIMIT_COUNT 1 REGEX \"NAME=\")" "set(LINUX_DISTRO \"NixOS\")"
  '';

  postInstall =
    # Shrink the RPATH of every installed executable, keeping only entries
    # under the Nix store.
    ''
      find $out/bin -executable -type f -exec \
        patchelf {} --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE" \;
    ''
    # NOTE(review): `$test/bin` does not exist before the mv, so rdctst_tests
    # itself becomes `$test/bin` -- presumably it is a directory; confirm.
    + lib.optionalString buildTests ''
      mkdir -p $test
      mv $out/bin/rdctst_tests $test/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Simplifies administration and addresses infrastructure challenges in cluster and datacenter environments";
    homepage = "https://github.com/ROCm/rdc";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||
135
pkgs/rocm-modules/rocalution/default.nix
Normal file
135
pkgs/rocm-modules/rocalution/default.nix
Normal file
@@ -0,0 +1,135 @@
|
||||
# rocALUTION: iterative sparse solvers for ROCm, built with HIP, OpenMP and
# MPI support. Tests, benchmarks and samples are optional and each lands in
# its own output (`test`, `benchmark`, `sample`).
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocblas,
  rocsparse,
  rocprim,
  rocrand,
  clr,
  git,
  pkg-config,
  openmp,
  openmpi,
  gtest,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900:xnack-" "gfx906:xnack-" ... ]
}:

let
  # CMake-style (semicolon separated) GPU target list.
  targetList = lib.strings.concatStringsSep ";" gpuTargets;
  samplesSwitch = if buildSamples then "ON" else "OFF";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocalution";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocALUTION";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-bZx1Cc2jcIfysohKCKzj5mowM3IeCelRhVaBU73KnTo=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    git
    pkg-config
  ];

  buildInputs = [
    rocblas
    rocsparse
    rocprim
    rocrand
    openmp
    openmpi
  ] ++ lib.optional buildTests gtest;

  CXXFLAGS = "-I${openmp.dev}/include";

  cmakeFlags =
    [
      "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
      "-DROCM_PATH=${clr}"
      "-DHIP_ROOT_DIR=${clr}"
      "-DSUPPORT_HIP=ON"
      "-DSUPPORT_OMP=ON"
      "-DSUPPORT_MPI=ON"
      "-DBUILD_CLIENTS_SAMPLES=${samplesSwitch}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${targetList}"
      "-DGPU_TARGETS=${targetList}"
    ]
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON";

  postInstall =
    # Move the test binary into the `test` output.
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/rocalution-test $test/bin
    ''
    # Move the benchmark binary into the `benchmark` output.
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/rocalution-bench $benchmark/bin
    ''
    # Samples are taken from the build tree (clients/staging); the test and
    # benchmark binaries are dropped from that set if present, and the RPATH of
    # what remains is set to $out/lib plus the libraries of the build inputs.
    + lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/* $sample/bin
      rm $sample/bin/rocalution-test || true
      rm $sample/bin/rocalution-bench || true

      patchelf --set-rpath \
        $out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ [ clr ])} \
        $sample/bin/*
    ''
    # Remove bin/ from `out` once its contents have been moved away.
    + lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Iterative sparse solvers for ROCm";
    homepage = "https://github.com/ROCm/rocALUTION";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
200
pkgs/rocm-modules/rocblas/default.nix
Normal file
200
pkgs/rocm-modules/rocblas/default.nix
Normal file
@@ -0,0 +1,200 @@
|
||||
# rocBLAS: BLAS implementation for the ROCm platform.
# The Tensile backend, hipBLASLt integration, client tests and client
# benchmarks are each controlled by a function argument (all default to true).
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  python3,
  tensile,
  msgpack,
  libxml2,
  gtest,
  gfortran,
  openmp,
  git,
  amd-blis,
  zstd,
  hipblas-common,
  hipblaslt,
  python3Packages,
  rocm-smi,
  libdrm,
  roctracer,
  buildTensile ? true,
  buildTests ? true,
  buildBenchmarks ? true,
  # Tensile_SEPARATE_ARCHITECTURES / Tensile_LAZY_LIBRARY_LOADING switches.
  # Background: https://github.com/ROCm/Tensile/issues/1757 (lets gfx101* users
  # use rocBLAS normally). An earlier note asked to flip these to `true` once
  # the upstream fix was part of a release, because backporting the single fix
  # is not enough (it depends on earlier commits); both default to `true` now.
  tensileSepArch ? true,
  tensileLazyLib ? true,
  withHipBlasLt ? true,
  # `gfx940`, `gfx941` are not present in this list because they are early
  # engineering samples, and all final MI300 hardware are `gfx942`:
  # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
  #
  # `gfx1012` is not present in this list because the ISA compatibility patches
  # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
  # always try to use `gfx1010` code objects, hence building for `gfx1012` is
  # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx1010"
      # Currently disabled: "gfx1012" "gfx1030" "gfx1100" "gfx1101" "gfx1102"
      "gfx1201"
    ]
  ),
}:

let
  # CMake-style (semicolon separated) GPU target list.
  targetList = lib.concatStringsSep ";" gpuTargets;
  # True when either client tests or client benchmarks are built.
  buildClients = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocblas${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs = [ "out" ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-To5V5bydYR5iuUxkwpx79RrNdncvWmR/v/w9VnlKBq4=";
  };

  # No patches are applied at the moment. Disabled ISA-compatibility patch,
  # kept for reference (would be fetched with fetchpatch):
  #   name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"
  #   url  = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"
  #   hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="
  patches = [ ];

  # Pass $NIX_BUILD_CORES to Tensile
  postPatch = ''
    substituteInPlace cmake/build-options.cmake \
      --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
    # substituteInPlace CMakeLists.txt \
    # --replace-fail "4.42.0" "4.43.0"
  '';

  nativeBuildInputs = [
    cmake
    # no ninja, it buffers console output and nix times out long periods of no output
    rocm-cmake
    clr
    git
  ] ++ lib.optional buildTensile tensile;

  buildInputs =
    [
      python3
      hipblas-common
    ]
    ++ lib.optional withHipBlasLt hipblaslt
    ++ lib.optionals buildTensile [
      zstd
      msgpack
      libxml2
      python3Packages.msgpack
      python3Packages.zstandard
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optionals buildClients [
      gfortran
      openmp
      amd-blis
      rocm-smi
    ]
    ++ lib.optional (buildTensile || buildClients) python3Packages.pyyaml;

  dontStrip = true;

  env = {
    CXXFLAGS =
      "-O3 -DNDEBUG -I${hipblas-common}/include -I${roctracer}/include -I${libdrm.dev}/include"
      + lib.optionalString buildClients " -I${amd-blis}/include/blis";
    # Fails to link tests if we don't add amd-blis libs
    LDFLAGS =
      "-L${roctracer}/lib -L${libdrm.dev}/lib"
      + lib.optionalString buildClients " -Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";
    TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
  };

  cmakeFlags =
    [
      (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
      (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
      (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
      (lib.cmakeFeature "python" "python3")
      (lib.cmakeFeature "SUPPORTED_TARGETS" targetList)
      (lib.cmakeFeature "AMDGPU_TARGETS" targetList)
      (lib.cmakeFeature "GPU_TARGETS" targetList)
      (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
      (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
      (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
      (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
      (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
      (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
      # Samples follow the benchmark switch.
      (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
      (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
      # Temporarily set variables to work around upstream CMakeLists issue
      # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DCMAKE_INSTALL_LIBDIR=lib"
    ]
    ++ lib.optionals buildTensile [
      "-DCPACK_SET_DESTDIR=OFF"
      "-DLINK_BLIS=ON"
      "-DTensile_CODE_OBJECT_VERSION=default"
      "-DTensile_LOGIC=asm_full"
      "-DTensile_LIBRARY_FORMAT=msgpack"
      (lib.cmakeBool "BUILD_WITH_PIP" false)
      (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
      (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
    ];

  enableParallelBuilding = true;
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    # Semicolon-separated target list this package was configured with.
    amdgpu_targets = targetList;
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
  };

  meta = {
    description = "BLAS implementation for ROCm platform";
    homepage = "https://github.com/ROCm/rocBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||
114
pkgs/rocm-modules/rocdbgapi/default.nix
Normal file
114
pkgs/rocm-modules/rocdbgapi/default.nix
Normal file
@@ -0,0 +1,114 @@
|
||||
# ROCdbgapi: debugger support library for ROCm.
# `buildDocs` adds a `doc` output, the LaTeX/doxygen/graphviz toolchain and an
# explicit `make doc` step after the main build.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  git,
  rocm-comgr,
  rocm-runtime,
  hwdata,
  texliveSmall,
  doxygen,
  graphviz,
  buildDocs ? true,
}:

let
  # TeX Live environment for the documentation build. Nix is lazy and this is
  # only referenced under `lib.optionals buildDocs`, so it is never evaluated
  # when docs are disabled; no conditional wrapper is needed.
  latex = texliveSmall.withPackages (
    ps: with ps; [
      changepage
      latexmk
      varwidth
      multirow
      hanging
      adjustbox
      collectbox
      stackengine
      enumitem
      alphalph
      wasysym
      sectsty
      tocloft
      newunicodechar
      etoc
      helvetic
      wasy
      courier
    ]
  );
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocdbgapi";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildDocs [
      "doc"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCdbgapi";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Rr8+SNeFps0rjk4Jn2+rFmtRJfL42l0tNOz13oZQy+I=";
  };

  nativeBuildInputs =
    [
      cmake
      rocm-cmake
      git
    ]
    ++ lib.optionals buildDocs [
      latex
      doxygen
      graphviz
    ];

  buildInputs = [
    rocm-comgr
    rocm-runtime
    hwdata
  ];

  cmakeFlags = [
    "-DPCI_IDS_PATH=${hwdata}/share/hwdata"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Unfortunately, it seems like we have to call make on this manually.
  # HOME is pointed at a fresh temporary directory for the doc build.
  postBuild = lib.optionalString buildDocs ''
    export HOME=$(mktemp -d)
    make -j$NIX_BUILD_CORES doc
  '';

  # Relocate the generated HTML from `out` into the `doc` output.
  # NOTE(review): assumes $doc/share/doc/amd-dbgapi already exists at this
  # point -- confirm against the upstream install rules.
  postInstall = lib.optionalString buildDocs ''
    mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html
    rmdir $out/share/html
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Debugger support for control of execution and inspection state";
    homepage = "https://github.com/ROCm/ROCdbgapi";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||
174
pkgs/rocm-modules/rocfft/default.nix
Normal file
174
pkgs/rocm-modules/rocfft/default.nix
Normal file
@@ -0,0 +1,174 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
clr,
|
||||
python3,
|
||||
rocm-cmake,
|
||||
sqlite,
|
||||
boost,
|
||||
fftw,
|
||||
fftwFloat,
|
||||
gtest,
|
||||
openmp,
|
||||
rocrand,
|
||||
gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocfft${clr.gpuArchSuffix}";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocFFT";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-z8O//2lihXeVnYZklR8uUIgCS9RyNNRb+hM5keR5aYA=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clr
|
||||
python3
|
||||
rocm-cmake
|
||||
];
|
||||
|
||||
# FIXME: rocfft_aot_helper runs at the end of the build and has a risk of timing it out
|
||||
# due to a long period with no terminal output
|
||||
buildInputs = [ sqlite ];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DCMAKE_C_COMPILER=hipcc"
|
||||
"-DCMAKE_CXX_COMPILER=hipcc"
|
||||
"-DSQLITE_USE_SYSTEM_PACKAGE=ON"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
];
|
||||
|
||||
passthru = {
|
||||
test = stdenv.mkDerivation {
|
||||
pname = "${finalAttrs.pname}-test";
|
||||
inherit (finalAttrs) version src;
|
||||
|
||||
sourceRoot = "${finalAttrs.src.name}/clients/tests";
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clr
|
||||
rocm-cmake
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
boost
|
||||
fftw
|
||||
fftwFloat
|
||||
finalAttrs.finalPackage
|
||||
gtest
|
||||
openmp
|
||||
rocrand
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_C_COMPILER=hipcc"
|
||||
"-DCMAKE_CXX_COMPILER=hipcc"
|
||||
];
|
||||
|
||||
postInstall = ''
|
||||
rm -r "$out/lib/fftw"
|
||||
rmdir "$out/lib"
|
||||
'';
|
||||
};
|
||||
|
||||
benchmark = stdenv.mkDerivation {
|
||||
pname = "${finalAttrs.pname}-benchmark";
|
||||
inherit (finalAttrs) version src;
|
||||
|
||||
sourceRoot = "${finalAttrs.src.name}/clients/rider";
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clr
|
||||
rocm-cmake
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
boost
|
||||
finalAttrs.finalPackage
|
||||
openmp
|
||||
(python3.withPackages (
|
||||
ps: with ps; [
|
||||
pandas
|
||||
scipy
|
||||
]
|
||||
))
|
||||
rocrand
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_C_COMPILER=hipcc"
|
||||
"-DCMAKE_CXX_COMPILER=hipcc"
|
||||
];
|
||||
|
||||
postInstall = ''
|
||||
cp -a ../../../scripts/perf "$out/bin"
|
||||
'';
|
||||
};
|
||||
|
||||
samples = stdenv.mkDerivation {
|
||||
pname = "${finalAttrs.pname}-samples";
|
||||
inherit (finalAttrs) version src;
|
||||
|
||||
sourceRoot = "${finalAttrs.src.name}/clients/samples";
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clr
|
||||
rocm-cmake
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
boost
|
||||
finalAttrs.finalPackage
|
||||
openmp
|
||||
rocrand
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_C_COMPILER=hipcc"
|
||||
"-DCMAKE_CXX_COMPILER=hipcc"
|
||||
];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
mkdir "$out"
|
||||
cp -a bin "$out"
|
||||
runHook postInstall
|
||||
'';
|
||||
};
|
||||
|
||||
updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
};
|
||||
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
|
||||
meta = with lib; {
|
||||
description = "FFT implementation for ROCm";
|
||||
homepage = "https://github.com/ROCm/rocFFT";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
117
pkgs/rocm-modules/rocgdb/default.nix
Normal file
117
pkgs/rocm-modules/rocgdb/default.nix
Normal file
@@ -0,0 +1,117 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
pkg-config,
|
||||
texinfo,
|
||||
bison,
|
||||
flex,
|
||||
glibc,
|
||||
zlib,
|
||||
zstd,
|
||||
gmp,
|
||||
mpfr,
|
||||
ncurses,
|
||||
expat,
|
||||
rocdbgapi,
|
||||
perl,
|
||||
python3,
|
||||
babeltrace,
|
||||
sourceHighlight,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocgdb";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "ROCgdb";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-evDWg2w2FHv6OU5BQOCAXTlDm7JpwdJ3Wh5a2i5r1gQ=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
pkg-config
|
||||
texinfo # For makeinfo
|
||||
bison
|
||||
flex
|
||||
perl # used in mkinstalldirs script during installPhase
|
||||
python3
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
zlib
|
||||
zstd
|
||||
gmp
|
||||
mpfr
|
||||
ncurses
|
||||
expat
|
||||
rocdbgapi
|
||||
python3
|
||||
babeltrace
|
||||
sourceHighlight
|
||||
];
|
||||
|
||||
configureFlags = [
|
||||
# Ensure we build the amdgpu target
|
||||
"--enable-targets=${stdenv.targetPlatform.config},amdgcn-amd-amdhsa"
|
||||
"--with-amd-dbgapi=yes"
|
||||
|
||||
"--with-iconv-path=${glibc.bin}"
|
||||
"--enable-tui"
|
||||
"--with-babeltrace=${babeltrace}"
|
||||
"--with-python=python3"
|
||||
"--with-system-zlib"
|
||||
"--with-system-zstd"
|
||||
"--enable-64-bit-bfd"
|
||||
"--with-gmp=${gmp.dev}"
|
||||
"--with-mpfr=${mpfr.dev}"
|
||||
"--with-expat=${expat}"
|
||||
|
||||
# So the installed binary is called "rocgdb" instead on plain "gdb"
|
||||
"--program-prefix=roc"
|
||||
|
||||
# Disable building many components not used or incompatible with the amdgcn target
|
||||
"--disable-sim"
|
||||
"--disable-gdbserver"
|
||||
"--disable-ld"
|
||||
"--disable-gas"
|
||||
"--disable-gdbserver"
|
||||
"--disable-gdbtk"
|
||||
"--disable-gprofng"
|
||||
"--disable-shared"
|
||||
];
|
||||
|
||||
postPatch = ''
|
||||
for file in *; do
|
||||
if [ -f "$file" ]; then
|
||||
patchShebangs "$file"
|
||||
fi
|
||||
done
|
||||
'';
|
||||
|
||||
# The source directory for ROCgdb (based on upstream GDB) contains multiple project
|
||||
# of GNU’s toolchain (binutils and onther), we only need to install the GDB part.
|
||||
installPhase = ''
|
||||
make install-gdb
|
||||
'';
|
||||
|
||||
env.CFLAGS = "-Wno-switch -Wno-format-nonliteral -I${zstd.dev}/include -I${zlib.dev}/include -I${expat.dev}/include -I${ncurses.dev}/include";
|
||||
env.CXXFLAGS = finalAttrs.env.CFLAGS;
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "ROCm source-level debugger for Linux, based on GDB";
|
||||
homepage = "https://github.com/ROCm/ROCgdb";
|
||||
license = licenses.gpl3Plus;
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
38
pkgs/rocm-modules/rocm-cmake/default.nix
Normal file
38
pkgs/rocm-modules/rocm-cmake/default.nix
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
rocm-core,
|
||||
cmake,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocm-cmake";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocm-cmake";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-wAipNWAB66YNf7exLSNPAzg3NgkGD9LPKfKiulL5yak=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [ cmake ];
|
||||
|
||||
buildInputs = [ rocm-core ];
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "CMake modules for common build tasks for the ROCm stack";
|
||||
homepage = "https://github.com/ROCm/rocm-cmake";
|
||||
license = licenses.mit;
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.unix;
|
||||
};
|
||||
})
|
||||
73
pkgs/rocm-modules/rocm-comgr/default.nix
Normal file
73
pkgs/rocm-modules/rocm-comgr/default.nix
Normal file
@@ -0,0 +1,73 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchpatch,
|
||||
cmake,
|
||||
python3,
|
||||
rocm-merged-llvm,
|
||||
rocm-device-libs,
|
||||
zlib,
|
||||
zstd,
|
||||
libxml2,
|
||||
}:
|
||||
|
||||
let
|
||||
llvmNativeTarget =
|
||||
if stdenv.hostPlatform.isx86_64 then
|
||||
"X86"
|
||||
else if stdenv.hostPlatform.isAarch64 then
|
||||
"AArch64"
|
||||
else
|
||||
throw "Unsupported ROCm LLVM platform";
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocm-comgr";
|
||||
# In-tree with ROCm LLVM
|
||||
inherit (rocm-merged-llvm) version;
|
||||
src = rocm-merged-llvm.llvm-src;
|
||||
|
||||
sourceRoot = "${finalAttrs.src.name}/amd/comgr";
|
||||
|
||||
patches = [
|
||||
# [Comgr] Extend ISA compatibility
|
||||
# (fetchpatch {
|
||||
# sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k=";
|
||||
# url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch";
|
||||
# relative = "amd/comgr";
|
||||
# })
|
||||
#[Comgr] Extend ISA compatibility for CCOB
|
||||
# (fetchpatch {
|
||||
# sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY=";
|
||||
# url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch";
|
||||
# relative = "amd/comgr";
|
||||
# })
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
python3
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
rocm-device-libs
|
||||
libxml2
|
||||
zlib
|
||||
zstd
|
||||
rocm-merged-llvm
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
|
||||
];
|
||||
|
||||
meta = with lib; {
|
||||
description = "APIs for compiling and inspecting AMDGPU code objects";
|
||||
homepage = "https://github.com/ROCm/ROCm-CompilerSupport/tree/amd-stg-open/lib/comgr";
|
||||
license = licenses.ncsa;
|
||||
maintainers = with maintainers; [ lovesegfault ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
53
pkgs/rocm-modules/rocm-core/default.nix
Normal file
53
pkgs/rocm-modules/rocm-core/default.nix
Normal file
@@ -0,0 +1,53 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
writeText,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocm-core";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocm-core";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-dDTCEAbeA88deLgUbdbulaHpHI9zcTze0mZeS49TsAM=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [ cmake ];
|
||||
# FIXME: What's the correct way to set this?
|
||||
env.ROCM_LIBPATCH_VERSION = "${lib.versions.major finalAttrs.version}0${lib.versions.minor finalAttrs.version}0${lib.versions.patch finalAttrs.version}";
|
||||
env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}";
|
||||
env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}";
|
||||
cmakeFlags = [
|
||||
"-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
|
||||
"-DROCM_VERSION=${finalAttrs.version}"
|
||||
"-DBUILD_ID=${finalAttrs.env.BUILD_ID}"
|
||||
];
|
||||
|
||||
setupHook = writeText "setupHook.sh" ''
|
||||
export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
|
||||
export BUILD_ID="${finalAttrs.env.BUILD_ID}"
|
||||
export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}"
|
||||
'';
|
||||
|
||||
passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION;
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
page = "tags?per_page=4";
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "Utility for getting the ROCm release version";
|
||||
homepage = "https://github.com/ROCm/rocm-core";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
43
pkgs/rocm-modules/rocm-device-libs/cmake.patch
Normal file
43
pkgs/rocm-modules/rocm-device-libs/cmake.patch
Normal file
@@ -0,0 +1,43 @@
|
||||
diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake
|
||||
index 07c60eb..c736b3e 100644
|
||||
--- a/amd/device-libs/cmake/Packages.cmake
|
||||
+++ b/amd/device-libs/cmake/Packages.cmake
|
||||
@@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES
|
||||
IMPORTED_LOCATION \"${target_path}\")")
|
||||
endforeach()
|
||||
configure_file(AMDDeviceLibsConfig.cmake.in
|
||||
- ${PACKAGE_PREFIX}/AMDDeviceLibsConfig.cmake
|
||||
+ lib/cmake/AMDDeviceLibs/AMDDeviceLibsConfig.cmake
|
||||
@ONLY)
|
||||
|
||||
|
||||
set(install_path_suffix "amdgcn/bitcode")
|
||||
|
||||
# Generate the install-tree package.
|
||||
-# We do not know the absolute path to the intall tree until we are installed,
|
||||
-# so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
|
||||
-# relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
|
||||
-set(AMD_DEVICE_LIBS_PREFIX_CODE "
|
||||
+if(IS_ABSOLUTE "${CMAKE_INSTALL_PREFIX}")
|
||||
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "set(AMD_DEVICE_LIBS_PREFIX \"${CMAKE_INSTALL_PREFIX}\")")
|
||||
+else()
|
||||
+ # We do not know the absolute path to the install tree until we are installed,
|
||||
+ # so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
|
||||
+ # relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
|
||||
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "
|
||||
# Derive absolute install prefix from config file path.
|
||||
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)")
|
||||
-string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
|
||||
-foreach(p ${count})
|
||||
- set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
|
||||
+ string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
|
||||
+ foreach(p ${count})
|
||||
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
|
||||
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${AMD_DEVICE_LIBS_PREFIX}\" PATH)")
|
||||
-endforeach()
|
||||
+ endforeach()
|
||||
+endif()
|
||||
+
|
||||
set(AMD_DEVICE_LIBS_TARGET_CODE)
|
||||
foreach(target ${AMDGCN_LIB_LIST})
|
||||
get_target_property(target_name ${target} ARCHIVE_OUTPUT_NAME)
|
||||
62
pkgs/rocm-modules/rocm-device-libs/default.nix
Normal file
62
pkgs/rocm-modules/rocm-device-libs/default.nix
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
cmake,
|
||||
ninja,
|
||||
libxml2,
|
||||
zlib,
|
||||
zstd,
|
||||
ncurses,
|
||||
rocm-merged-llvm,
|
||||
python3,
|
||||
}:
|
||||
|
||||
let
|
||||
llvmNativeTarget =
|
||||
if stdenv.hostPlatform.isx86_64 then
|
||||
"X86"
|
||||
else if stdenv.hostPlatform.isAarch64 then
|
||||
"AArch64"
|
||||
else
|
||||
throw "Unsupported ROCm LLVM platform";
|
||||
in
|
||||
stdenv.mkDerivation {
|
||||
pname = "rocm-device-libs";
|
||||
# In-tree with ROCm LLVM
|
||||
inherit (rocm-merged-llvm) version;
|
||||
src = rocm-merged-llvm.llvm-src;
|
||||
|
||||
postPatch = ''
|
||||
cd amd/device-libs
|
||||
'';
|
||||
|
||||
patches = [ ./cmake.patch ];
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
ninja
|
||||
python3
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
libxml2
|
||||
zlib
|
||||
zstd
|
||||
ncurses
|
||||
rocm-merged-llvm
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_RELEASE_TYPE=Release"
|
||||
"-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
|
||||
];
|
||||
|
||||
meta = with lib; {
|
||||
description = "Set of AMD-specific device-side language runtime libraries";
|
||||
homepage = "https://github.com/ROCm/ROCm-Device-Libs";
|
||||
license = licenses.ncsa;
|
||||
maintainers = with maintainers; [ lovesegfault ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
}
|
||||
71
pkgs/rocm-modules/rocm-docs-core/default.nix
Normal file
71
pkgs/rocm-modules/rocm-docs-core/default.nix
Normal file
@@ -0,0 +1,71 @@
|
||||
{
|
||||
lib,
|
||||
fetchFromGitHub,
|
||||
gitUpdater,
|
||||
buildPythonPackage,
|
||||
setuptools,
|
||||
beautifulsoup4,
|
||||
gitpython,
|
||||
pydata-sphinx-theme,
|
||||
pygithub,
|
||||
sphinx,
|
||||
breathe,
|
||||
myst-nb,
|
||||
myst-parser,
|
||||
sphinx-book-theme,
|
||||
sphinx-copybutton,
|
||||
sphinx-design,
|
||||
sphinx-external-toc,
|
||||
sphinx-notfound-page,
|
||||
pyyaml,
|
||||
fastjsonschema,
|
||||
}:
|
||||
|
||||
# FIXME: Move to rocmPackages_common
|
||||
buildPythonPackage rec {
|
||||
pname = "rocm-docs-core";
|
||||
version = "1.19.0";
|
||||
format = "pyproject";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocm-docs-core";
|
||||
rev = "v${version}";
|
||||
hash = "sha256-vmtOf9e8RhWQ0ecL+Sn1HJGK+ILCaQxeQvUkQ8W8YX8=";
|
||||
};
|
||||
|
||||
buildInputs = [ setuptools ];
|
||||
|
||||
propagatedBuildInputs = [
|
||||
beautifulsoup4
|
||||
gitpython
|
||||
pydata-sphinx-theme
|
||||
pygithub
|
||||
sphinx
|
||||
breathe
|
||||
myst-nb
|
||||
myst-parser
|
||||
sphinx-book-theme
|
||||
sphinx-copybutton
|
||||
sphinx-design
|
||||
sphinx-external-toc
|
||||
sphinx-notfound-page
|
||||
pyyaml
|
||||
fastjsonschema
|
||||
];
|
||||
|
||||
pythonImportsCheck = [ "rocm_docs" ];
|
||||
|
||||
passthru.updateScript = gitUpdater { rev-prefix = "v"; };
|
||||
|
||||
meta = with lib; {
|
||||
description = "ROCm Documentation Python package for ReadTheDocs build standardization";
|
||||
homepage = "https://github.com/ROCm/rocm-docs-core";
|
||||
license = with licenses; [
|
||||
mit
|
||||
cc-by-40
|
||||
];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
}
|
||||
27
pkgs/rocm-modules/rocm-path/default.nix
Normal file
27
pkgs/rocm-modules/rocm-path/default.nix
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
symlinkJoin,
|
||||
linkFarm,
|
||||
clr,
|
||||
hipblas,
|
||||
hipblas-common,
|
||||
rocblas,
|
||||
rocsolver,
|
||||
rocsparse,
|
||||
rocm-device-libs,
|
||||
rocm-smi,
|
||||
llvm,
|
||||
}:
|
||||
symlinkJoin {
|
||||
name = "rocm-path-${clr.version}";
|
||||
paths = [
|
||||
clr
|
||||
hipblas-common
|
||||
hipblas
|
||||
rocblas
|
||||
rocsolver
|
||||
rocsparse
|
||||
rocm-device-libs
|
||||
rocm-smi
|
||||
(linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; })
|
||||
];
|
||||
}
|
||||
115
pkgs/rocm-modules/rocm-runtime/default.nix
Normal file
115
pkgs/rocm-modules/rocm-runtime/default.nix
Normal file
@@ -0,0 +1,115 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
fetchpatch,
|
||||
rocmUpdateScript,
|
||||
pkg-config,
|
||||
cmake,
|
||||
ninja,
|
||||
xxd,
|
||||
rocm-device-libs,
|
||||
elfutils,
|
||||
libdrm,
|
||||
numactl,
|
||||
valgrind,
|
||||
libxml2,
|
||||
rocm-merged-llvm,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocm-runtime";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "ROCR-Runtime";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-LOILnvjGwlLoB99+TdZib7VJsgp45yGJiEPgrlwXItI=";
|
||||
};
|
||||
|
||||
env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
|
||||
env.CXXFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
|
||||
|
||||
nativeBuildInputs = [
|
||||
pkg-config
|
||||
cmake
|
||||
ninja
|
||||
xxd
|
||||
rocm-merged-llvm
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
elfutils
|
||||
libdrm
|
||||
numactl
|
||||
# without valgrind, additional work for "kCodeCopyAligned11" is done in the installPhase
|
||||
valgrind
|
||||
libxml2
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DBUILD_SHARED_LIBS=ON"
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
];
|
||||
|
||||
patches = [
|
||||
# Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272
|
||||
# (fetchpatch {
|
||||
# # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int
|
||||
# url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch";
|
||||
# hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE=";
|
||||
# })
|
||||
# (fetchpatch {
|
||||
# # [PATCH] rocr: refactor of runtime.cpp based on Coverity
|
||||
# url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch";
|
||||
# hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0=";
|
||||
# })
|
||||
(fetchpatch {
|
||||
# [PATCH] queues: fix UB due to 1 << 31
|
||||
url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch";
|
||||
hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U=";
|
||||
})
|
||||
(fetchpatch {
|
||||
# [PATCH] topology: fix UB due to 1 << 31
|
||||
url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch";
|
||||
hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w=";
|
||||
})
|
||||
(fetchpatch {
|
||||
# [PATCH] kfd_ioctl: fix UB due to 1 << 31
|
||||
url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch";
|
||||
hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns=";
|
||||
})
|
||||
./remove-hsa-aqlprofile-dep.patch
|
||||
];
|
||||
|
||||
postPatch = ''
|
||||
patchShebangs --build \
|
||||
runtime/hsa-runtime/core/runtime/trap_handler/create_trap_handler_header.sh \
|
||||
runtime/hsa-runtime/core/runtime/blit_shaders/create_blit_shader_header.sh \
|
||||
runtime/hsa-runtime/image/blit_src/create_hsaco_ascii_file.sh
|
||||
patchShebangs --host image core runtime
|
||||
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace 'hsa/include/hsa' 'include/hsa'
|
||||
|
||||
export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "Platform runtime for ROCm";
|
||||
homepage = "https://github.com/ROCm/ROCR-Runtime";
|
||||
license = with licenses; [ ncsa ];
|
||||
maintainers = with maintainers; [ lovesegfault ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
@@ -0,0 +1,27 @@
|
||||
libhsa-amd-aqlprofile64 library is unfree
|
||||
Bug: https://github.com/ROCm/ROCm/issues/1781
|
||||
--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
|
||||
+++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
|
||||
@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
|
||||
setFlag(HSA_EXTENSION_AMD_PC_SAMPLING);
|
||||
}
|
||||
|
||||
- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
|
||||
- os::CloseLib(lib);
|
||||
- setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
|
||||
- }
|
||||
-
|
||||
setFlag(HSA_EXTENSION_AMD_PROFILER);
|
||||
|
||||
break;
|
||||
--- a/runtime/hsa-runtime/core/runtime/hsa.cpp
|
||||
+++ b/runtime/hsa-runtime/core/runtime/hsa.cpp
|
||||
@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) {
|
||||
+ if (0) {
|
||||
if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) {
|
||||
debug_print("aqlprofile API incompatible ver %d, current ver %d\n",
|
||||
version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR);
|
||||
89
pkgs/rocm-modules/rocm-smi/cmake.patch
Normal file
89
pkgs/rocm-modules/rocm-smi/cmake.patch
Normal file
@@ -0,0 +1,89 @@
|
||||
diff --git a/rocm_smi-backward-compat.cmake b/rocm_smi-backward-compat.cmake
|
||||
index aa8fd9c..59afce5 100644
|
||||
--- a/rocm_smi-backward-compat.cmake
|
||||
+++ b/rocm_smi-backward-compat.cmake
|
||||
@@ -72,7 +72,12 @@ function(generate_wrapper_header)
|
||||
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
|
||||
#set #include statement
|
||||
get_filename_component(file_name ${header_file} NAME)
|
||||
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${ROCM_SMI}/${file_name}\"\n")
|
||||
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
+ else()
|
||||
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
+ endif()
|
||||
+ set(include_statements "${include_statements}#include \"${include_dir}/${ROCM_SMI}/${file_name}\"\n")
|
||||
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${RSMI_WRAPPER_INC_DIR}/${file_name})
|
||||
unset(include_guard)
|
||||
unset(include_statements)
|
||||
@@ -90,7 +95,12 @@ function(generate_wrapper_header)
|
||||
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
|
||||
#set #include statement
|
||||
get_filename_component(file_name ${header_file} NAME)
|
||||
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${OAM_TARGET_NAME}/${file_name}\"\n")
|
||||
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
+ else()
|
||||
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
+ endif()
|
||||
+ set(include_statements "${include_statements}#include \"${include_dir}/${OAM_TARGET_NAME}/${file_name}\"\n")
|
||||
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${OAM_WRAPPER_INC_DIR}/${file_name})
|
||||
unset(include_guard)
|
||||
unset(include_statements)
|
||||
@@ -123,11 +133,16 @@ function(create_library_symlink)
|
||||
set(library_files "${LIB_RSMI}")
|
||||
endif()
|
||||
|
||||
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
|
||||
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
|
||||
+ else()
|
||||
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
|
||||
+ endif()
|
||||
foreach(file_name ${library_files})
|
||||
add_custom_target(link_${file_name} ALL
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
|
||||
+ ${install_libdir}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
|
||||
endforeach()
|
||||
|
||||
file(MAKE_DIRECTORY ${OAM_WRAPPER_LIB_DIR})
|
||||
@@ -151,11 +166,16 @@ function(create_library_symlink)
|
||||
set(library_files "${LIB_OAM}")
|
||||
endif()
|
||||
|
||||
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
|
||||
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
|
||||
+ else()
|
||||
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
|
||||
+ endif()
|
||||
foreach(file_name ${library_files})
|
||||
add_custom_target(link_${file_name} ALL
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
|
||||
+ ${install_libdir}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
|
||||
endforeach()
|
||||
|
||||
endfunction()
|
||||
diff --git a/rocm_smi/CMakeLists.txt b/rocm_smi/CMakeLists.txt
|
||||
index c594eeb..d3ed39d 100755
|
||||
--- a/rocm_smi/CMakeLists.txt
|
||||
+++ b/rocm_smi/CMakeLists.txt
|
||||
@@ -105,10 +105,15 @@ endif ()
|
||||
#file reorganization changes
|
||||
#rocm_smi.py moved to libexec/rocm_smi. so creating rocm-smi symlink
|
||||
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
|
||||
+if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBEXECDIR})
|
||||
+ set(install_libexecdir "${CMAKE_INSTALL_LIBEXECDIR}")
|
||||
+else()
|
||||
+ set(install_libexecdir "../${CMAKE_INSTALL_LIBEXECDIR}")
|
||||
+endif()
|
||||
add_custom_target(link-rocm-smi ALL
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||
- ../${CMAKE_INSTALL_LIBEXECDIR}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
|
||||
+ ${install_libexecdir}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
install(TARGETS ${ROCM_SMI_TARGET}
|
||||
58
pkgs/rocm-modules/rocm-smi/default.nix
Normal file
58
pkgs/rocm-modules/rocm-smi/default.nix
Normal file
@@ -0,0 +1,58 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
wrapPython,
|
||||
libdrm,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocm-smi";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocm_smi_lib";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-qshAMVhHJIA06fGOTJx5/l1t8wKv7KbmvdCSKNuEIKM=";
|
||||
};
|
||||
|
||||
patches = [ ./cmake.patch ];
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
wrapPython
|
||||
libdrm
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
];
|
||||
|
||||
postInstall = ''
|
||||
wrapPythonProgramsIn $out
|
||||
mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py
|
||||
mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "System management interface for AMD GPUs supported by ROCm";
|
||||
homepage = "https://github.com/ROCm/rocm_smi_lib";
|
||||
license = with licenses; [ mit ];
|
||||
maintainers = with maintainers; [ lovesegfault ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = [ "x86_64-linux" ];
|
||||
};
|
||||
})
|
||||
32
pkgs/rocm-modules/rocm-tests/default.nix
Normal file
32
pkgs/rocm-modules/rocm-tests/default.nix
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
clr,
|
||||
ollama,
|
||||
python3Packages,
|
||||
rocmPackages,
|
||||
magma-hip,
|
||||
emptyDirectory,
|
||||
stdenv,
|
||||
}:
|
||||
# This package exists purely to have a bunch of passthru.tests attrs
|
||||
stdenv.mkDerivation {
|
||||
name = "rocm-tests";
|
||||
nativeBuildInputs = [
|
||||
clr
|
||||
];
|
||||
src = emptyDirectory;
|
||||
postInstall = "mkdir -p $out";
|
||||
passthru.tests = {
|
||||
ollama = ollama.override {
|
||||
inherit rocmPackages;
|
||||
acceleration = "rocm";
|
||||
};
|
||||
torch = python3Packages.torch.override {
|
||||
inherit rocmPackages;
|
||||
rocmSupport = true;
|
||||
cudaSupport = false;
|
||||
magma-hip = magma-hip.override {
|
||||
inherit rocmPackages;
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
65
pkgs/rocm-modules/rocminfo/default.nix
Normal file
65
pkgs/rocm-modules/rocminfo/default.nix
Normal file
@@ -0,0 +1,65 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocm-runtime,
|
||||
busybox,
|
||||
python3,
|
||||
gnugrep,
|
||||
clr, # Only for localGpuTargets
|
||||
# rocminfo requires that the calling user have a password and be in
|
||||
# the video group. If we let rocm_agent_enumerator rely upon
|
||||
# rocminfo's output, then it, too, has those requirements. Instead,
|
||||
# we can specify the GPU targets for this system (e.g. "gfx803" for
|
||||
# Polaris) such that no system call is needed for downstream
|
||||
# compilers to determine the desired target.
|
||||
defaultTargets ? (clr.localGpuTargets or [ ]),
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
version = "6.4.1";
|
||||
pname = "rocminfo";
|
||||
|
||||
# Upstream sources, pinned to the ROCm release tag that matches `version`.
src = fetchFromGitHub {
  owner = "ROCm";
  repo = "rocminfo";
  rev = "rocm-${finalAttrs.version}";
  # The value is an SRI hash, so it goes in `hash` (as in the sibling ROCm
  # packages) rather than the legacy `sha256` argument.
  hash = "sha256-YscZ5sFsLOVBg98w2X6vTzniTvl9NfCkIE+HAH6vv5Y=";
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
];
|
||||
|
||||
buildInputs = [ rocm-runtime ];
|
||||
propagatedBuildInputs = [ python3 ];
|
||||
cmakeFlags = [ "-DROCRTST_BLD_TYPE=Release" ];
|
||||
|
||||
prePatch = ''
|
||||
patchShebangs rocm_agent_enumerator
|
||||
sed 's,lsmod | grep ,${busybox}/bin/lsmod | ${gnugrep}/bin/grep ,' -i rocminfo.cc
|
||||
'';
|
||||
|
||||
postInstall = lib.optionalString (defaultTargets != [ ]) ''
|
||||
echo '${lib.concatStringsSep "\n" defaultTargets}' > $out/bin/target.lst
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "ROCm Application for Reporting System Info";
|
||||
homepage = "https://github.com/ROCm/rocminfo";
|
||||
license = licenses.ncsa;
|
||||
maintainers = with maintainers; [ lovesegfault ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
165
pkgs/rocm-modules/rocmlir/default.nix
Normal file
165
pkgs/rocm-modules/rocmlir/default.nix
Normal file
@@ -0,0 +1,165 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocminfo,
|
||||
clr,
|
||||
git,
|
||||
libxml2,
|
||||
libedit,
|
||||
zstd,
|
||||
zlib,
|
||||
ncurses,
|
||||
python3Packages,
|
||||
buildRockCompiler ? false,
|
||||
buildTests ? false, # `argument of type 'NoneType' is not iterable`
|
||||
}:
|
||||
|
||||
# FIXME: rocmlir carries an entire separate LLVM build in a subdirectory, which is silly.
|
||||
# It seems to be forked from AMD's own LLVM
|
||||
# If possible reusing the rocmPackages.llvm build would be better
|
||||
# Would have to confirm it is compatible with ROCm's tagged LLVM.
|
||||
# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways
|
||||
# in subdirs
|
||||
|
||||
# Theoretically, we could have our MLIR have an output
|
||||
# with the source and built objects so that we can just
|
||||
# use it as the external LLVM repo for this
|
||||
let
|
||||
suffix = if buildRockCompiler then "-rock" else "";
|
||||
|
||||
llvmNativeTarget =
|
||||
if stdenv.hostPlatform.isx86_64 then
|
||||
"X86"
|
||||
else if stdenv.hostPlatform.isAarch64 then
|
||||
"AArch64"
|
||||
else
|
||||
throw "Unsupported ROCm LLVM platform";
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocmlir${suffix}";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals (!buildRockCompiler) [
|
||||
"external"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocMLIR";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-p/gvr1Z6yZtO5N+ecSouXiCrf520jt1HMOy/tohUHfI=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
python3Packages.python
|
||||
python3Packages.tomli
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
git
|
||||
libxml2
|
||||
libedit
|
||||
];
|
||||
|
||||
propagatedBuildInputs = [
|
||||
zstd
|
||||
zlib
|
||||
ncurses
|
||||
];
|
||||
|
||||
patches = [
|
||||
./initparamdata-sort-const.patch
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DLLVM_USE_LINKER=lld"
|
||||
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
|
||||
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
|
||||
"-DLLVM_ENABLE_LIBCXX=ON"
|
||||
"-DLLVM_ENABLE_TERMINFO=ON"
|
||||
"-DROCM_PATH=${clr}"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
(lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler)
|
||||
]
|
||||
++ lib.optionals (!buildRockCompiler) [
|
||||
"-DROCM_TEST_CHIPSET=gfx000"
|
||||
];
|
||||
|
||||
postPatch = ''
|
||||
patchShebangs mlir
|
||||
patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py
|
||||
|
||||
# Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read'
|
||||
substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \
|
||||
--replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };"
|
||||
|
||||
# remove when no longer required
|
||||
substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \
|
||||
--replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin"
|
||||
|
||||
substituteInPlace mlir/utils/performance/common/CMakeLists.txt \
|
||||
--replace-fail "/opt/rocm" "${clr}"
|
||||
'';
|
||||
|
||||
dontBuild = true;
|
||||
doCheck = true;
|
||||
|
||||
# Certain libs aren't being generated, try enabling tests next update
|
||||
checkTarget =
|
||||
if buildRockCompiler then
|
||||
"librockCompiler"
|
||||
else if buildTests then
|
||||
"check-rocmlir"
|
||||
else
|
||||
"check-rocmlir-build-only";
|
||||
|
||||
# When the full rocMLIR build is produced (buildRockCompiler == false), copy
# the libraries built from the bundled LLVM tree into the `external` output
# and rewrite RPATHs so `out` and `external` can find each other as well as
# their runtime dependencies. The `external` output is only declared in
# `outputs` for that same condition, hence the guard.
postInstall =
  let
    # Colon-separated lib directories appended to every RPATH set below.
    libPath = lib.makeLibraryPath [
      zstd
      zlib
      ncurses
      clr
      stdenv.cc.cc
    ];
  in
  # optionalString, not optionals: this is a shell snippet, so the disabled
  # case must evaluate to "" rather than to an empty list.
  lib.optionalString (!buildRockCompiler) ''
    mkdir -p $external/lib
    cp -a external/llvm-project/llvm/lib/{*.a*,*.so*} $external/lib
    patchelf --set-rpath $external/lib:$out/lib:${libPath} $external/lib/*.so*
    patchelf --set-rpath $out/lib:$external/lib:${libPath} $out/{bin/*,lib/*.so*}
  '';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
page = "tags?per_page=4";
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "MLIR-based convolution and GEMM kernel generator";
|
||||
homepage = "https://github.com/ROCm/rocMLIR";
|
||||
license = with licenses; [ asl20 ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
13
pkgs/rocm-modules/rocmlir/initparamdata-sort-const.patch
Normal file
13
pkgs/rocm-modules/rocmlir/initparamdata-sort-const.patch
Normal file
@@ -0,0 +1,13 @@
|
||||
diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
|
||||
index 3f5ee596819a..590d53788822 100644
|
||||
--- a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
|
||||
+++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
|
||||
@@ -209,7 +209,7 @@ private:
|
||||
size_t original_pos;
|
||||
int64_t padding_amount;
|
||||
|
||||
- bool operator<(const InitParamData &rhs) {
|
||||
+ bool operator<(const InitParamData &rhs) const {
|
||||
if (this->padding_amount < rhs.padding_amount) {
|
||||
return true;
|
||||
} else if (this->padding_amount == rhs.padding_amount) {
|
||||
98
pkgs/rocm-modules/rocprim/default.nix
Normal file
98
pkgs/rocm-modules/rocprim/default.nix
Normal file
@@ -0,0 +1,98 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
clr,
|
||||
gtest,
|
||||
gbenchmark,
|
||||
buildTests ? false,
|
||||
buildBenchmarks ? false,
|
||||
gpuTargets ? [ ],
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocprim";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"benchmark"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocPRIM";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-EP61n4syYMWjTDkjC0dWLj9yau6KL2qu1Bs5IBtw580=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
lib.optionals buildTests [
|
||||
gtest
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
gbenchmark
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DBUILD_TEST=ON"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"-DBUILD_BENCHMARK=ON"
|
||||
];
|
||||
|
||||
postInstall =
|
||||
lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv $out/bin/test_* $test/bin
|
||||
mv $out/bin/rocprim $test/bin
|
||||
''
|
||||
+ lib.optionalString buildBenchmarks ''
|
||||
mkdir -p $benchmark/bin
|
||||
mv $out/bin/benchmark_* $benchmark/bin
|
||||
''
|
||||
+ lib.optionalString (buildTests || buildBenchmarks) ''
|
||||
rmdir $out/bin
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "ROCm parallel primitives";
|
||||
homepage = "https://github.com/ROCm/rocPRIM";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
79
pkgs/rocm-modules/rocprofiler-register/default.nix
Normal file
79
pkgs/rocm-modules/rocprofiler-register/default.nix
Normal file
@@ -0,0 +1,79 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
rocm-runtime,
|
||||
rocprofiler,
|
||||
numactl,
|
||||
libpciaccess,
|
||||
libxml2,
|
||||
elfutils,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
clang,
|
||||
clr,
|
||||
python3Packages,
|
||||
gpuTargets ? clr.gpuTargets,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocprofiler-register";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocprofiler-register";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-HaN4XMHuCRDfKOpfuZ2SkOEQfAZKouh6luqbtATUYm0=";
|
||||
fetchSubmodules = true;
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clang
|
||||
clr
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
numactl
|
||||
libpciaccess
|
||||
libxml2
|
||||
elfutils
|
||||
rocm-runtime
|
||||
|
||||
rocprofiler.rocmtoolkit-merged
|
||||
|
||||
python3Packages.lxml
|
||||
python3Packages.cppheaderparser
|
||||
python3Packages.pyyaml
|
||||
python3Packages.barectf
|
||||
python3Packages.pandas
|
||||
];
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
|
||||
"-DHIP_ROOT_DIR=${clr}"
|
||||
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
"-DBUILD_TEST=OFF"
|
||||
"-DROCPROFILER_BUILD_TESTS=0"
|
||||
"-DROCPROFILER_BUILD_SAMPLES=0"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
];
|
||||
|
||||
# Update script for this package. The name is taken from `pname`, as in the
# sibling ROCm packages, so a rename cannot leave it stale; owner and repo
# are inherited from the fetcher arguments.
passthru.updateScript = rocmUpdateScript {
  name = finalAttrs.pname;
  inherit (finalAttrs.src) owner;
  inherit (finalAttrs.src) repo;
};
|
||||
|
||||
# Package metadata. The description and homepage refer to
# rocprofiler-register itself (the repository fetched in `src`), not to the
# separate rocprofiler project.
meta = with lib; {
  description = "Helper library for registering ROCm runtime API tables with rocprofiler";
  homepage = "https://github.com/ROCm/rocprofiler-register";
  license = with licenses; [ mit ]; # mitx11
  teams = [ teams.rocm ];
  platforms = platforms.linux;
};
|
||||
})
|
||||
@@ -0,0 +1,15 @@
|
||||
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
|
||||
index 46efbd5..ca2cc3b 100644
|
||||
--- a/test/CMakeLists.txt
|
||||
+++ b/test/CMakeLists.txt
|
||||
@@ -127,10 +127,6 @@ function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
|
||||
DEPENDS ${INPUT_FILE} clang
|
||||
COMMENT "Building ${OUTPUT_FILE}..."
|
||||
VERBATIM)
|
||||
- install(
|
||||
- FILES ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
- DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests-v1
|
||||
- COMPONENT tests)
|
||||
set(HSACO_TARGET_LIST
|
||||
${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
PARENT_SCOPE)
|
||||
142
pkgs/rocm-modules/rocprofiler/default.nix
Normal file
142
pkgs/rocm-modules/rocprofiler/default.nix
Normal file
@@ -0,0 +1,142 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
symlinkJoin,
|
||||
cmake,
|
||||
clang,
|
||||
clr,
|
||||
rocm-core,
|
||||
rocm-runtime,
|
||||
rocm-device-libs,
|
||||
roctracer,
|
||||
rocdbgapi,
|
||||
numactl,
|
||||
libpciaccess,
|
||||
libxml2,
|
||||
elfutils,
|
||||
mpi,
|
||||
systemd,
|
||||
gtest,
|
||||
git,
|
||||
python3Packages,
|
||||
gpuTargets ? clr.gpuTargets,
|
||||
}:
|
||||
|
||||
let
|
||||
rocmtoolkit-merged = symlinkJoin {
|
||||
name = "rocmtoolkit-merged";
|
||||
|
||||
paths = [
|
||||
rocm-core
|
||||
rocm-runtime
|
||||
rocm-device-libs
|
||||
roctracer
|
||||
rocdbgapi
|
||||
clr
|
||||
];
|
||||
|
||||
postBuild = ''
|
||||
rm -rf $out/nix-support
|
||||
'';
|
||||
};
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocprofiler";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocprofiler";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-CgW8foM4W3K19kUK/l8IsH2Q9DHi/z88viXTxhNqlHQ=";
|
||||
fetchSubmodules = true;
|
||||
};
|
||||
|
||||
patches = [
|
||||
# These just simply won't build
|
||||
./0000-dont-install-tests-hsaco.patch
|
||||
./optional-aql-in-cmake.patch
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clang
|
||||
clr
|
||||
git
|
||||
python3Packages.lxml
|
||||
python3Packages.cppheaderparser
|
||||
python3Packages.pyyaml
|
||||
python3Packages.barectf
|
||||
python3Packages.pandas
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
numactl
|
||||
libpciaccess
|
||||
libxml2
|
||||
elfutils
|
||||
mpi
|
||||
systemd
|
||||
gtest
|
||||
];
|
||||
|
||||
propagatedBuildInputs = [ rocmtoolkit-merged ];
|
||||
|
||||
# HACK: allow building without aqlprofile; this probably explodes at runtime if profiling is used
|
||||
env.LDFLAGS = "-z nodefs -Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all";
|
||||
# HACK: rocprofiler's cmake doesn't add these deps properly
|
||||
env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w";
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
|
||||
"-DHIP_ROOT_DIR=${clr}"
|
||||
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
# Disable tests and samples, then manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DBUILD_TEST=OFF"
|
||||
"-DROCPROFILER_BUILD_TESTS=0"
|
||||
"-DROCPROFILER_BUILD_SAMPLES=0"
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
];
|
||||
|
||||
# Source fix-ups applied after the patches:
#  - fix script shebangs across the tree;
#  - make ROCPROFILER_CHECKOUT_GIT_SUBMODULE return immediately (submodules
#    are already fetched via fetchSubmodules);
#  - drop the line that sets ROCPROFILER_BUILD_TESTS to ON;
#  - replace the device-lib bitcode lookups in the test CMake files with
#    store paths.
# The last two substitutions previously used the deprecated bare `--replace`;
# `--replace-warn` keeps the same "warn but continue on no match" behaviour.
# NOTE(review): tighten to `--replace-fail` once the patterns are confirmed
# to be present in this release.
postPatch = ''
  patchShebangs .

  substituteInPlace cmake_modules/rocprofiler_utils.cmake \
    --replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)
    return()'

  substituteInPlace CMakeLists.txt \
    --replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' ""

  substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \
    --replace-warn "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"

  substituteInPlace test/CMakeLists.txt \
    --replace-warn "\''${ROCM_ROOT_DIR}/amdgcn/bitcode" "${rocm-device-libs}/amdgcn/bitcode"
'';
|
||||
|
||||
postInstall = ''
|
||||
# Why do these have the executable bit set?
|
||||
chmod -x $out/libexec/rocprofiler/counters/*.xml
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
passthru.rocmtoolkit-merged = rocmtoolkit-merged;
|
||||
|
||||
meta = with lib; {
|
||||
description = "Profiling with perf-counters and derived metrics";
|
||||
homepage = "https://github.com/ROCm/rocprofiler";
|
||||
license = with licenses; [ mit ]; # mitx11
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
147
pkgs/rocm-modules/rocprofiler/optional-aql-in-cmake.patch
Normal file
147
pkgs/rocm-modules/rocprofiler/optional-aql-in-cmake.patch
Normal file
@@ -0,0 +1,147 @@
|
||||
From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch
|
||||
diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake
|
||||
index 7b7c472..0aba3ed 100644
|
||||
--- a/cmake_modules/rocprofiler_env.cmake
|
||||
+++ b/cmake_modules/rocprofiler_env.cmake
|
||||
@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE)
|
||||
target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1)
|
||||
endif()
|
||||
|
||||
+set(ROCPROFILER_LD_AQLPROFILE false)
|
||||
# Enable direct loading of AQL-profile HSA extension
|
||||
if(ROCPROFILER_LD_AQLPROFILE)
|
||||
target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1)
|
||||
@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "")
|
||||
message(FATAL_ERROR "ROCM_ROOT_DIR is not found.")
|
||||
endif()
|
||||
|
||||
-find_library(
|
||||
- HSA_AMD_AQLPROFILE_LIBRARY
|
||||
- NAMES hsa-amd-aqlprofile64
|
||||
- HINTS ${CMAKE_PREFIX_PATH}
|
||||
- PATHS ${ROCM_ROOT_DIR}
|
||||
- PATH_SUFFIXES lib REQUIRED)
|
||||
diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt
|
||||
index 61782f0..16c83bf 100644
|
||||
--- a/src/api/CMakeLists.txt
|
||||
+++ b/src/api/CMakeLists.txt
|
||||
@@ -51,15 +51,6 @@ find_file(
|
||||
NO_DEFAULT_PATH REQUIRED)
|
||||
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
|
||||
|
||||
-find_library(
|
||||
- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so"
|
||||
- HINTS ${CMAKE_PREFIX_PATH}
|
||||
- PATHS ${ROCM_PATH}
|
||||
- PATH_SUFFIXES lib)
|
||||
-
|
||||
-if(NOT AQLPROFILE_LIB)
|
||||
- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!")
|
||||
-endif()
|
||||
|
||||
# ########################################################################################
|
||||
# Adding Old Library Files
|
||||
@@ -247,7 +238,7 @@ target_include_directories(
|
||||
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/rocprofiler>
|
||||
PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler)
|
||||
target_link_libraries(
|
||||
- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++
|
||||
+ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++
|
||||
dl rocprofiler::build-flags rocprofiler::memcheck)
|
||||
|
||||
get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME)
|
||||
@@ -325,8 +316,7 @@ target_link_options(
|
||||
-Wl,--no-undefined)
|
||||
target_link_libraries(
|
||||
rocprofiler-v2
|
||||
- PRIVATE ${AQLPROFILE_LIB}
|
||||
- hsa-runtime64::hsa-runtime64
|
||||
+ PRIVATE hsa-runtime64::hsa-runtime64
|
||||
Threads::Threads
|
||||
atomic
|
||||
numa
|
||||
diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp
|
||||
index 2c47186..6b39634 100644
|
||||
--- a/src/util/hsa_rsrc_factory.cpp
|
||||
+++ b/src/util/hsa_rsrc_factory.cpp
|
||||
@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
if (kern_arg_pool_ == nullptr)
|
||||
CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
|
||||
|
||||
- // Get AqlProfile API table
|
||||
- aqlprofile_api_ = {};
|
||||
-#ifdef ROCP_LD_AQLPROFILE
|
||||
- status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
-#else
|
||||
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||
- sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
-#endif
|
||||
- CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
-
|
||||
// Get Loader API table
|
||||
loader_api_ = {};
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||
diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp
|
||||
index 0a44d18..fab5b75 100644
|
||||
--- a/test/util/hsa_rsrc_factory.cpp
|
||||
+++ b/test/util/hsa_rsrc_factory.cpp
|
||||
@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
|
||||
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
|
||||
|
||||
- // Get AqlProfile API table
|
||||
- aqlprofile_api_ = {0};
|
||||
-#ifdef ROCP_LD_AQLPROFILE
|
||||
- status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
-#else
|
||||
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||
- sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
-#endif
|
||||
- CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
-
|
||||
// Get Loader API table
|
||||
loader_api_ = {0};
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||
diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt
|
||||
index 107cb51..0f6d4bf 100644
|
||||
--- a/tests-v2/unittests/core/CMakeLists.txt
|
||||
+++ b/tests-v2/unittests/core/CMakeLists.txt
|
||||
@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES
|
||||
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests")
|
||||
target_link_libraries(
|
||||
runCoreUnitTests
|
||||
- PRIVATE ${AQLPROFILE_LIB}
|
||||
- test_hsatool_library
|
||||
+ PRIVATE test_hsatool_library
|
||||
hsa-runtime64::hsa-runtime64
|
||||
Threads::Threads
|
||||
GTest::gtest GTest::gtest_main
|
||||
@@ -285,4 +284,4 @@ endif()
|
||||
# for the *_FilePlugin tests
|
||||
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
|
||||
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
|
||||
-endif()
|
||||
\ No newline at end of file
|
||||
+endif()
|
||||
diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt
|
||||
index 53180d5..0c4d4a7 100644
|
||||
--- a/tests-v2/unittests/profiler/CMakeLists.txt
|
||||
+++ b/tests-v2/unittests/profiler/CMakeLists.txt
|
||||
@@ -122,7 +122,7 @@ target_compile_definitions(
|
||||
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
target_link_libraries(
|
||||
- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
|
||||
+ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64
|
||||
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl)
|
||||
|
||||
add_dependencies(tests runUnitTests)
|
||||
@@ -158,4 +158,4 @@ endif()
|
||||
# for the *_FilePlugin tests
|
||||
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
|
||||
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
|
||||
-endif()
|
||||
\ No newline at end of file
|
||||
+endif()
|
||||
59
pkgs/rocm-modules/rocr-debug-agent/default.nix
Normal file
59
pkgs/rocm-modules/rocr-debug-agent/default.nix
Normal file
@@ -0,0 +1,59 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
clr,
|
||||
git,
|
||||
rocdbgapi,
|
||||
elfutils,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocr-debug-agent";
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocr_debug_agent";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-otoxZ2NHkPDIFhvn4/nvaQ/W4LF38Nx9MZ9IYEf1DyY=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
clr
|
||||
git
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
rocdbgapi
|
||||
elfutils
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
|
||||
"-DHIP_ROOT_DIR=${clr}"
|
||||
"-DHIP_PATH=${clr}"
|
||||
];
|
||||
|
||||
# Weird install target
|
||||
postInstall = ''
|
||||
rm -rf $out/src
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "Library that provides some debugging functionality for ROCr";
|
||||
homepage = "https://github.com/ROCm/rocr_debug_agent";
|
||||
license = with licenses; [ ncsa ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
99
pkgs/rocm-modules/rocrand/default.nix
Normal file
99
pkgs/rocm-modules/rocrand/default.nix
Normal file
@@ -0,0 +1,99 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
clr,
|
||||
gtest,
|
||||
gbenchmark,
|
||||
buildTests ? false,
|
||||
buildBenchmarks ? false,
|
||||
gpuTargets ? clr.localGpuTargets or [ ],
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocrand${clr.gpuArchSuffix}";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"benchmark"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocRAND";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-XqGPXx+LqjJs602vtG5u578B1hitGlsopA0izbClHro=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
lib.optionals buildTests [
|
||||
gtest
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
gbenchmark
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DHIP_ROOT_DIR=${clr}"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DBUILD_TEST=ON"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"-DBUILD_BENCHMARK=ON"
|
||||
];
|
||||
|
||||
postInstall =
|
||||
lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv $out/bin/test_* $test/bin
|
||||
''
|
||||
+ lib.optionalString buildBenchmarks ''
|
||||
mkdir -p $benchmark/bin
|
||||
mv $out/bin/benchmark_* $benchmark/bin
|
||||
''
|
||||
+ lib.optionalString (buildTests || buildBenchmarks) ''
|
||||
rm -r $out/bin/rocRAND
|
||||
# Fail if bin/ isn't actually empty
|
||||
rmdir $out/bin
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = finalAttrs.pname;
|
||||
inherit (finalAttrs.src) owner;
|
||||
inherit (finalAttrs.src) repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "Generate pseudo-random and quasi-random numbers";
|
||||
homepage = "https://github.com/ROCm/rocRAND";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
})
|
||||
135
pkgs/rocm-modules/rocsolver/default.nix
Normal file
135
pkgs/rocm-modules/rocsolver/default.nix
Normal file
@@ -0,0 +1,135 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocblas,
|
||||
rocprim,
|
||||
rocsparse,
|
||||
clr,
|
||||
fmt,
|
||||
gtest,
|
||||
gfortran,
|
||||
lapack-reference,
|
||||
buildTests ? false,
|
||||
buildBenchmarks ? false,
|
||||
gpuTargets ? (
|
||||
clr.localGpuTargets or [
|
||||
"gfx900"
|
||||
"gfx906"
|
||||
"gfx908"
|
||||
"gfx90a"
|
||||
"gfx942"
|
||||
"gfx1010"
|
||||
"gfx1030"
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
"gfx1102"
|
||||
"gfx1201"
|
||||
]
|
||||
),
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocsolver${clr.gpuArchSuffix}";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"benchmark"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocSOLVER";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-u5GRWetMnhEBJ9HZcXoEaqpdO8f0cuSPnq+XawljfUs=";
|
||||
};
|
||||
|
||||
nativeBuildInputs =
|
||||
[
|
||||
cmake
|
||||
# no ninja, it buffers console output and nix times out long periods of no output
|
||||
rocm-cmake
|
||||
clr
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
gfortran
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
[
|
||||
# FIXME: rocblas and rocsolver can't build in parallel
|
||||
# but rocsolver doesn't need rocblas' offload builds at build time
|
||||
# could we build against a rocblas-minimal?
|
||||
rocblas
|
||||
rocprim
|
||||
rocsparse
|
||||
fmt
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
gtest
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
lapack-reference
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DBUILD_CLIENTS_TESTS=ON"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"-DBUILD_CLIENTS_BENCHMARKS=ON"
|
||||
];
|
||||
|
||||
postInstall =
|
||||
lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
mv $out/bin/rocsolver-test $test/bin
|
||||
''
|
||||
+ lib.optionalString buildBenchmarks ''
|
||||
mkdir -p $benchmark/bin
|
||||
mv $out/bin/rocsolver-bench $benchmark/bin
|
||||
''
|
||||
+ lib.optionalString (buildTests || buildBenchmarks) ''
|
||||
rmdir $out/bin
|
||||
'';
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = "rocsolver";
|
||||
inherit (finalAttrs.src) owner repo;
|
||||
};
|
||||
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
|
||||
meta = with lib; {
|
||||
description = "ROCm LAPACK implementation";
|
||||
homepage = "https://github.com/ROCm/rocSOLVER";
|
||||
license = with licenses; [ bsd2 ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
timeout = 14400; # 4 hours
|
||||
maxSilent = 14400; # 4 hours
|
||||
};
|
||||
})
|
||||
161
pkgs/rocm-modules/rocsparse/default.nix
Normal file
161
pkgs/rocm-modules/rocsparse/default.nix
Normal file
@@ -0,0 +1,161 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
fetchzip,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocprim,
|
||||
clr,
|
||||
gfortran,
|
||||
git,
|
||||
gtest,
|
||||
boost,
|
||||
python3Packages,
|
||||
buildTests ? false,
|
||||
buildBenchmarks ? false, # Seems to depend on tests
|
||||
gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocsparse${clr.gpuArchSuffix}";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"benchmark"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocSPARSE";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-l7rOuVthfrSO5bnhgm49cjPnRbV/2sFhSRT+mShkBek=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
# no ninja: it buffers console output, and nix times out after long periods without output
|
||||
rocm-cmake
|
||||
clr
|
||||
gfortran
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
[
|
||||
rocprim
|
||||
git
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
gtest
|
||||
boost
|
||||
python3Packages.python
|
||||
python3Packages.pyyaml
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
"-DBUILD_CLIENTS_TESTS=ON"
|
||||
"-DCMAKE_MATRICES_DIR=/build/source/matrices"
|
||||
"-Dpython=python3"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"-DBUILD_CLIENTS_BENCHMARKS=ON"
|
||||
];
|
||||
|
||||
# We have to generate the test matrices ourselves: every pre-fetched .mtx file
# from passthru.matrices is linked into ./matrices and converted to .csr with
# the converter built from deps/convert.cpp.
postPatch = lib.optionalString (buildTests || buildBenchmarks) ''
  mkdir -p matrices

  # One "ln -s" per entry of passthru.matrices, generated so that this list
  # cannot drift from deps.nix. attrValues yields the entries in
  # attribute-name order (matrix-01 .. matrix-24).
  ${lib.concatMapStringsSep "\n" (
    matrix: "ln -s ${matrix}/*.mtx matrices"
  ) (lib.attrValues finalAttrs.passthru.matrices)}

  # Not used by the original cmake, causes an error
  rm matrices/*_b.mtx

  echo "deps/convert.cpp -> deps/mtx2csr"
  hipcc deps/convert.cpp -O3 -o deps/mtx2csr

  # Convert each linked matrix and drop the symlink once the .csr exists.
  for mat in $(ls -1 matrices | cut -d "." -f 1); do
    echo "mtx2csr: $mat.mtx -> $mat.csr"
    deps/mtx2csr matrices/$mat.mtx matrices/$mat.csr
    unlink matrices/$mat.mtx
  done
'';
|
||||
|
||||
# Distribute the installed client binaries over the extra outputs and ship
# the converted matrices with the tests.
postInstall =
  lib.optionalString buildBenchmarks ''
    mkdir -p $benchmark/bin
    cp -a $out/bin/* $benchmark/bin
    rm $benchmark/bin/rocsparse-test
  ''
  + lib.optionalString (buildTests || buildBenchmarks) ''
    mkdir -p $test/bin
    mv $out/bin/* $test/bin
    # The bench binary only exists when benchmarks were built.
    rm $test/bin/rocsparse-bench || true
    # Locate the unpacked source tree through the stdenv variables instead of
    # assuming the builder's top-level directory is /build.
    mv "$NIX_BUILD_TOP/$sourceRoot/matrices" $test
    rmdir $out/bin
  '';
|
||||
|
||||
passthru = {
  # Test matrices described in deps.nix; each archive is listed under both mirrors.
  matrices = import ./deps.nix {
    inherit fetchzip;
    mirror1 = "https://sparse.tamu.edu/MM";
    mirror2 = "https://www.cise.ufl.edu/research/sparse/MM";
  };

  # Hands the package name and the src owner/repo to rocmUpdateScript.
  updateScript = rocmUpdateScript {
    inherit (finalAttrs.src) owner repo;
    name = finalAttrs.pname;
  };
};
|
||||
|
||||
# Package metadata, spelled with explicit lib.* paths instead of `with lib;`.
meta = {
  description = "ROCm SPARSE implementation";
  homepage = "https://github.com/ROCm/rocSPARSE";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
|
||||
})
|
||||
223
pkgs/rocm-modules/rocsparse/deps.nix
Normal file
223
pkgs/rocm-modules/rocsparse/deps.nix
Normal file
@@ -0,0 +1,223 @@
|
||||
# Test matrix archives for rocSPARSE.
# Takes fetchzip plus two mirror base URLs and returns an attribute set
# matrix-01 .. matrix-24 of fetchzip results.
{
  fetchzip,
  mirror1,
  mirror2,
}:

let
  # Fetch "<path>.tar.gz", listing mirror1 first and mirror2 second.
  fetchMatrix =
    sha256: path:
    fetchzip {
      inherit sha256;
      urls = map (mirror: "${mirror}/${path}.tar.gz") [
        mirror1
        mirror2
      ];
    };
in
{
  matrix-01 = fetchMatrix "sha256-AHur5ZIDZTFRrO2GV0ieXrffq4KUiGWiZ59pv0fUtEQ=" "SNAP/amazon0312";
  matrix-02 = fetchMatrix "sha256-0rSxaN4lQcdaCLsvlgicG70FXUxXeERPiEmQ4MzbRdE=" "Muite/Chebyshev4";
  matrix-03 = fetchMatrix "sha256-hDzDWDUnHEyFedX/tMNq83ZH8uWyM4xtZYUUAD3rizo=" "FEMLAB/sme3Dc";
  matrix-04 = fetchMatrix "sha256-GmN2yOt/MoX01rKe05aTyB3ypUP4YbQGOITZ0BqPmC0=" "Williams/webbase-1M";
  matrix-05 = fetchMatrix "sha256-gQNjfVyWzNM9RwImJGhkhahRmZz74LzDs1oijL7mI7k=" "Williams/mac_econ_fwd500";
  matrix-06 = fetchMatrix "sha256-87cdZjntNcTuz5BtO59irhcuRbPllWSbhCEX3Td02qc=" "Williams/mc2depi";
  matrix-07 = fetchMatrix "sha256-WRamuJX3D8Tm+k0q67RjUDG3DeNAxhKiaPkk5afY5eU=" "Bova/rma10";
  matrix-08 = fetchMatrix "sha256-5dhkm293Mc3lzakKxHy5W5XIn4Rw+gihVh7gyrjEHXo=" "JGD_BIBD/bibd_22_8";
  matrix-09 = fetchMatrix "sha256-czjLWCjXAjZCk5TGYHaEkwSAzQu3TQ3QyB6eNKR4G88=" "Hamm/scircuit";
  matrix-10 = fetchMatrix "sha256-bYuLnJViAIcIejAkh69/bsNAVIDU4wfTLtD+nmHd6FM=" "Sandia/ASIC_320k";
  matrix-11 = fetchMatrix "sha256-aDwn8P1khYjo2Agbq5m9ZBInJUxf/knJNvyptt0fak0=" "GHS_psdef/bmwcra_1";
  matrix-12 = fetchMatrix "sha256-8OJqA/byhlAZd869TPUzZFdsOiwOoRGfKyhM+RMjXoY=" "HB/nos1";
  matrix-13 = fetchMatrix "sha256-FS0rKqmg+uHwsM/yGfQLBdd7LH/rUrdutkNGBD/Mh1I=" "HB/nos2";
  matrix-14 = fetchMatrix "sha256-DANnlrNJikrI7Pst9vRedtbuxepyHmCIu2yhltc4Qcs=" "HB/nos3";
  matrix-15 = fetchMatrix "sha256-21mUgqjWGUfYgiWwSrKh9vH8Vdt3xzcefmqYNYRpxiY=" "HB/nos4";
  matrix-16 = fetchMatrix "sha256-FOuXvGqBBFNkVS6cexmkluret54hCfCOdK+DOZllE4c=" "HB/nos5";
  matrix-17 = fetchMatrix "sha256-+7NI1rA/qQxYPpjXKHvAaCZ+LSaAJ4xuJvMRMBEUYxg=" "HB/nos6";
  matrix-18 = fetchMatrix "sha256-q3NxJjbwGGcFiQ9nhWfUKgZmdVwCfPmgQoqy0AqOsNc=" "HB/nos7";
  matrix-19 = fetchMatrix "sha256-0GAN6qmVfD+tprIigzuUUUwm5KVhkN9X65wMEvFltDY=" "DNVS/shipsec1";
  matrix-20 = fetchMatrix "sha256-f28Du/Urxsiq5NkRmRO10Zz9vvGRjEchquzHzbZpZ7U=" "Cote/mplate";
  matrix-21 = fetchMatrix "sha256-O+Wy0NfCU1hVUOfNR1dJpvDHLBwwa301IRJDrQJnhak=" "Bai/qc2534";
  matrix-22 = fetchMatrix "sha256-oxMnt8U5Cf1ILWcBdU6W9jdSMMm+U6bIVl8nm3n3+OA=" "Chevron/Chevron2";
  matrix-23 = fetchMatrix "sha256-MFD9BxFI/3IS7yatW121BAI04fbqrXpgYDT5UKjeKcU=" "Chevron/Chevron3";
  matrix-24 = fetchMatrix "sha256-ikS8O51pe1nt3BNyhvfvqCbVL0+bg/da9bqGqeBDkTg=" "Chevron/Chevron4";
}
|
||||
94
pkgs/rocm-modules/rocthrust/default.nix
Normal file
94
pkgs/rocm-modules/rocthrust/default.nix
Normal file
@@ -0,0 +1,94 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocprim,
|
||||
clr,
|
||||
gtest,
|
||||
buildTests ? false,
|
||||
buildBenchmarks ? false,
|
||||
gpuTargets ? [ ],
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocthrust";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"benchmark"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocThrust";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-bHyqG0pSt6bc6cDMnd1uY+0o+V3cxdp0mUEzWYRdd20=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
rocprim
|
||||
clr
|
||||
];
|
||||
|
||||
buildInputs = lib.optionals buildTests [
|
||||
gtest
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DHIP_ROOT_DIR=${clr}"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"-DBUILD_TEST=ON"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"-DBUILD_BENCHMARKS=ON"
|
||||
];
|
||||
|
||||
# Move test and benchmark binaries into their own outputs, then remove
# $out/bin once anything has been split off.
postInstall = lib.concatStrings [
  (lib.optionalString buildTests ''
    mkdir -p $test/bin
    mv $out/bin/{test_*,*.hip} $test/bin
  '')
  (lib.optionalString buildBenchmarks ''
    mkdir -p $benchmark/bin
    mv $out/bin/benchmark_* $benchmark/bin
  '')
  (lib.optionalString (buildTests || buildBenchmarks) ''
    rm -rf $out/bin
  '')
];
|
||||
|
||||
# Hands the package name and the src owner/repo to rocmUpdateScript.
passthru.updateScript = rocmUpdateScript {
  inherit (finalAttrs.src) owner repo;
  name = finalAttrs.pname;
};

# Package metadata, spelled with explicit lib.* paths instead of `with lib;`.
meta = {
  description = "ROCm parallel algorithm library";
  homepage = "https://github.com/ROCm/rocThrust";
  license = [ lib.licenses.asl20 ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
|
||||
})
|
||||
120
pkgs/rocm-modules/roctracer/default.nix
Normal file
120
pkgs/rocm-modules/roctracer/default.nix
Normal file
@@ -0,0 +1,120 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
clr,
|
||||
rocm-device-libs,
|
||||
libxml2,
|
||||
doxygen,
|
||||
graphviz,
|
||||
gcc-unwrapped,
|
||||
libbacktrace,
|
||||
rocm-runtime,
|
||||
python3Packages,
|
||||
buildDocs ? false, # Nothing seems to be generated, so not making the output
|
||||
buildTests ? false,
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "roctracer";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
"doc"
|
||||
]
|
||||
++ lib.optionals buildTests [
|
||||
"test"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "roctracer";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-Dwk5cBZLysmsVA2kwpQM0FQt2KXOGcaZcAw/d8VUaXw=";
|
||||
};
|
||||
|
||||
nativeBuildInputs =
|
||||
[
|
||||
cmake
|
||||
clr
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
doxygen
|
||||
graphviz
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
libxml2
|
||||
libbacktrace
|
||||
python3Packages.python
|
||||
python3Packages.cppheaderparser
|
||||
];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
];
|
||||
|
||||
env.NIX_CFLAGS_COMPILE = toString [
|
||||
# Needed with GCC 12
|
||||
"-Wno-error=array-bounds"
|
||||
];
|
||||
|
||||
# Export the device bitcode location, and drop the test subdirectory from the
# build unless tests were requested.
postPatch =
  ''
    export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode
  ''
  + lib.optionalString (!buildTests) ''
    # --replace-fail aborts the build if upstream ever renames or removes the
    # line, instead of silently leaving the tests enabled.
    substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(test)" ""
  '';
|
||||
|
||||
# Tests always fail, probably need GPU
|
||||
# doCheck = buildTests;
|
||||
|
||||
postInstall =
|
||||
lib.optionalString buildDocs ''
|
||||
mkdir -p $doc
|
||||
''
|
||||
+ lib.optionalString buildTests ''
|
||||
mkdir -p $test/bin
|
||||
# Not sure why this is an install target
|
||||
find $out/test -executable -type f -exec mv {} $test/bin \;
|
||||
rm $test/bin/{*.sh,*.py}
|
||||
patchelf --set-rpath $out/lib:${
|
||||
lib.makeLibraryPath (
|
||||
finalAttrs.buildInputs
|
||||
++ [
|
||||
clr
|
||||
gcc-unwrapped.lib
|
||||
rocm-runtime
|
||||
]
|
||||
)
|
||||
} $test/bin/*
|
||||
rm -rf $out/test
|
||||
'';
|
||||
|
||||
# Hands the package name and the src owner/repo to rocmUpdateScript.
passthru.updateScript = rocmUpdateScript {
  inherit (finalAttrs.src) owner repo;
  name = finalAttrs.pname;
};

# Package metadata, spelled with explicit lib.* paths instead of `with lib;`.
meta = {
  description = "Tracer callback/activity library";
  homepage = "https://github.com/ROCm/roctracer";
  license = [ lib.licenses.mit ]; # mitx11
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
|
||||
})
|
||||
35
pkgs/rocm-modules/rocwmma/0000-dont-fetch-googletest.patch
Normal file
35
pkgs/rocm-modules/rocwmma/0000-dont-fetch-googletest.patch
Normal file
@@ -0,0 +1,35 @@
|
||||
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
|
||||
index 0d00883..86ce282 100644
|
||||
--- a/test/CMakeLists.txt
|
||||
+++ b/test/CMakeLists.txt
|
||||
@@ -30,30 +30,6 @@ cmake_dependent_option( ROCWMMA_BUILD_VALIDATION_TESTS "Build validation tests"
|
||||
cmake_dependent_option( ROCWMMA_BUILD_BENCHMARK_TESTS "Build benchmarking tests" OFF "ROCWMMA_BUILD_TESTS" OFF )
|
||||
cmake_dependent_option( ROCWMMA_BUILD_EXTENDED_TESTS "Build extended test parameter coverage" OFF "ROCWMMA_BUILD_TESTS" OFF )
|
||||
|
||||
-# Test/benchmark requires additional dependencies
|
||||
-include( FetchContent )
|
||||
-
|
||||
-FetchContent_Declare(
|
||||
- googletest
|
||||
- GIT_REPOSITORY https://github.com/google/googletest.git
|
||||
- GIT_TAG release-1.12.1
|
||||
-)
|
||||
-FetchContent_GetProperties(googletest)
|
||||
-if(NOT googletest_POPULATED)
|
||||
-
|
||||
- # Fetch the content using default details
|
||||
- FetchContent_Populate(googletest)
|
||||
- # Save the shared libs setting, then force to static libs
|
||||
- set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS})
|
||||
- set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE)
|
||||
-
|
||||
- # Add gtest targets as static libs
|
||||
- add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
|
||||
-
|
||||
- # Restore shared libs setting
|
||||
- set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE)
|
||||
-endif()
|
||||
-
|
||||
set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(ROCWMMA_COMMON_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/rocwmma_gtest_main.cpp)
|
||||
121
pkgs/rocm-modules/rocwmma/default.nix
Normal file
121
pkgs/rocm-modules/rocwmma/default.nix
Normal file
@@ -0,0 +1,121 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocm-smi,
|
||||
clr,
|
||||
openmp,
|
||||
gtest,
|
||||
rocblas,
|
||||
buildTests ? false, # Will likely fail building because wavefront shifts are not supported for certain archs
|
||||
buildExtendedTests ? false,
|
||||
buildBenchmarks ? false,
|
||||
buildSamples ? false,
|
||||
gpuTargets ? [ ],
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocwmma";
|
||||
version = "6.4.1";
|
||||
|
||||
outputs =
|
||||
[
|
||||
"out"
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
"test"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"benchmark"
|
||||
]
|
||||
++ lib.optionals buildSamples [
|
||||
"sample"
|
||||
];
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rocWMMA";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-fjyxMrzt74rE7Gf4v4WawYltuw1fvahwZUpauMIE3qc=";
|
||||
};
|
||||
|
||||
patches = lib.optionals (buildTests || buildBenchmarks) [
|
||||
./0000-dont-fetch-googletest.patch
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
[
|
||||
openmp
|
||||
]
|
||||
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
rocm-smi
|
||||
gtest
|
||||
rocblas
|
||||
];
|
||||
|
||||
cmakeFlags =
|
||||
[
|
||||
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
|
||||
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
|
||||
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
|
||||
"-DROCWMMA_BUILD_TESTS=${if buildTests || buildBenchmarks then "ON" else "OFF"}"
|
||||
"-DROCWMMA_BUILD_SAMPLES=${if buildSamples then "ON" else "OFF"}"
|
||||
# Manually define CMAKE_INSTALL_<DIR>
|
||||
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
]
|
||||
++ lib.optionals (gpuTargets != [ ]) [
|
||||
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
]
|
||||
++ lib.optionals buildExtendedTests [
|
||||
"-DROCWMMA_BUILD_EXTENDED_TESTS=ON"
|
||||
]
|
||||
++ lib.optionals buildBenchmarks [
|
||||
"-DROCWMMA_BUILD_BENCHMARK_TESTS=ON"
|
||||
"-DROCWMMA_BENCHMARK_WITH_ROCBLAS=ON"
|
||||
];
|
||||
|
||||
postInstall =
|
||||
lib.optionalString (buildTests || buildBenchmarks) ''
|
||||
mkdir -p $test/bin
|
||||
mv $out/bin/{*_test,*-validate} $test/bin
|
||||
''
|
||||
+ lib.optionalString buildBenchmarks ''
|
||||
mkdir -p $benchmark/bin
|
||||
mv $out/bin/*-bench $benchmark/bin
|
||||
''
|
||||
+ lib.optionalString buildSamples ''
|
||||
mkdir -p $sample/bin
|
||||
mv $out/bin/sgemmv $sample/bin
|
||||
mv $out/bin/simple_gemm $sample/bin
|
||||
mv $out/bin/simple_dlrm $sample/bin
|
||||
''
|
||||
+ lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
|
||||
rm -rf $out/bin
|
||||
'';
|
||||
|
||||
# Hands the package name and the src owner/repo to rocmUpdateScript.
passthru.updateScript = rocmUpdateScript {
  inherit (finalAttrs.src) owner repo;
  name = finalAttrs.pname;
};

# Package metadata, spelled with explicit lib.* paths instead of `with lib;`.
meta = {
  description = "Mixed precision matrix multiplication and accumulation";
  homepage = "https://github.com/ROCm/rocWMMA";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
|
||||
})
|
||||
104
pkgs/rocm-modules/rpp/default.nix
Normal file
104
pkgs/rocm-modules/rpp/default.nix
Normal file
@@ -0,0 +1,104 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
rocmUpdateScript,
|
||||
cmake,
|
||||
rocm-cmake,
|
||||
rocm-docs-core,
|
||||
half,
|
||||
clr,
|
||||
openmp,
|
||||
boost,
|
||||
python3Packages,
|
||||
buildDocs ? false, # Needs internet
|
||||
useOpenCL ? false,
|
||||
useCPU ? false,
|
||||
gpuTargets ? [ ],
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
# The package name carries the selected backend as a suffix. The precedence
# mirrors the -DBACKEND choice in cmakeFlags: useCPU wins over useOpenCL, and
# HIP is the default. (The previous chain tested `!useOpenCL && !useCPU`
# twice, so "opencl" was unreachable and OpenCL builds were named "rpp-cpu".)
pname =
  "rpp-"
  + (
    if useCPU then
      "cpu"
    else if useOpenCL then
      "opencl"
    else
      "hip"
  );
|
||||
|
||||
version = "6.4.1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "rpp";
|
||||
rev = "rocm-${finalAttrs.version}";
|
||||
hash = "sha256-rccVjSrOVIe4ZDtloCoCCI3u9UIcUqdirHIzS7ffAas=";
|
||||
};
|
||||
|
||||
nativeBuildInputs =
|
||||
[
|
||||
cmake
|
||||
rocm-cmake
|
||||
clr
|
||||
]
|
||||
++ lib.optionals buildDocs [
|
||||
rocm-docs-core
|
||||
python3Packages.python
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
half
|
||||
openmp
|
||||
boost
|
||||
];
|
||||
|
||||
CFLAGS = "-I${openmp.dev}/include";
|
||||
CXXFLAGS = "-I${openmp.dev}/include";
|
||||
# OpenMP and ROCm locations, the optional GPU target list, and exactly one
# -DBACKEND flag (CPU takes precedence over OpenCL; HIP is the default).
cmakeFlags =
  let
    ompInclude = "${openmp.dev}/include";
    backend =
      if useCPU then
        "CPU"
      else if useOpenCL then
        "OCL"
      else
        "HIP";
  in
  [
    "-DOpenMP_C_INCLUDE_DIR=${ompInclude}"
    "-DOpenMP_CXX_INCLUDE_DIR=${ompInclude}"
    "-DOpenMP_omp_LIBRARY=${openmp}/lib"
    "-DROCM_PATH=${clr}"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ [ "-DBACKEND=${backend}" ];
|
||||
|
||||
postPatch = lib.optionalString (!useOpenCL && !useCPU) ''
|
||||
# Bad path
|
||||
substituteInPlace CMakeLists.txt \
|
||||
--replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clr}/bin/hipcc"
|
||||
'';
|
||||
|
||||
postBuild = lib.optionalString buildDocs ''
|
||||
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
|
||||
'';
|
||||
|
||||
# Hands the package name and the src owner/repo to rocmUpdateScript.
passthru.updateScript = rocmUpdateScript {
  inherit (finalAttrs.src) owner repo;
  name = finalAttrs.pname;
};

# Package metadata, spelled with explicit lib.* paths instead of `with lib;`.
# The OpenCL variant is marked broken.
meta = {
  description = "Comprehensive high-performance computer vision library for AMD processors";
  homepage = "https://github.com/ROCm/rpp";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
  broken = useOpenCL;
};
|
||||
})
|
||||
115
pkgs/rocm-modules/tensile/default.nix
Normal file
115
pkgs/rocm-modules/tensile/default.nix
Normal file
@@ -0,0 +1,115 @@
|
||||
{
|
||||
lib,
|
||||
fetchFromGitHub,
|
||||
fetchpatch,
|
||||
rocmUpdateScript,
|
||||
buildPythonPackage,
|
||||
pytestCheckHook,
|
||||
setuptools,
|
||||
pyyaml,
|
||||
msgpack,
|
||||
simplejson,
|
||||
ujson,
|
||||
distro,
|
||||
orjson,
|
||||
pandas,
|
||||
joblib,
|
||||
filelock,
|
||||
clr,
|
||||
rich,
|
||||
isTensileLite ? false,
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {
|
||||
pname = if isTensileLite then "tensilelite" else "tensile";
|
||||
# Using a specific commit which has code object compression support from after the 6.3 release
|
||||
# Without compression packages are too large for hydra
|
||||
version = "6.4.1";
|
||||
format = "pyproject";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ROCm";
|
||||
repo = "Tensile";
|
||||
rev = "1752af518190500891a865379a4569b8abf6ba01";
|
||||
hash = "sha256-Wvz4PVs//3Ox7ykZHpjPzOVwlyATyc+MmVVenfTzWK4=";
|
||||
};
|
||||
|
||||
# TODO: It should be possible to run asm caps test ONCE for all supported arches
|
||||
# We currently disable the test because it's slow and runs each time tensile launches
|
||||
|
||||
postPatch =
|
||||
lib.optionalString (!isTensileLite) ''
|
||||
if grep -F .SafeLoader Tensile/LibraryIO.py; then
|
||||
substituteInPlace Tensile/LibraryIO.py \
|
||||
--replace-fail "yaml.SafeLoader" "yaml.CSafeLoader"
|
||||
fi
|
||||
# See TODO above about asm caps test
|
||||
substituteInPlace Tensile/Common.py \
|
||||
--replace-fail 'if globalParameters["AssemblerPath"] is not None:' "if False:"
|
||||
''
|
||||
+ ''
|
||||
# Add an assert that the fallback 9,0,0 is supported before setting the kernel to it
|
||||
# If it's not detected as supported we have an issue with compiler paths or the compiler is broken
|
||||
# and it's better to stop immediately
|
||||
substituteInPlace Tensile/KernelWriter.py \
|
||||
--replace-fail '= (9,0,0)' '= (9,0,0);assert(globalParameters["AsmCaps"][(9,0,0)]["SupportedISA"])'
|
||||
find . -type f -iname "*.sh" -exec chmod +x {} \;
|
||||
patchShebangs Tensile
|
||||
'';
|
||||
|
||||
buildInputs = [ setuptools ];
|
||||
|
||||
propagatedBuildInputs =
|
||||
[
|
||||
pyyaml
|
||||
msgpack
|
||||
pandas
|
||||
joblib
|
||||
distro
|
||||
]
|
||||
++ lib.optionals (!isTensileLite) [
|
||||
rich
|
||||
]
|
||||
++ lib.optionals isTensileLite [
|
||||
simplejson
|
||||
ujson
|
||||
orjson
|
||||
];
|
||||
|
||||
# Tensile and TensileLite carry disjoint patch sets, so the whole list is
# selected by variant.
patches =
  if isTensileLite then
    [
      ./tensilelite-create-library-dont-copy-twice.diff
      ./tensilelite-gen_assembly-venv-err-handling.diff
      # Currently disabled:
      # ./tensilelite-compression.diff
    ]
  else
    [
      ./tensile-solutionstructs-perf-fix.diff
      ./tensile-create-library-dont-copy-twice.diff
      (fetchpatch {
        # [PATCH] Extend Tensile HIP ISA compatibility
        sha256 = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8=";
        url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch";
      })
    ];
|
||||
|
||||
doCheck = false; # Too many errors, not sure how to set this up properly
|
||||
|
||||
nativeCheckInputs = [
|
||||
pytestCheckHook
|
||||
filelock
|
||||
clr
|
||||
];
|
||||
|
||||
env.ROCM_PATH = "${clr}";
|
||||
|
||||
pythonImportsCheck = [ "Tensile" ];
|
||||
|
||||
passthru.updateScript = rocmUpdateScript {
|
||||
name = pname;
|
||||
inherit (src) owner repo;
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "GEMMs and tensor contractions";
|
||||
homepage = "https://github.com/ROCm/Tensile";
|
||||
license = with licenses; [ mit ];
|
||||
teams = [ teams.rocm ];
|
||||
platforms = platforms.linux;
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||
index a1644606..c6ca2882 100644
|
||||
--- a/Tensile/TensileCreateLibrary.py
|
||||
+++ b/Tensile/TensileCreateLibrary.py
|
||||
@@ -852,9 +852,14 @@ def copyStaticFiles(outputPath=None):
|
||||
"KernelHeader.h",
|
||||
]
|
||||
|
||||
+ import filecmp
|
||||
for fileName in libraryStaticFiles:
|
||||
# copy file
|
||||
- shutil.copy(os.path.join(globalParameters["SourcePath"], fileName), outputPath)
|
||||
+ # no need to copy twice if it has already been copied
|
||||
+ src = os.path.join(globalParameters["SourcePath"], fileName)
|
||||
+ dst = os.path.join(outputPath, os.path.basename(src))
|
||||
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
|
||||
+ shutil.copyfile(src, dst)
|
||||
|
||||
return libraryStaticFiles
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
diff --git a/Tensile/SolutionStructs.py b/Tensile/SolutionStructs.py
|
||||
index f663c6f1..17bcf897 100644
|
||||
--- a/Tensile/SolutionStructs.py
|
||||
+++ b/Tensile/SolutionStructs.py
|
||||
@@ -4828,24 +4828,26 @@ class Solution(collections.abc.Mapping):
|
||||
# create a dictionary of lists of parameter values
|
||||
@staticmethod
|
||||
def getSerialNaming(objs):
|
||||
+ valid_params = sorted(validParameters.keys())
|
||||
data = {}
|
||||
- for objIdx in range(0, len(objs)):
|
||||
- obj = objs[objIdx]
|
||||
- for paramName in sorted(obj.keys()):
|
||||
- if paramName in list(validParameters.keys()):
|
||||
- paramValue = obj[paramName]
|
||||
- if paramName in data:
|
||||
- if paramValue not in data[paramName]:
|
||||
- data[paramName].append(paramValue)
|
||||
- else:
|
||||
- data[paramName] = [ paramValue ]
|
||||
- maxObjs = 1
|
||||
- for paramName in data:
|
||||
- if not isinstance(data[paramName][0],dict):
|
||||
- data[paramName] = sorted(data[paramName])
|
||||
- maxObjs *= len(data[paramName])
|
||||
- numDigits = len(str(maxObjs))
|
||||
- return [ data, numDigits ]
|
||||
+
|
||||
+ objs = [getattr(obj, "_state", obj) for obj in objs]
|
||||
+
|
||||
+ for param in valid_params:
|
||||
+ d = []
|
||||
+ for obj in objs:
|
||||
+ if param in obj:
|
||||
+ v = obj[param]
|
||||
+ if v not in d:
|
||||
+ d.append(v)
|
||||
+ if len(d):
|
||||
+ if not isinstance(d[0], dict): d.sort()
|
||||
+ data[param] = d
|
||||
+
|
||||
+ # Calculate max objects using prod() from math module
|
||||
+ max_objs = math.prod(len(values) for values in data.values())
|
||||
+ num_digits = len(str(max_objs))
|
||||
+ return data, num_digits
|
||||
|
||||
########################################
|
||||
# Get Name Serial
|
||||
345
pkgs/rocm-modules/tensile/tensilelite-compression.diff
Normal file
345
pkgs/rocm-modules/tensile/tensilelite-compression.diff
Normal file
@@ -0,0 +1,345 @@
|
||||
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||
index b8cea84558..1bc24bd1dd 100644
|
||||
--- a/Tensile/TensileCreateLibrary.py
|
||||
+++ b/Tensile/TensileCreateLibrary.py
|
||||
@@ -41,6 +41,7 @@
|
||||
from .SolutionLibrary import MasterSolutionLibrary
|
||||
from .SolutionStructs import Solution
|
||||
from .CustomYamlLoader import load_logic_gfx_arch
|
||||
+from .Utilities.Profile import profile
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
@@ -1233,7 +1234,7 @@ def validateLibrary(masterLibraries: MasterSolutionLibrary,
|
||||
################################################################################
|
||||
# Tensile Create Library
|
||||
################################################################################
|
||||
-@timing
|
||||
+@profile
|
||||
def TensileCreateLibrary():
|
||||
print1("")
|
||||
print1(HR)
|
||||
@@ -1558,7 +1559,6 @@ def param(key, value):
|
||||
|
||||
print1("# Check if generated files exists.")
|
||||
|
||||
- @timing
|
||||
def checkFileExistence(files):
|
||||
for filePath in files:
|
||||
if not os.path.exists(filePath):
|
||||
diff --git a/Tensile/Utilities/Profile.py b/Tensile/Utilities/Profile.py
|
||||
new file mode 100644
|
||||
index 0000000000..cc3c7eb44c
|
||||
--- /dev/null
|
||||
+++ b/Tensile/Utilities/Profile.py
|
||||
@@ -0,0 +1,77 @@
|
||||
+################################################################################
|
||||
+#
|
||||
+# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
+#
|
||||
+# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
+# of this software and associated documentation files (the "Software"), to deal
|
||||
+# in the Software without restriction, including without limitation the rights
|
||||
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
+# copies of the Software, and to permit persons to whom the Software is
|
||||
+# furnished to do so, subject to the following conditions:
|
||||
+#
|
||||
+# The above copyright notice and this permission notice shall be included in
|
||||
+# all copies or substantial portions of the Software.
|
||||
+#
|
||||
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
+# SOFTWARE.
|
||||
+#
|
||||
+################################################################################
|
||||
+
|
||||
+import cProfile
|
||||
+import pstats
|
||||
+import os
|
||||
+
|
||||
+from pathlib import Path
|
||||
+from datetime import datetime, timezone
|
||||
+from typing import Callable, Tuple
|
||||
+
|
||||
+PROFILE_ENV_VAR = "TENSILE_PROFILE"
|
||||
+
|
||||
+def profile(func: Callable) -> Callable:
|
||||
+ """Profiling decorator.
|
||||
+
|
||||
+ Add ``@profile`` to mark a function for profiling; set the environment variable
|
||||
+ TENSILE_PROFILE=ON to enable profiling decorated functions.
|
||||
+ """
|
||||
+ if not envVariableIsSet(PROFILE_ENV_VAR):
|
||||
+ return func
|
||||
+ def wrapper(*args, **kwargs):
|
||||
+ path, filename = initProfileArtifacts(func.__name__)
|
||||
+
|
||||
+ prof = cProfile.Profile()
|
||||
+ output = prof.runcall(func, *args, **kwargs)
|
||||
+ result = pstats.Stats(prof)
|
||||
+ result.sort_stats(pstats.SortKey.TIME)
|
||||
+ result.dump_stats(path/filename)
|
||||
+
|
||||
+ return output
|
||||
+ return wrapper
|
||||
+
|
||||
+def envVariableIsSet(varName: str) -> bool:
|
||||
+ """Checks if the provided environment variable is set to "ON", "TRUE", or "1"
|
||||
+ Args:
|
||||
+ varName: Environment variable name.
|
||||
+ Returns:
|
||||
+ True if the environment variable is set, otherwise False.
|
||||
+ """
|
||||
+ value = os.environ.get(varName, "").upper()
|
||||
+ return True if value in ["ON", "TRUE", "1"] else False
|
||||
+
|
||||
+def initProfileArtifacts(funcName: str) -> Tuple[Path, str]:
|
||||
+ """Initializes filenames and paths for profiling artifacts based on the current datetime
|
||||
+ Args:
|
||||
+ funcName: The name of the function being profiled, nominally passed via func.__name__
|
||||
+ Returns:
|
||||
+ A tuple (path, filename) where the path is the artifact directory and filename is
|
||||
+ a .prof file with the profiling results.
|
||||
+ """
|
||||
+ dt = datetime.now(timezone.utc)
|
||||
+ filename = f"{funcName}-{dt.strftime('%Y-%m-%dT%H-%M-%SZ')}.prof"
|
||||
+ path = Path().cwd()/f"profiling-results-{dt.strftime('%Y-%m-%d')}"
|
||||
+ path.mkdir(exist_ok=True)
|
||||
+ return path, filename
|
||||
|
||||
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||
index e62b0072df..2c843ba936 100644
|
||||
--- a/Tensile/TensileCreateLibrary.py
|
||||
+++ b/Tensile/TensileCreateLibrary.py
|
||||
@@ -56,7 +56,7 @@
|
||||
import sys
|
||||
from timeit import default_timer as timer
|
||||
from pathlib import Path
|
||||
-from typing import Sequence, List
|
||||
+from typing import Sequence, List, Union
|
||||
|
||||
def timing(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
@@ -90,87 +90,142 @@ def processKernelSource(kernel, kernelWriterAssembly, ti):
|
||||
|
||||
return (err, src, header, kernelName, filename)
|
||||
|
||||
-def getAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath):
|
||||
- destDir = ensurePath(os.path.join(outputPath, 'library'))
|
||||
- asmDir = kernelWriterAssembly.getAssemblyDirectory()
|
||||
- archs = collections.defaultdict(list)
|
||||
+def linkIntoCodeObject(
|
||||
+ objFiles: List[str], coPathDest: Union[Path, str], kernelWriterAssembly: KernelWriterAssembly
|
||||
+):
|
||||
+ """Links object files into a code object file.
|
||||
|
||||
- for k in filter(lambda k: k['KernelLanguage'] == 'Assembly', kernels):
|
||||
- archs[tuple(k['ISA'])].append(k)
|
||||
+ Args:
|
||||
+ objFiles: A list of object files to be linked.
|
||||
+ coPathDest: The destination path for the code object file.
|
||||
+ kernelWriterAssembly: An instance of KernelWriterAssembly to get link arguments.
|
||||
|
||||
- coFiles = []
|
||||
+ Raises:
|
||||
+ subprocess.CalledProcessError: If linker invocation fails.
|
||||
+ """
|
||||
+ if os.name == "nt":
|
||||
+ # On Windows, the objFiles list command line (including spaces)
|
||||
+ # exceeds the limit of 8191 characters, so using response file
|
||||
+
|
||||
+ responseFile = os.path.join(os.path.dirname(str(coPathDest)), 'clangArgs.txt')  # next to the output
|
||||
+ with open(responseFile, 'wt') as file:
|
||||
+ file.write(" ".join(objFiles))
|
||||
+ file.flush()
|
||||
+
|
||||
+ args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', str(coPathDest), '@' + responseFile]
|
||||
+ subprocess.check_call(args)  # no cwd: the response file is referenced by its full path
|
||||
+ else:
|
||||
+ numObjFiles = len(objFiles)
|
||||
+ maxObjFiles = 10000
|
||||
+
|
||||
+ if numObjFiles > maxObjFiles:
|
||||
+ batchedObjFiles = [objFiles[i:i+maxObjFiles] for i in range(0, numObjFiles, maxObjFiles)]
|
||||
+ batchSize = int(math.ceil(numObjFiles / maxObjFiles))
|
||||
+
|
||||
+ newObjFiles = [str(coPathDest) + "." + str(i) for i in range(0, batchSize)]
|
||||
+ newObjFilesOutput = []
|
||||
+
|
||||
+ for batch, filename in zip(batchedObjFiles, newObjFiles):
|
||||
+ if len(batch) > 1:
|
||||
+ args = [globalParameters["ROCmLdPath"], "-r"] + batch + [ "-o", filename]
|
||||
+ print2(f"Linking object files into fewer object files: {' '.join(args)}")
|
||||
+ subprocess.check_call(args)
|
||||
+ newObjFilesOutput.append(filename)
|
||||
+ else:
|
||||
+ newObjFilesOutput.append(batch[0])  # lone file in this batch: pass it through unlinked
|
||||
+
|
||||
+ args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjFilesOutput, str(coPathDest))
|
||||
+ print2(f"Linking object files into code object: {' '.join(args)}")
|
||||
+ subprocess.check_call(args)
|
||||
+ else:
|
||||
+ args = kernelWriterAssembly.getLinkCodeObjectArgs(objFiles, str(coPathDest))
|
||||
+ print2(f"Linking object files into code object: {' '.join(args)}")
|
||||
+ subprocess.check_call(args)
|
||||
+
|
||||
+
|
||||
+def compressCodeObject(
|
||||
+ coPathSrc: Union[Path, str], coPathDest: Union[Path, str], gfx: str, bundler: str
|
||||
+):
|
||||
+ """Compresses a code object file using the provided bundler.
|
||||
+
|
||||
+ Args:
|
||||
+ coPathSrc: The source path of the code object file to be compressed.
|
||||
+ coPathDest: The destination path for the compressed code object file.
|
||||
+ gfx: The target GPU architecture.
|
||||
+ bundler: The path to the Clang Offload Bundler executable.
|
||||
+
|
||||
+ Raises:
|
||||
+ RuntimeError: If compressing the code object file fails.
|
||||
+ """
|
||||
+ args = [
|
||||
+ bundler,
|
||||
+ "--compress",
|
||||
+ "--type=o",
|
||||
+ "--bundle-align=4096",
|
||||
+ f"--targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--{gfx}",
|
||||
+ "--input=/dev/null",
|
||||
+ f"--input={str(coPathSrc)}",
|
||||
+ f"--output={str(coPathDest)}",
|
||||
+ ]
|
||||
+
|
||||
+ print2(f"Bundling/compressing code objects: {' '.join(args)}")
|
||||
+ try:
|
||||
+ out = subprocess.check_output(args, stderr=subprocess.STDOUT)
|
||||
+ print2(f"Output: {out}")
|
||||
+ except subprocess.CalledProcessError as err:
|
||||
+ raise RuntimeError(
|
||||
+ f"Error compressing code object via bundling: {err.output}\nFailed command: {' '.join(args)}"
|
||||
+ )
|
||||
+
|
||||
+def buildAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath):
|
||||
+
|
||||
+ isAsm = lambda k: k["KernelLanguage"] == "Assembly"
|
||||
+
|
||||
+ extObj = ".o"
|
||||
+ extCo = ".co"
|
||||
+ extCoRaw = ".co.raw"
|
||||
|
||||
- for arch, archKernels in archs.items():
|
||||
+ destDir = Path(ensurePath(os.path.join(outputPath, 'library')))
|
||||
+ asmDir = Path(kernelWriterAssembly.getAssemblyDirectory())
|
||||
+
|
||||
+ archKernelMap = collections.defaultdict(list)
|
||||
+ for k in filter(isAsm, kernels):
|
||||
+ archKernelMap[tuple(k['ISA'])].append(k)
|
||||
+
|
||||
+ coFiles = []
|
||||
+ for arch, archKernels in archKernelMap.items():
|
||||
if len(archKernels) == 0:
|
||||
continue
|
||||
|
||||
- archName = getGfxName(arch)
|
||||
+ gfx = getGfxName(arch)
|
||||
|
||||
if globalParameters["MergeFiles"] or globalParameters["NumMergedFiles"] > 1 or globalParameters["LazyLibraryLoading"]:
|
||||
- objectFiles = [kernelWriterAssembly.getKernelFileBase(k) + '.o' for k in archKernels if 'codeObjectFile' not in k]
|
||||
+ objectFiles = [str(asmDir / (kernelWriterAssembly.getKernelFileBase(k) + extObj)) for k in archKernels if 'codeObjectFile' not in k]
|
||||
|
||||
- #Group kernels from placeholder libraries
|
||||
coFileMap = collections.defaultdict(list)
|
||||
+
|
||||
if len(objectFiles):
|
||||
- coFileMap[os.path.join(destDir, "TensileLibrary_"+archName+".co")] = objectFiles
|
||||
+ coFileMap[asmDir / ("TensileLibrary_"+ gfx + extCoRaw)] = objectFiles
|
||||
|
||||
for kernel in archKernels:
|
||||
coName = kernel.get("codeObjectFile", None)
|
||||
if coName:
|
||||
- coFileMap[os.path.join(destDir, coName+".co")] += [kernelWriterAssembly.getKernelFileBase(kernel) + '.o']
|
||||
+ coFileMap[asmDir / (coName + extCoRaw)].append(str(asmDir / (kernelWriterAssembly.getKernelFileBase(kernel) + extObj)))
|
||||
|
||||
- for coFile, objectFiles in coFileMap.items():
|
||||
- if os.name == "nt":
|
||||
- # On Windows, the objectFiles list command line (including spaces)
|
||||
- # exceeds the limit of 8191 characters, so using response file
|
||||
+ for coFileRaw, objFiles in coFileMap.items():
|
||||
|
||||
- responseArgs = objectFiles
|
||||
- responseFile = os.path.join(asmDir, 'clangArgs.txt')
|
||||
- with open(responseFile, 'wt') as file:
|
||||
- file.write( " ".join(responseArgs) )
|
||||
- file.flush()
|
||||
-
|
||||
- args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', coFile, '@clangArgs.txt']
|
||||
- subprocess.check_call(args, cwd=asmDir)
|
||||
- else:
|
||||
- numOfObjectFiles = len(objectFiles)
|
||||
- splitFiles = 10000
|
||||
- if numOfObjectFiles > splitFiles:
|
||||
- slicedObjectFilesList = [objectFiles[x:x+splitFiles] for x in range(0, numOfObjectFiles, splitFiles)]
|
||||
- objectFileBasename = os.path.split(coFile)[-1].split('.')[0]
|
||||
- numOfOneSliceOfObjectFiles = int(math.ceil(numOfObjectFiles / splitFiles))
|
||||
- newObjectFiles = [ objectFileBasename + "_" + str(i) + ".o" for i in range(0, numOfOneSliceOfObjectFiles)]
|
||||
- newObjectFilesOutput = []
|
||||
- for slicedObjectFiles, objectFile in zip(slicedObjectFilesList, newObjectFiles):
|
||||
- if len(slicedObjectFiles) > 1:
|
||||
- args = [globalParameters["ROCmLdPath"], "-r"] + slicedObjectFiles + [ "-o", objectFile ]
|
||||
- if globalParameters["PrintCodeCommands"]:
|
||||
- print(asmDir)
|
||||
- print(' '.join(args))
|
||||
- subprocess.check_call(args, cwd=asmDir)
|
||||
- newObjectFilesOutput.append(objectFile)
|
||||
- else:
|
||||
- newObjectFilesOutput.append(slicedObjectFiles[0])
|
||||
- args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjectFilesOutput, coFile)
|
||||
- if globalParameters["PrintCodeCommands"]:
|
||||
- print(asmDir)
|
||||
- print(' '.join(args))
|
||||
- subprocess.check_call(args, cwd=asmDir)
|
||||
- else:
|
||||
- args = kernelWriterAssembly.getLinkCodeObjectArgs(objectFiles, coFile)
|
||||
- if globalParameters["PrintCodeCommands"]:
|
||||
- print(asmDir)
|
||||
- print(' '.join(args))
|
||||
- subprocess.check_call(args, cwd=asmDir)
|
||||
+ linkIntoCodeObject(objFiles, coFileRaw, kernelWriterAssembly)
|
||||
+ coFile = destDir / coFileRaw.name.replace(extCoRaw, extCo)
|
||||
+ compressCodeObject(coFileRaw, coFile, gfx, globalParameters["ClangOffloadBundlerPath"])
|
||||
|
||||
coFiles.append(coFile)
|
||||
else:
|
||||
# no mergefiles
|
||||
def newCoFileName(kName):
|
||||
if globalParameters["PackageLibrary"]:
|
||||
- return os.path.join(destDir, archName, kName + '.co')
|
||||
+ return os.path.join(destDir, gfx, kName + '.co')
|
||||
else:
|
||||
- return os.path.join(destDir, kName + '_' + archName + '.co')
|
||||
+ return os.path.join(destDir, kName + '_' + gfx + '.co')
|
||||
|
||||
def orgCoFileName(kName):
|
||||
return os.path.join(asmDir, kName + '.co')
|
||||
@@ -179,6 +234,8 @@ def orgCoFileName(kName):
|
||||
map(lambda k: kernelWriterAssembly.getKernelFileBase(k), archKernels)), "Copying code objects"):
|
||||
shutil.copyfile(src, dst)
|
||||
coFiles.append(dst)
|
||||
+ printWarning("Code object files are not compressed in `--no-merge-files` build mode.")
|
||||
+
|
||||
return coFiles
|
||||
|
||||
def which(p):
|
||||
@@ -645,7 +702,7 @@ def success(kernel):
|
||||
|
||||
if not globalParameters["GenerateSourcesAndExit"]:
|
||||
codeObjectFiles += buildSourceCodeObjectFiles(CxxCompiler, kernelFiles, outputPath)
|
||||
- codeObjectFiles += getAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath)
|
||||
+ codeObjectFiles += buildAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath)
|
||||
|
||||
Common.popWorkingPath() # build_tmp
|
||||
Common.popWorkingPath() # workingDir
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||
index 2b9da394..b001fa7c 100644
|
||||
--- a/Tensile/TensileCreateLibrary.py
|
||||
+++ b/Tensile/TensileCreateLibrary.py
|
||||
@@ -808,10 +808,13 @@ def copyStaticFiles(outputPath=None):
|
||||
"ReductionTemplate.h",
|
||||
"memory_gfx.h" ]
|
||||
|
||||
+ import filecmp
|
||||
for fileName in libraryStaticFiles:
|
||||
- # copy file
|
||||
- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \
|
||||
- outputPath )
|
||||
+ src = os.path.join(globalParameters["SourcePath"], fileName)
|
||||
+ dst = os.path.join(outputPath, os.path.basename(src))
|
||||
+ # no need to copy twice if it has already been copied
|
||||
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
|
||||
+ shutil.copyfile(src, dst)
|
||||
|
||||
return libraryStaticFiles
|
||||
|
||||
@@ -1417,9 +1420,13 @@ def TensileCreateLibrary():
|
||||
writeCMake(outputPath, solutionFiles, sourceKernelFiles, staticFiles, masterLibraries)
|
||||
|
||||
# Make sure to copy the library static files.
|
||||
+ import filecmp
|
||||
for fileName in staticFiles:
|
||||
- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \
|
||||
- outputPath )
|
||||
+ src = os.path.join(globalParameters["SourcePath"], fileName)
|
||||
+ dst = os.path.join(outputPath, os.path.basename(src))
|
||||
+ # no need to copy twice if it has already been copied
|
||||
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
|
||||
+ shutil.copyfile(src, dst)
|
||||
|
||||
# write solutions and kernels
|
||||
codeObjectFiles = writeSolutionsAndKernels(outputPath, CxxCompiler, None, solutions,
|
||||
@@ -0,0 +1,36 @@
|
||||
diff --git a/Tensile/Ops/gen_assembly.sh b/Tensile/Ops/gen_assembly.sh
|
||||
index 0b21b6c6..609f1dd1 100755
|
||||
--- a/Tensile/Ops/gen_assembly.sh
|
||||
+++ b/Tensile/Ops/gen_assembly.sh
|
||||
@@ -23,6 +23,8 @@
|
||||
#
|
||||
################################################################################
|
||||
|
||||
+set -x
|
||||
+
|
||||
archStr=$1
|
||||
dst=$2
|
||||
venv=$3
|
||||
@@ -35,7 +37,13 @@ fi
|
||||
|
||||
toolchain=${rocm_path}/llvm/bin/clang++
|
||||
|
||||
-. ${venv}/bin/activate
|
||||
+if ! [ -z ${TENSILE_GEN_ASSEMBLY_TOOLCHAIN+x} ]; then
|
||||
+ toolchain="${TENSILE_GEN_ASSEMBLY_TOOLCHAIN}"
|
||||
+fi
|
||||
+
|
||||
+if [ -f ${venv}/bin/activate ]; then
|
||||
+ . ${venv}/bin/activate
|
||||
+fi
|
||||
|
||||
IFS=';' read -r -a archs <<< "$archStr"
|
||||
|
||||
@@ -77,4 +85,6 @@ for arch in "${archs[@]}"; do
|
||||
python3 ./ExtOpCreateLibrary.py --src=$dst --co=$dst/extop_$arch.co --output=$dst --arch=$arch
|
||||
done
|
||||
|
||||
-deactivate
|
||||
+if [ -f ${venv}/bin/activate ]; then
|
||||
+ deactivate
|
||||
+fi
|
||||
56
pkgs/rocm-modules/triton/default.nix
Normal file
56
pkgs/rocm-modules/triton/default.nix
Normal file
@@ -0,0 +1,56 @@
|
||||
# ROCm-only build of Triton: re-targets the CUDA-free triton package at the
# ROCm clang stdenv and ROCm's LLVM, then patches the source tree so that no
# NVIDIA backend, test subdirectory, or network download is needed.
{
  triton-no-cuda,
  rocmPackages,
  fetchFromGitHub,
}:
let
  inherit (rocmPackages) clr;
  clangStdenv = rocmPackages.llvm.rocmClangStdenv;

  # Base package with its dependencies swapped for the ROCm variants.
  rocmTriton = triton-no-cuda.override (_: {
    inherit rocmPackages;
    llvm = rocmPackages.triton-llvm;
    rocmSupport = true;
    stdenv = clangStdenv;
  });
in
rocmTriton.overridePythonAttrs (prev: {
  version = "3.2.0";
  src = fetchFromGitHub {
    owner = "triton-lang";
    repo = "triton";
    # Pinned commit; the original annotation on this line reads "release/3.2.x".
    rev = "9641643da6c52000c807b5eeed05edaec4402a67";
    hash = "sha256-V1lpARwOLn28ZHfjiWR/JJWGw3MB34c+gz6Tq1GOVfo=";
  };

  stdenv = clangStdenv;
  buildInputs = prev.buildInputs ++ [ clr ];

  env = prev.env // {
    TRITON_OFFLINE_BUILD = 1;
    CXXFLAGS = "-O3 -I${clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include";
  };

  # Replace whatever patch list the base package carried with an empty one.
  patches = [ ];
  doCheck = false;
  dontStrip = true;

  postPatch = ''
    # Remove nvidia backend so we don't depend on unfree nvidia headers
    # when we only want to target ROCm
    rm -rf third_party/nvidia
    substituteInPlace CMakeLists.txt \
      --replace-fail "add_subdirectory(test)" ""
    sed -i '/nvidia\|NVGPU\|registerConvertTritonGPUToLLVMPass\|mlir::test::/Id' bin/RegisterTritonDialects.h
    sed -i '/TritonTestAnalysis/Id' bin/CMakeLists.txt
    substituteInPlace python/setup.py \
      --replace-fail 'backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]' \
      'backends = [*BackendInstaller.copy(["amd"]), *BackendInstaller.copy_externals()]'
    find . -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
    find . -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +
    # remove any downloads
    substituteInPlace python/setup.py \
      --replace-fail "[get_json_package_info()]" "[]"\
      --replace-fail "[get_llvm_package_info()]" "[]"\
      --replace-fail "curr_version != version" "False"
    # Don't fetch googletest
    substituteInPlace cmake/AddTritonUnitTest.cmake \
      --replace-fail 'include(''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)' "" \
      --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)"
    substituteInPlace third_party/amd/backend/compiler.py \
      --replace-fail '"/opt/rocm/llvm/bin/ld.lld"' "os.environ['ROCM_PATH']"' + "/llvm/bin/ld.lld"'
  '';
})
|
||||
62
pkgs/rocm-modules/update.nix
Normal file
62
pkgs/rocm-modules/update.nix
Normal file
@@ -0,0 +1,62 @@
|
||||
# Builds an update script for a ROCm package.
#
# The generated script queries the GitHub API for `owner`/`repo`, picks a
# version with the jq `filter`, and hands it to `update-source-version` for
# the attribute `rocmPackages_6.<pname>`.
#
# Arguments of the inner function:
#   name    package name; a "rocm-llvm-<x>" name maps to the attribute "llvm.<x>"
#   owner   GitHub repository owner
#   repo    GitHub repository name
#   page    API sub-path appended to the repository URL (default "releases")
#   filter  jq program applied to the (array-wrapped) API response; it should
#           print the bare version string
#
# Evaluates to a one-element list containing the script.
{
  lib,
  writeScript,
}:

{
  name ? "",
  owner ? "",
  repo ? "",
  page ? "releases",
  # input: array of [ { tag_name: "rocm-6.x.x", }, ... ]. some entries may have bad names like rocm-test-date we want to skip
  # output: first tag_name/name that's a proper version if any
  filter ? "map(.tag_name // .name) | map(select(test(\"^rocm-[0-9]+\\\\.[0-9]+(\\\\.[0-9]+)?$\"))) | first | ltrimstr(\"rocm-\")",
}:

let
  pname =
    if lib.hasPrefix "rocm-llvm-" name then "llvm.${lib.removePrefix "rocm-llvm-" name}" else name;

  updateScript = writeScript "update.sh" ''
    #!/usr/bin/env nix-shell
    #!nix-shell -i bash -p curl jq common-updater-scripts
    set -euo pipefail

    fetch_releases() {
      local api_url="https://api.github.com/repos/${owner}/${repo}/${page}"
      if [ "${page}" = "releases" ]; then
        api_url="$api_url?per_page=4"
      fi
      >&2 echo "$api_url"
      curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} -sL "$api_url"
    }

    find_valid_version() {
      local releases="$1"
      >&2 echo "$releases"
      # Wrap in array if not already an array to make handling a specific release or tags page the same
      >&2 echo jq -r 'if type == "array" then . else [.] end | ${filter}'
      echo "$releases" | jq -r 'if type == "array" then . else [.] end | ${filter}'
    }

    releases="$(fetch_releases)"
    version="$(find_valid_version "$releases")"

    # jq -r prints the literal string "null" when the filter matched nothing,
    # so an emptiness test alone would let a bogus version through.
    if [ -z "$version" ] || [ "$version" = "null" ]; then
      echo "No valid version found in the fetched release(s)." >&2
      exit 1
    fi

    IFS='.' read -ra version_arr <<< "$version"

    >&2 echo parsed version "''${version_arr[*]}" from "$version"

    if (( ''${version_arr[0]} > 6 )); then
      echo "'rocmPackages_6.${pname}' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." >&2
      exit 1
    fi

    update-source-version rocmPackages_6.${pname} "$version" --ignore-same-hash
  '';
in
[ updateScript ]
|
||||
Reference in New Issue
Block a user