Initial commit
This commit is contained in:
61
flake.lock
generated
Normal file
61
flake.lock
generated
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1748370509,
|
||||||
|
"narHash": "sha256-QlL8slIgc16W5UaI3w7xHQEP+Qmv/6vSNTpoZrrSlbk=",
|
||||||
|
"owner": "nixos",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "4faa5f5321320e49a78ae7848582f684d64783e9",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nixos",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
||||||
45
flake.nix
Normal file
45
flake.nix
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
{
  description = "A very basic flake";

  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs =
    {
      self,
      nixpkgs,
      flake-utils,
      ...
    }:
    let
      # Single definition of the ROCm package scope, shared by the per-system
      # `packages` output and by the overlay below.
      rocmModules = ./pkgs/rocm-modules;
    in
    # Per-system outputs: flake-utils wraps every attribute returned here as
    # `<attr>.<system>`, so only system-dependent outputs belong inside.
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = import nixpkgs {
          inherit system;
        };
      in
      {
        packages = pkgs.callPackage rocmModules { };
      }
    )
    # System-independent outputs. An overlay is a plain `final: prev: { ... }`
    # function and must be exposed as `overlays.<name>`; defining it inside
    # eachDefaultSystem would publish it as `overlays.<system>.default`.
    // {
      overlays.default = final: prev: {
        rocmPackages = final.callPackage rocmModules { };
        rocmPackages_6 = final.rocmPackages;
      };
    };
}
|
||||||
@@ -0,0 +1,104 @@
|
|||||||
|
From 4a0584f7c05641143151ebdc1be1163bebf9d35d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Las <las@protonmail.ch>
|
||||||
|
Date: Sun, 3 Jan 2021 18:35:37 +0000
|
||||||
|
Subject: [PATCH] Compile transupp.c as part of the library
|
||||||
|
|
||||||
|
The exported symbols are made weak to not conflict with users
|
||||||
|
of the library that already vendor this functionality.
|
||||||
|
---
|
||||||
|
CMakeLists.txt | 4 ++--
|
||||||
|
transupp.c | 14 +++++++-------
|
||||||
|
2 files changed, 9 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||||
|
index 0ca6f98..a9a0fae 100644
|
||||||
|
--- a/CMakeLists.txt
|
||||||
|
+++ b/CMakeLists.txt
|
||||||
|
@@ -533,7 +533,7 @@ set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
|
||||||
|
jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c
|
||||||
|
jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c
|
||||||
|
jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c
|
||||||
|
- jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)
|
||||||
|
+ jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c transupp.c)
|
||||||
|
|
||||||
|
if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
|
||||||
|
set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)
|
||||||
|
@@ -1489,7 +1489,7 @@ install(EXPORT ${CMAKE_PROJECT_NAME}Targets
|
||||||
|
|
||||||
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h
|
||||||
|
- ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h
|
||||||
|
+ ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h ${CMAKE_CURRENT_SOURCE_DIR}/transupp.h
|
||||||
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||||
|
|
||||||
|
include(cmakescripts/BuildPackages.cmake)
|
||||||
|
diff --git a/transupp.c b/transupp.c
|
||||||
|
index 6e86077..2da49a7 100644
|
||||||
|
--- a/transupp.c
|
||||||
|
+++ b/transupp.c
|
||||||
|
@@ -1386,7 +1386,7 @@ jt_read_integer(const char **strptr, JDIMENSION *result)
|
||||||
|
* This code is loosely based on XParseGeometry from the X11 distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-GLOBAL(boolean)
|
||||||
|
+GLOBAL(boolean) __attribute__((weak))
|
||||||
|
jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec)
|
||||||
|
{
|
||||||
|
info->crop = FALSE;
|
||||||
|
@@ -1486,7 +1486,7 @@ trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height)
|
||||||
|
* and transformation is not perfect. Otherwise returns TRUE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-GLOBAL(boolean)
|
||||||
|
+GLOBAL(boolean) __attribute__((weak))
|
||||||
|
jtransform_request_workspace(j_decompress_ptr srcinfo,
|
||||||
|
jpeg_transform_info *info)
|
||||||
|
{
|
||||||
|
@@ -2033,7 +2033,7 @@ adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width,
|
||||||
|
* to jpeg_write_coefficients().
|
||||||
|
*/
|
||||||
|
|
||||||
|
-GLOBAL(jvirt_barray_ptr *)
|
||||||
|
+GLOBAL(jvirt_barray_ptr *) __attribute__((weak))
|
||||||
|
jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||||
|
jvirt_barray_ptr *src_coef_arrays,
|
||||||
|
jpeg_transform_info *info)
|
||||||
|
@@ -2152,7 +2152,7 @@ jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||||
|
* Note that some transformations will modify the source data arrays!
|
||||||
|
*/
|
||||||
|
|
||||||
|
-GLOBAL(void)
|
||||||
|
+GLOBAL(void) __attribute__((weak))
|
||||||
|
jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||||
|
jvirt_barray_ptr *src_coef_arrays,
|
||||||
|
jpeg_transform_info *info)
|
||||||
|
@@ -2264,7 +2264,7 @@ jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||||
|
* (may use custom action then)
|
||||||
|
*/
|
||||||
|
|
||||||
|
-GLOBAL(boolean)
|
||||||
|
+GLOBAL(boolean) __attribute__((weak))
|
||||||
|
jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
|
||||||
|
int MCU_width, int MCU_height,
|
||||||
|
JXFORM_CODE transform)
|
||||||
|
@@ -2303,7 +2303,7 @@ jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
|
||||||
|
* This must be called before jpeg_read_header() to have the desired effect.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-GLOBAL(void)
|
||||||
|
+GLOBAL(void) __attribute__((weak))
|
||||||
|
jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
|
||||||
|
{
|
||||||
|
#ifdef SAVE_MARKERS_SUPPORTED
|
||||||
|
@@ -2331,7 +2331,7 @@ jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
|
||||||
|
* JFIF APP0 or Adobe APP14 markers if selected.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-GLOBAL(void)
|
||||||
|
+GLOBAL(void) __attribute__((weak))
|
||||||
|
jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||||
|
JCOPY_OPTION option)
|
||||||
|
{
|
||||||
|
--
|
||||||
|
2.29.2
|
||||||
|
|
||||||
90
pkgs/rocm-modules/amdsmi/default.nix
Normal file
90
pkgs/rocm-modules/amdsmi/default.nix
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  pkg-config,
  libdrm,
  wrapPython,
  autoPatchelfHook,
}:

let
  # E-SMI in-band library sources. They are copied into the source tree in
  # postPatch so the build does not have to `git clone` them.
  esmi_ib_src = fetchFromGitHub {
    owner = "amd";
    repo = "esmi_ib_library";
    rev = "esmi_pkg_ver-4.1.2";
    hash = "sha256-wj3krY/6AdmnoNOSqN9EE/Yxbx++0AW2vu7dovQrQ9I=";
  };
in
stdenv.mkDerivation (finalAttrs: {
  pname = "amdsmi";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "rocm";
    repo = "amdsmi";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-K6FVvieJnOCfbyNNwOWs3t836ihtvELJvcCjXQoHuRY=";
  };

  postPatch = ''
    substituteInPlace goamdsmi_shim/CMakeLists.txt \
      --replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \
      --replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#'

    # Manually unpack esmi_ib_src and add amd_hsmp.h so execute-process git clone doesn't run
    cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library
    mkdir -p ./esmi_ib_library/include/asm
    cp ./include/amd_smi/impl/amd_hsmp.h ./esmi_ib_library/include/asm/amd_hsmp.h
  '';

  patches = [
    # Fix ld.lld undefined reference: drmGetVersion
    # (fetchpatch {
    #   url = "https://github.com/ROCm/amdsmi/commit/c3864bf6171970d86dc50fd23f06377736823997.patch";
    #   hash = "sha256-zRG1tBD8sIQCWdKfCbXC/Z/6d6NTrRYvRpddPWdM4j8=";
    # })
  ];

  nativeBuildInputs = [
    cmake
    pkg-config
    wrapPython
    autoPatchelfHook
  ];

  buildInputs = [
    libdrm
  ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Replace the installed bin/amd-smi with a symlink to the CLI entry script
  # under libexec.
  postInstall = ''
    wrapPythonProgramsIn $out
    rm $out/bin/amd-smi
    ln -sf $out/libexec/amdsmi_cli/amdsmi_cli.py $out/bin/amd-smi
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = with lib; {
    description = "System management interface for AMD GPUs supported by ROCm";
    # Points at the repository this derivation actually builds (src above),
    # not the separate rocm_smi_lib project.
    homepage = "https://github.com/ROCm/amdsmi";
    license = with licenses; [ mit ];
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = [ "x86_64-linux" ];
  };
})
|
||||||
230
pkgs/rocm-modules/aotriton/default.nix
Normal file
230
pkgs/rocm-modules/aotriton/default.nix
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  clr,
  rocblas,
  rocsolver,
  gtest,
  msgpack,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  hipblas,
  nlohmann_json,
  triton-llvm,
  rocmlir,
  lapack-reference,
  ninja,
  ncurses,
  libffi,
  zlib,
  zstd,
  xz,
  pkg-config,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [
    # aotriton GPU support list:
    # https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py
    "gfx90a"
    "gfx942"
    "gfx1100"
    "gfx1101"
  ],
}:

stdenv.mkDerivation (
  finalAttrs:
  let
    # Python interpreter handed to the build via VIRTUALENV_PYTHON_EXENAME.
    py = python3.withPackages (ps: [
      ps.pyyaml
      ps.distutils
      ps.setuptools
      ps.packaging
      ps.numpy
      ps.wheel
      ps.filelock
      ps.iniconfig
      ps.pluggy
      ps.pybind11
    ]);
    # CMake list syntax: targets separated by ';'.
    gpuTargets' = lib.concatStringsSep ";" gpuTargets;
    compiler = "amdclang++";
    cFlags = "-O3 -DNDEBUG";
    cxxFlags = "${cFlags} -Wno-c++11-narrowing";
    triton-llvm' = triton-llvm;
  in
  {
    pname = "aotriton";
    version = "0.9.2b";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "aotriton";
      rev = "${finalAttrs.version}";
      hash = "sha256-1Cf0olD3zRg9JESD6s/WaGifm3kfD12VUvjTZHpmGAE=";
      fetchSubmodules = true;
    };
    env.CXX = compiler;
    env.ROCM_PATH = "${clr}";
    requiredSystemFeatures = [ "big-parallel" ];

    outputs =
      [
        "out"
      ]
      ++ lib.optionals buildTests [
        "test"
      ]
      ++ lib.optionals buildBenchmarks [
        "benchmark"
      ]
      ++ lib.optionals buildSamples [
        "sample"
      ];

    # Need an empty cuda.h for this to compile
    # Better than pulling in unfree cuda headers
    postPatch = ''
      touch third_party/triton/third_party/nvidia/include/cuda.h
    '';

    doCheck = false;
    doInstallCheck = false;

    nativeBuildInputs = [
      cmake
      rocm-cmake
      pkg-config
      py
      clr
      ninja
    ];

    buildInputs =
      [
        rocblas
        rocsolver
        hipblas-common
        hipblas
        openmp
        libffi
        ncurses
        xz
        nlohmann_json
        rocmlir

        msgpack
        libxml2
        python3Packages.msgpack
        zlib
        zstd
      ]
      ++ lib.optionals buildTests [
        gtest
      ]
      ++ lib.optionals (buildTests || buildBenchmarks) [
        lapack-reference
      ];

    env.TRITON_OFFLINE_BUILD = 1;
    env.LLVM_SYSPATH = "${triton-llvm'}";
    env.JSON_SYSPATH = nlohmann_json;
    env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir";

    # Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files
    preConfigure = ''
      mkdir third_party/triton/third_party/nvidia/backend/include/
      touch third_party/triton/third_party/nvidia/backend/include/cuda.h
      # Point the compiler at the stub header directory created above. The path
      # is derived from the current source root instead of assuming the build
      # happens under /build/source.
      export CXXFLAGS="-I$PWD/third_party/triton/third_party/nvidia/backend/include"
      find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
      find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +

      sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt
      sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt
      sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
      sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
      substituteInPlace third_party/triton/python/setup.py \
        --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \
        --replace-fail 'system == "Linux"' 'False'
      # Fix 'ld: error: unable to insert .comment after .comment'
      substituteInPlace v2python/ld_script.py \
        --replace-fail 'INSERT AFTER .comment;' ""

      cmakeFlagsArray+=(
        '-DCMAKE_C_FLAGS_RELEASE=${cFlags}'
        '-DCMAKE_CXX_FLAGS_RELEASE=${cxxFlags}'
      )
      prependToVar cmakeFlags "-GNinja"
      # Writable HOME inside the build directory; NIX_BUILD_TOP is used rather
      # than a literal /build so this works wherever the build directory lives.
      export HOME="$NIX_BUILD_TOP/tmp-home"
      mkdir -p "$HOME"
    '';

    # Excerpt from README:
    # Note: do not run ninja separately, due to the limit of the current build system,
    # ninja install will run the whole build process unconditionally.
    dontBuild = true;

    installPhase = ''
      runHook preInstall
      ninja -v install
      runHook postInstall
    '';

    cmakeFlags =
      [
        "-Wno-dev"
        "-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing
        "-DCMAKE_BUILD_TYPE=Release"
        "-DCMAKE_VERBOSE_MAKEFILE=ON"
        "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
        "-DCMAKE_CXX_COMPILER=${compiler}"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
        "-DAMDGPU_TARGETS=${gpuTargets'}"
        "-DGPU_TARGETS=${gpuTargets'}"
      ]
      ++ lib.optionals buildTests [
        "-DBUILD_CLIENTS_TESTS=ON"
      ]
      ++ lib.optionals buildBenchmarks [
        "-DBUILD_CLIENTS_BENCHMARKS=ON"
      ]
      ++ lib.optionals buildSamples [
        "-DBUILD_CLIENTS_SAMPLES=ON"
      ];

    # Moves optional client binaries into their dedicated outputs.
    # NOTE(review): the hipblas-test / hipblas-bench names look carried over
    # from a hipblas package; confirm what aotriton actually installs before
    # enabling buildTests or buildBenchmarks.
    postInstall =
      lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblas-test $test/bin
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        mv $out/bin/hipblas-bench $benchmark/bin
      ''
      + lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''
      + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
        rmdir $out/bin
      '';

    meta = with lib; {
      description = "ROCm Ahead of Time (AOT) Triton Math Library ";
      homepage = "https://github.com/ROCm/aotriton";
      license = with licenses; [ mit ];
      teams = [ teams.rocm ];
      platforms = platforms.linux;
    };
  }
)
|
||||||
40
pkgs/rocm-modules/clr/cmake-find-x11-libgl.patch
Normal file
40
pkgs/rocm-modules/clr/cmake-find-x11-libgl.patch
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake
|
||||||
|
index 3f233b72f..67bdc62ee 100644
|
||||||
|
--- a/rocclr/cmake/ROCclr.cmake
|
||||||
|
+++ b/rocclr/cmake/ROCclr.cmake
|
||||||
|
@@ -44,6 +44,19 @@ find_package(Threads REQUIRED)
|
||||||
|
|
||||||
|
find_package(AMD_OPENCL)
|
||||||
|
|
||||||
|
+# Find X11 package
|
||||||
|
+find_package(X11 REQUIRED)
|
||||||
|
+if(NOT X11_FOUND)
|
||||||
|
+ message(FATAL_ERROR "X11 libraries not found")
|
||||||
|
+endif()
|
||||||
|
+
|
||||||
|
+# Find OpenGL package
|
||||||
|
+find_package(OpenGL REQUIRED)
|
||||||
|
+if(NOT OpenGL_FOUND)
|
||||||
|
+ message(FATAL_ERROR "OpenGL not found")
|
||||||
|
+endif()
|
||||||
|
+
|
||||||
|
+
|
||||||
|
add_library(rocclr STATIC)
|
||||||
|
|
||||||
|
include(ROCclrCompilerOptions)
|
||||||
|
@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC
|
||||||
|
${ROCCLR_SRC_DIR}/device
|
||||||
|
${ROCCLR_SRC_DIR}/elf
|
||||||
|
${ROCCLR_SRC_DIR}/include
|
||||||
|
+ ${X11_INCLUDE_DIR}
|
||||||
|
+ ${OPENGL_INCLUDE_DIR}
|
||||||
|
${AMD_OPENCL_INCLUDE_DIRS})
|
||||||
|
|
||||||
|
-target_link_libraries(rocclr PUBLIC Threads::Threads)
|
||||||
|
+target_link_libraries(rocclr PUBLIC
|
||||||
|
+ Threads::Threads
|
||||||
|
+ ${X11_LIBRARIES}
|
||||||
|
+ ${OPENGL_LIBRARIES})
|
||||||
|
# IPC on Windows is not supported
|
||||||
|
if(UNIX)
|
||||||
|
target_link_libraries(rocclr PUBLIC rt)
|
||||||
287
pkgs/rocm-modules/clr/default.nix
Normal file
287
pkgs/rocm-modules/clr/default.nix
Normal file
@@ -0,0 +1,287 @@
|
|||||||
|
{
  lib,
  stdenv,
  callPackage,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  makeWrapper,
  cmake,
  perl,
  hip-common,
  hipcc,
  rocm-device-libs,
  rocm-comgr,
  rocm-runtime,
  rocm-core,
  roctracer,
  rocminfo,
  rocm-smi,
  numactl,
  libffi,
  zstd,
  zlib,
  libGL,
  libxml2,
  libX11,
  python3Packages,
  rocm-merged-llvm,
  khronos-ocl-icd-loader,
  gcc-unwrapped,
  writeShellScriptBin,
  localGpuTargets ? null,
}:

let
  inherit (rocm-core) ROCM_LIBPATCH_VERSION;

  hipClang = rocm-merged-llvm;
  hipClangPath = "${hipClang}/bin";

  # Arguments passed to wrapProgram for the hipcc/hipconfig entry points.
  wrapperArgs = [
    "--prefix PATH : $out/bin"
    "--prefix LD_LIBRARY_PATH : ${rocm-runtime}"
    "--set HIP_PLATFORM amd"
    "--set HIP_PATH $out"
    "--set HIP_CLANG_PATH ${hipClangPath}"
    "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode"
    "--set HSA_PATH ${rocm-runtime}"
    "--set ROCM_PATH $out"
  ];

  # Builds a tiny launcher named `alias` that execs `real` with all arguments.
  mkCompilerAlias =
    alias: real:
    writeShellScriptBin alias ''
      exec ${real} "$@"
    '';
  amdclang = mkCompilerAlias "amdclang" "clang";
  amdclangxx = mkCompilerAlias "amdclang++" "clang++";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "clr";
  version = "6.4.1";

  outputs = [
    "out"
    "icd"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "clr";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-MA9MS/HF3j5iqpFuatHQJZ+nNkcGgzCvpkiNO6CjoPg=";
  };

  nativeBuildInputs = [
    makeWrapper
    cmake
    perl
    python3Packages.python
    python3Packages.cppheaderparser
    amdclang
    amdclangxx
  ];

  buildInputs = [
    numactl
    libGL
    libxml2
    libX11
    khronos-ocl-icd-loader
    hipClang
    libffi
    zstd
    zlib
  ];

  propagatedBuildInputs = [
    rocm-core
    rocm-device-libs
    rocm-comgr
    rocm-runtime
    rocminfo
  ];

  cmakeFlags = [
    "-DCMAKE_BUILD_TYPE=Release"
    "-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries
    "-DCLR_BUILD_HIP=ON"
    "-DCLR_BUILD_OCL=ON"
    "-DHIP_COMMON_DIR=${hip-common}"
    "-DHIPCC_BIN_DIR=${hipcc}/bin"
    "-DHIP_PLATFORM=amd"
    "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext"
    "-DROCM_PATH=${rocminfo}"
    "-DBUILD_ICD=ON"
    "-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds?
    "-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}"

    # Temporarily set variables to work around upstream CMakeLists issue
    # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DCMAKE_INSTALL_LIBDIR=lib"
  ];

  env.LLVM_DIR = "";

  patches = [
    ./cmake-find-x11-libgl.patch

    # (fetchpatch {
    #   # Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99
    #   sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA=";
    #   url = "https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch";
    # })
    # (fetchpatch {
    #   # improve rocclr isa compatibility check
    #   sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs=";
    #   url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch";
    # })
    # (fetchpatch {
    #   # [PATCH] Improve hipamd compat check
    #   sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8=";
    #   url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch";
    # })
    # (fetchpatch {
    #   # [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition
    #   # Fixes crash due to UB in KernelBlitManager::setArgument
    #   sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I=";
    #   url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch";
    # })
    # (fetchpatch {
    #   # [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues
    #   sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4=";
    #   url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch";
    # })
  ];

  postPatch = ''
    patchShebangs hipamd/*.sh
    patchShebangs hipamd/src

    # We're not on Windows so these are never installed to hipcc...
    substituteInPlace hipamd/CMakeLists.txt \
      --replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \
      --replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" ""

    substituteInPlace hipamd/src/hip_embed_pch.sh \
      --replace-fail "\''$LLVM_DIR/bin/clang" "${hipClangPath}/clang"

    substituteInPlace opencl/khronos/icd/loader/icd_platform.h \
      --replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \
                     '#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";'

    # new unbundler has better error messages, defaulting it on
    substituteInPlace rocclr/utils/flags.hpp \
      --replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true"
  '';

  postInstall = ''
    chmod +x $out/bin/*
    patchShebangs $out/bin

    cp ${amdclang}/bin/* $out/bin/
    cp ${amdclangxx}/bin/* $out/bin/

    for prog in hip{cc,config}{,.pl}; do
      wrapProgram $out/bin/$prog ${lib.concatStringsSep " " wrapperArgs}
    done

    mkdir -p $out/nix-support/
    echo '
    export HIP_PATH="${placeholder "out"}"
    export HIP_PLATFORM=amd
    export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
    export NIX_CC_USE_RESPONSE_FILE=0
    export HIP_CLANG_PATH="${hipClangPath}"
    export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}"
    export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook

    # Just link rocminfo, it's easier
    ln -s ${rocminfo}/bin/* $out/bin
    ln -s ${rocm-core}/include/* $out/include/

    # Replace rocm-opencl-icd functionality
    mkdir -p $icd/etc/OpenCL/vendors
    echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd

    # add version info to output (downstream rocmPackages look for this)
    ln -s ${rocm-core}/.info/ $out/.info

    ln -s ${hipClang} $out/llvm
  '';

  disallowedRequisites = [
    gcc-unwrapped
  ];

  passthru =
    {
      # All known and valid general GPU targets
      # We cannot use this for each ROCm library, as each defines their own supported targets
      # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
      # Generic targets are not yet available in rocm-6.3.1 llvm
      gpuTargets = map (target: "gfx${target}") [
        # "9-generic"
        "900" # MI25, Vega 56/64
        "906" # MI50/60, Radeon VII
        "908" # MI100
        "90a" # MI210 / MI250
        # "9-4-generic"
        # 940/1 - never released publicly, maybe HPE cray specific MI3xx?
        "942" # MI300
        # "10-1-generic"
        "1010"
        "1012"
        # "10-3-generic"
        "1030" # W6800, various Radeon cards
        # "11-generic"
        "1100"
        "1101"
        "1102"
        "1201"
      ];

      inherit hipClangPath;

      updateScript = rocmUpdateScript {
        name = finalAttrs.pname;
        inherit (finalAttrs.src) owner repo;
        page = "tags?per_page=4";
      };

      impureTests = {
        rocm-smi = callPackage ./test-rocm-smi.nix {
          inherit rocm-smi;
          clr = finalAttrs.finalPackage;
        };
        opencl-example = callPackage ./test-opencl-example.nix {
          clr = finalAttrs.finalPackage;
        };
      };

      # Without localGpuTargets the caller's supported list is returned as-is.
      selectGpuTargets =
        {
          supported ? [ ],
        }:
        supported;
      gpuArchSuffix = "";
    }
    # With localGpuTargets set, these attributes override the defaults above:
    # the selection becomes the local targets, intersected with `supported`
    # whenever a non-empty supported list is given.
    // lib.optionalAttrs (localGpuTargets != null) {
      inherit localGpuTargets;
      gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets);
      selectGpuTargets =
        {
          supported ? [ ],
        }:
        if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported;
    };

  meta = {
    description = "AMD Common Language Runtime for hipamd, opencl, and rocclr";
    homepage = "https://github.com/ROCm/clr";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
88
pkgs/rocm-modules/clr/test-opencl-example.nix
Normal file
88
pkgs/rocm-modules/clr/test-opencl-example.nix
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
# Impure test for clr's OpenCL ICD: builds the AMD APP sample programs and
# runs the HelloWorld sample with OCL_ICD_VENDORS pointing at clr's vendor dir.
{
  lib,
  stdenv,
  makeImpureTest,
  fetchFromGitHub,
  clr,
  cmake,
  pkg-config,
  glew,
  libglut,
  opencl-headers,
  ocl-icd,
}:

let
  # Sample binaries; everything found five levels below bin/ in the build
  # tree is copied flat into $out/bin by the install phase.
  appSamples = stdenv.mkDerivation {
    pname = "amd-app-samples";
    version = "2018-06-10";

    src = fetchFromGitHub {
      owner = "OpenCL";
      repo = "AMD_APP_samples";
      rev = "54da6ca465634e78fc51fc25edf5840467ee2411";
      hash = "sha256-qARQpUiYsamHbko/I1gPZE9pUGJ+3396Vk2n7ERSftA=";
    };

    cmakeFlags = [ "-DBUILD_CPP_CL=OFF" ];

    nativeBuildInputs = [
      cmake
      pkg-config
    ];

    buildInputs = [
      glew
      libglut
      opencl-headers
      ocl-icd
    ];

    installPhase = ''
      runHook preInstall

      mkdir -p $out/bin
      # Example path is bin/x86_64/Release/cl/Reduction/Reduction
      cp -r bin/*/*/*/*/* $out/bin/

      runHook postInstall
    '';

    meta = {
      description = "Samples from the AMD APP SDK (with OpenCRun support)";
      homepage = "https://github.com/OpenCL/AMD_APP_samples";
      license = lib.licenses.bsd2;
      platforms = lib.platforms.linux;
      teams = [ lib.teams.rocm ];
    };
  };
in
makeImpureTest {
  name = "opencl-example";
  testedPackage = "rocmPackages_6.clr";

  # Host paths made visible inside the test sandbox.
  sandboxPaths = [
    "/sys"
    "/dev/dri"
    "/dev/kfd"
  ];

  nativeBuildInputs = [ appSamples ];

  OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors";

  testScript = ''
    # Examples load resources from current directory
    cd ${appSamples}/bin
    echo OCL_ICD_VENDORS=$OCL_ICD_VENDORS
    pwd

    HelloWorld | grep HelloWorld
  '';

  meta = {
    teams = [ lib.teams.rocm ];
  };
}
|
||||||
27
pkgs/rocm-modules/clr/test-rocm-smi.nix
Normal file
27
pkgs/rocm-modules/clr/test-rocm-smi.nix
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Impure test for clr: checks that clinfo reports at least one OpenCL
# platform and that rocm-smi prints a GPU table.
{
  lib,
  makeImpureTest,
  clinfo,
  clr,
  rocm-smi,
}:

let
  # Directory holding clr's OpenCL ICD vendor files.
  icdVendorDir = "${clr.icd}/etc/OpenCL/vendors";
in
makeImpureTest {
  name = "rocm-smi";
  testedPackage = "rocmPackages_6.clr";

  nativeBuildInputs = [
    clinfo
    rocm-smi
  ];

  OCL_ICD_VENDORS = icdVendorDir;

  testScript = ''
    # Test fails if the number of platforms is 0
    clinfo | grep -E 'Number of platforms * [1-9]'
    rocm-smi | grep -A1 GPU
  '';

  meta = {
    teams = [ lib.teams.rocm ];
  };
}
|
||||||
167
pkgs/rocm-modules/composable_kernel/base.nix
Normal file
167
pkgs/rocm-modules/composable_kernel/base.nix
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
rocm-merged-llvm,
|
||||||
|
clr,
|
||||||
|
rocminfo,
|
||||||
|
hipify,
|
||||||
|
gitMinimal,
|
||||||
|
gtest,
|
||||||
|
zstd,
|
||||||
|
buildTests ? false,
|
||||||
|
buildExamples ? false,
|
||||||
|
gpuTargets ? (
|
||||||
|
clr.localGpuTargets or [
|
||||||
|
"gfx1010"
|
||||||
|
"gfx1012"
|
||||||
|
"gfx1030"
|
||||||
|
"gfx1100"
|
||||||
|
"gfx1101"
|
||||||
|
"gfx1102"
|
||||||
|
"gfx1201"
|
||||||
|
]
|
||||||
|
),
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
preBuild = ''
|
||||||
|
echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
|
||||||
|
exit 1
|
||||||
|
'';
|
||||||
|
|
||||||
|
pname = "composable_kernel_base";
|
||||||
|
# Picked this version over 6.3 because much easier to get to build
|
||||||
|
# and it matches the version torch 2.6 wants
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
# "out" is always produced; "test" and "example" are added only when the
# corresponding build flag is enabled (postInstall moves binaries into them).
outputs =
  [ "out" ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildExamples "example";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "composable_kernel";
|
||||||
|
rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
|
||||||
|
hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
# Deliberately not using ninja
|
||||||
|
# because we're jankily composing build outputs from multiple drvs
|
||||||
|
# ninja won't believe they're up to date
|
||||||
|
gitMinimal
|
||||||
|
cmake
|
||||||
|
rocminfo
|
||||||
|
clr
|
||||||
|
hipify
|
||||||
|
zstd
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
zstd
|
||||||
|
];
|
||||||
|
|
||||||
|
strictDeps = true;
|
||||||
|
enableParallelBuilding = true;
|
||||||
|
env.ROCM_PATH = clr;
|
||||||
|
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
|
||||||
|
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
|
||||||
|
# "-DDL_KERNELS=ON" # Not needed, slow to build
|
||||||
|
# CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
|
||||||
|
# but migraphx requires an incompatible fork of CK and fails anyway
|
||||||
|
# "-DCK_USE_CODEGEN=ON"
|
||||||
|
# It might be worth skipping fp64 in future with this:
|
||||||
|
# "-DDTYPES=fp32;fp16;fp8;bf16;int8"
|
||||||
|
# Manually define CMAKE_INSTALL_<DIR>
|
||||||
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||||
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||||
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||||
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||||
|
"-DBUILD_DEV=OFF"
|
||||||
|
"-DROCM_PATH=${clr}"
|
||||||
|
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
|
||||||
|
|
||||||
|
# FP8 can build for 908/90a but very slow build
|
||||||
|
# and produces unusably slow kernels that are huge
|
||||||
|
"-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
|
||||||
|
]
|
||||||
|
++ lib.optionals (gpuTargets != [ ]) [
|
||||||
|
# We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
|
||||||
|
# per readme this is required if archs are dissimilar
|
||||||
|
# In rocm-6.3.x not setting any arch flag worked
|
||||||
|
# but setting dissimilar arches always failed
|
||||||
|
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
|
||||||
|
];
|
||||||
|
|
||||||
|
# No flags to build selectively it seems...
|
||||||
|
postPatch =
|
||||||
|
# Reduce configure time by preventing thousands of clang-tidy targets being added
|
||||||
|
# We will never call them
|
||||||
|
# Never build profiler
|
||||||
|
''
|
||||||
|
substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
|
||||||
|
--replace-fail clang_tidy_check '#clang_tidy_check'
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "add_subdirectory(profiler)" ""
|
||||||
|
''
|
||||||
|
# Optionally remove tests
|
||||||
|
+ lib.optionalString (!buildTests) ''
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "add_subdirectory(test)" ""
|
||||||
|
substituteInPlace codegen/CMakeLists.txt \
|
||||||
|
--replace-fail "include(ROCMTest)" ""
|
||||||
|
''
|
||||||
|
# Optionally remove examples
|
||||||
|
+ lib.optionalString (!buildExamples) ''
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "add_subdirectory(example)" ""
|
||||||
|
'';
|
||||||
|
|
||||||
|
postInstall =
|
||||||
|
lib.optionalString buildTests ''
|
||||||
|
mkdir -p $test/bin
|
||||||
|
mv $out/bin/test_* $test/bin
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildExamples ''
|
||||||
|
mkdir -p $example/bin
|
||||||
|
mv $out/bin/example_* $example/bin
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
passthru.anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets;
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Performance portable programming model for machine learning tensor operators";
|
||||||
|
homepage = "https://github.com/ROCm/composable_kernel";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
broken = true;
|
||||||
|
};
|
||||||
|
})
|
||||||
161
pkgs/rocm-modules/composable_kernel/base.nix.cmp
Normal file
161
pkgs/rocm-modules/composable_kernel/base.nix.cmp
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
rocm-merged-llvm,
|
||||||
|
clr,
|
||||||
|
rocminfo,
|
||||||
|
hipify,
|
||||||
|
gitMinimal,
|
||||||
|
gtest,
|
||||||
|
zstd,
|
||||||
|
buildTests ? false,
|
||||||
|
buildExamples ? false,
|
||||||
|
gpuTargets ? (
|
||||||
|
clr.localGpuTargets or [
|
||||||
|
"gfx1201"
|
||||||
|
]
|
||||||
|
),
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
preBuild = ''
|
||||||
|
echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
|
||||||
|
exit 1
|
||||||
|
'';
|
||||||
|
|
||||||
|
pname = "composable_kernel_base";
|
||||||
|
# Picked this version over 6.3 because much easier to get to build
|
||||||
|
# and it matches the version torch 2.6 wants
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
outputs =
|
||||||
|
[
|
||||||
|
"out"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"test"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildExamples [
|
||||||
|
"example"
|
||||||
|
];
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "composable_kernel";
|
||||||
|
rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
|
||||||
|
hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
# Deliberately not using ninja
|
||||||
|
# because we're jankily composing build outputs from multiple drvs
|
||||||
|
# ninja won't believe they're up to date
|
||||||
|
gitMinimal
|
||||||
|
cmake
|
||||||
|
rocminfo
|
||||||
|
clr
|
||||||
|
hipify
|
||||||
|
zstd
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
zstd
|
||||||
|
];
|
||||||
|
|
||||||
|
strictDeps = true;
|
||||||
|
enableParallelBuilding = true;
|
||||||
|
env.ROCM_PATH = clr;
|
||||||
|
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
|
||||||
|
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
|
||||||
|
# "-DDL_KERNELS=ON" # Not needed, slow to build
|
||||||
|
# CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
|
||||||
|
# but migraphx requires an incompatible fork of CK and fails anyway
|
||||||
|
# "-DCK_USE_CODEGEN=ON"
|
||||||
|
# It might be worth skipping fp64 in future with this:
|
||||||
|
# "-DDTYPES=fp32;fp16;fp8;bf16;int8"
|
||||||
|
# Manually define CMAKE_INSTALL_<DIR>
|
||||||
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||||
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||||
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||||
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||||
|
"-DBUILD_DEV=OFF"
|
||||||
|
"-DROCM_PATH=${clr}"
|
||||||
|
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
|
||||||
|
|
||||||
|
# FP8 can build for 908/90a but very slow build
|
||||||
|
# and produces unusably slow kernels that are huge
|
||||||
|
"-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
|
||||||
|
]
|
||||||
|
++ lib.optionals (gpuTargets != [ ]) [
|
||||||
|
# We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
|
||||||
|
# per readme this is required if archs are dissimilar
|
||||||
|
# In rocm-6.3.x not setting any arch flag worked
|
||||||
|
# but setting dissimilar arches always failed
|
||||||
|
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
|
||||||
|
];
|
||||||
|
|
||||||
|
# No flags to build selectively it seems...
|
||||||
|
postPatch =
|
||||||
|
# Reduce configure time by preventing thousands of clang-tidy targets being added
|
||||||
|
# We will never call them
|
||||||
|
# Never build profiler
|
||||||
|
''
|
||||||
|
substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
|
||||||
|
--replace-fail clang_tidy_check '#clang_tidy_check'
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "add_subdirectory(profiler)" ""
|
||||||
|
''
|
||||||
|
# Optionally remove tests
|
||||||
|
+ lib.optionalString (!buildTests) ''
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "add_subdirectory(test)" ""
|
||||||
|
substituteInPlace codegen/CMakeLists.txt \
|
||||||
|
--replace-fail "include(ROCMTest)" ""
|
||||||
|
''
|
||||||
|
# Optionally remove examples
|
||||||
|
+ lib.optionalString (!buildExamples) ''
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "add_subdirectory(example)" ""
|
||||||
|
'';
|
||||||
|
|
||||||
|
postInstall =
|
||||||
|
lib.optionalString buildTests ''
|
||||||
|
mkdir -p $test/bin
|
||||||
|
mv $out/bin/test_* $test/bin
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildExamples ''
|
||||||
|
mkdir -p $example/bin
|
||||||
|
mv $out/bin/example_* $example/bin
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
passthru.anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets;
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Performance portable programming model for machine learning tensor operators";
|
||||||
|
homepage = "https://github.com/ROCm/composable_kernel";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
broken = true;
|
||||||
|
};
|
||||||
|
})
|
||||||
43
pkgs/rocm-modules/composable_kernel/ck4inductor.nix
Normal file
43
pkgs/rocm-modules/composable_kernel/ck4inductor.nix
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# Python package `ck4inductor`, built from the same source tree as the
# composable_kernel derivation passed in.
{
  buildPythonPackage,
  python,
  composable_kernel,
  lib,
  setuptools,
  setuptools-scm,
  rocm-merged-llvm,
}:
buildPythonPackage {
  pyproject = true;
  pname = "ck4inductor";
  build-system = [
    setuptools
    setuptools-scm
  ];
  # NOTE(review): src is inherited from composable_kernel below; confirm this
  # version string still matches the release that src points at.
  version = "6.4.0";
  inherit (composable_kernel) src;
  # Modules that must be importable after install; each is listed once.
  pythonImportsCheck = [
    "ck4inductor"
    "ck4inductor.universal_gemm.gen_instances"
    "ck4inductor.universal_gemm.op"
  ];
  propagatedBuildInputs = [
    # At runtime will fail to compile anything with ck4inductor without this
    # can't easily use in checks phase because most of the compiler machinery is in torch
    rocm-merged-llvm
  ];
  # Only verifies that the package directory landed in site-packages.
  checkPhase = ''
    if [ ! -d "$out/${python.sitePackages}/ck4inductor" ]; then
      echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor"
      exit 1
    fi
  '';
  meta = with lib; {
    description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations";
    homepage = "https://github.com/ROCm/composable_kernel";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}
|
||||||
249
pkgs/rocm-modules/composable_kernel/default.nix
Normal file
249
pkgs/rocm-modules/composable_kernel/default.nix
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
clr,
|
||||||
|
composable_kernel_base,
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
parts = {
|
||||||
|
_mha = {
|
||||||
|
# mha takes ~3hrs on 64 cores on an EPYC milan system at ~2.5GHz
|
||||||
|
# big-parallel builders are one gen newer and clocked ~30% higher but only 24 cores
|
||||||
|
# Should be <10h timeout but might be cutting it close
|
||||||
|
# TODO: work out how to split this into smaller chunks instead of all 3k mha instances together
|
||||||
|
# mha_0,1,2, search ninja target file for the individual instances, split by the index?
|
||||||
|
# TODO: can we prune the generated instances down to only what in practice are used with popular models
|
||||||
|
# when using flash-attention + MHA kernels?
|
||||||
|
targets = [
|
||||||
|
"device_mha_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
|
||||||
|
};
|
||||||
|
gemm_multiply_multiply = {
|
||||||
|
targets = [
|
||||||
|
"device_gemm_multiply_multiply_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
|
||||||
|
};
|
||||||
|
grouped_conv = {
|
||||||
|
targets = [
|
||||||
|
"device_grouped_conv1d_bwd_weight_instance"
|
||||||
|
"device_grouped_conv2d_bwd_data_instance"
|
||||||
|
"device_grouped_conv2d_bwd_weight_instance"
|
||||||
|
"device_grouped_conv1d_fwd_instance"
|
||||||
|
"device_grouped_conv2d_fwd_instance"
|
||||||
|
"device_grouped_conv2d_fwd_dynamic_op_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
grouped_conv_bwd_3d = {
|
||||||
|
targets = [
|
||||||
|
"device_grouped_conv3d_bwd_data_instance"
|
||||||
|
"device_grouped_conv3d_bwd_data_bilinear_instance"
|
||||||
|
"device_grouped_conv3d_bwd_data_scale_instance"
|
||||||
|
"device_grouped_conv3d_bwd_weight_instance"
|
||||||
|
"device_grouped_conv3d_bwd_weight_bilinear_instance"
|
||||||
|
"device_grouped_conv3d_bwd_weight_scale_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
grouped_conv_fwd_3d = {
|
||||||
|
targets = [
|
||||||
|
"device_grouped_conv3d_fwd_instance"
|
||||||
|
"device_grouped_conv3d_fwd_bilinear_instance"
|
||||||
|
"device_grouped_conv3d_fwd_convinvscale_instance"
|
||||||
|
"device_grouped_conv3d_fwd_convscale_instance"
|
||||||
|
"device_grouped_conv3d_fwd_convscale_add_instance"
|
||||||
|
"device_grouped_conv3d_fwd_convscale_relu_instance"
|
||||||
|
"device_grouped_conv3d_fwd_dynamic_op_instance"
|
||||||
|
"device_grouped_conv3d_fwd_scale_instance"
|
||||||
|
"device_grouped_conv3d_fwd_scaleadd_ab_instance"
|
||||||
|
"device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
batched_gemm = {
|
||||||
|
targets = [
|
||||||
|
"device_batched_gemm_instance"
|
||||||
|
"device_batched_gemm_add_relu_gemm_add_instance"
|
||||||
|
"device_batched_gemm_bias_permute_instance"
|
||||||
|
"device_batched_gemm_gemm_instance"
|
||||||
|
"device_batched_gemm_reduce_instance"
|
||||||
|
"device_batched_gemm_softmax_gemm_instance"
|
||||||
|
"device_batched_gemm_softmax_gemm_permute_instance"
|
||||||
|
"device_grouped_gemm_instance"
|
||||||
|
"device_grouped_gemm_bias_instance"
|
||||||
|
"device_grouped_gemm_fastgelu_instance"
|
||||||
|
"device_grouped_gemm_fixed_nk_instance"
|
||||||
|
"device_grouped_gemm_fixed_nk_multi_abd_instance"
|
||||||
|
"device_grouped_gemm_tile_loop_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
gemm_universal = {
|
||||||
|
targets = [
|
||||||
|
"device_gemm_universal_instance"
|
||||||
|
"device_gemm_universal_batched_instance"
|
||||||
|
"device_gemm_universal_reduce_instance"
|
||||||
|
"device_gemm_universal_streamk_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
|
||||||
|
};
|
||||||
|
gemm_other = {
|
||||||
|
targets = [
|
||||||
|
"device_gemm_instance"
|
||||||
|
"device_gemm_ab_scale_instance"
|
||||||
|
"device_gemm_add_instance"
|
||||||
|
"device_gemm_add_add_fastgelu_instance"
|
||||||
|
"device_gemm_add_fastgelu_instance"
|
||||||
|
"device_gemm_add_multiply_instance"
|
||||||
|
"device_gemm_add_relu_instance"
|
||||||
|
"device_gemm_add_relu_add_layernorm_instance"
|
||||||
|
"device_gemm_add_silu_instance"
|
||||||
|
"device_gemm_bias_add_reduce_instance"
|
||||||
|
"device_gemm_bilinear_instance"
|
||||||
|
"device_gemm_fastgelu_instance"
|
||||||
|
"device_gemm_multi_abd_instance"
|
||||||
|
"device_gemm_multiply_add_instance"
|
||||||
|
"device_gemm_reduce_instance"
|
||||||
|
"device_gemm_splitk_instance"
|
||||||
|
"device_gemm_streamk_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
conv = {
|
||||||
|
targets = [
|
||||||
|
"device_conv1d_bwd_data_instance"
|
||||||
|
"device_conv2d_bwd_data_instance"
|
||||||
|
"device_conv2d_fwd_instance"
|
||||||
|
"device_conv2d_fwd_bias_relu_instance"
|
||||||
|
"device_conv2d_fwd_bias_relu_add_instance"
|
||||||
|
"device_conv3d_bwd_data_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
pool = {
|
||||||
|
targets = [
|
||||||
|
"device_avg_pool2d_bwd_instance"
|
||||||
|
"device_avg_pool3d_bwd_instance"
|
||||||
|
"device_pool2d_fwd_instance"
|
||||||
|
"device_pool3d_fwd_instance"
|
||||||
|
"device_max_pool_bwd_instance"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
other1 = {
|
||||||
|
targets = [
|
||||||
|
"device_batchnorm_instance"
|
||||||
|
"device_contraction_bilinear_instance"
|
||||||
|
"device_contraction_scale_instance"
|
||||||
|
"device_elementwise_instance"
|
||||||
|
"device_elementwise_normalization_instance"
|
||||||
|
"device_normalization_bwd_data_instance"
|
||||||
|
"device_normalization_bwd_gamma_beta_instance"
|
||||||
|
"device_normalization_fwd_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
other2 = {
|
||||||
|
targets = [
|
||||||
|
"device_column_to_image_instance"
|
||||||
|
"device_image_to_column_instance"
|
||||||
|
"device_permute_scale_instance"
|
||||||
|
"device_quantization_instance"
|
||||||
|
"device_reduce_instance"
|
||||||
|
"device_softmax_instance"
|
||||||
|
"device_transpose_instance"
|
||||||
|
];
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
tensorOpBuilder =
|
||||||
|
{
|
||||||
|
part,
|
||||||
|
targets,
|
||||||
|
extraCmakeFlags ? [ ],
|
||||||
|
requiredSystemFeatures ? [ ],
|
||||||
|
}:
|
||||||
|
composable_kernel_base.overrideAttrs (old: {
|
||||||
|
inherit requiredSystemFeatures;
|
||||||
|
pname = "composable_kernel${clr.gpuArchSuffix}-${part}";
|
||||||
|
makeTargets = targets;
|
||||||
|
preBuild = ''
|
||||||
|
echo "Building ${part}"
|
||||||
|
makeFlagsArray+=($makeTargets)
|
||||||
|
substituteInPlace Makefile \
|
||||||
|
--replace-fail '.NOTPARALLEL:' ""
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Compile parallelism adjusted based on available RAM
|
||||||
|
# Never uses less than NIX_BUILD_CORES/3, never uses more than NIX_BUILD_CORES
|
||||||
|
# CK uses an unusually high amount of memory per core in the build step
|
||||||
|
# Nix/nixpkgs doesn't really have any infra to tell it that this build is unusually memory hungry
|
||||||
|
# So, bodge. Otherwise you end up having to build all of ROCm with a low core limit when
|
||||||
|
# it's only this package that has trouble.
|
||||||
|
preConfigure =
|
||||||
|
old.preConfigure or ""
|
||||||
|
+ ''
|
||||||
|
MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
|
||||||
|
MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
|
||||||
|
APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE))
|
||||||
|
MAX_CORES=$((1 + APPX_GB/3))
|
||||||
|
MAX_CORES=$((MAX_CORES < NIX_BUILD_CORES/3 ? NIX_BUILD_CORES/3 : MAX_CORES))
|
||||||
|
export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? MAX_CORES : NIX_BUILD_CORES))"
|
||||||
|
echo "Picked new core limit NIX_BUILD_CORES=$NIX_BUILD_CORES based on available mem: $APPX_GB GB"
|
||||||
|
cmakeFlagsArray+=(
|
||||||
|
"-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES"
|
||||||
|
)
|
||||||
|
'';
|
||||||
|
cmakeFlags = old.cmakeFlags ++ extraCmakeFlags;
|
||||||
|
# Early exit after build phase with success, skips fixups etc
|
||||||
|
# Will get copied back into /build of the final CK
|
||||||
|
postBuild = ''
|
||||||
|
find . -name "*.o" -type f | while read -r file; do
|
||||||
|
mkdir -p "$out/$(dirname "$file")"
|
||||||
|
cp --reflink=auto "$file" "$out/$file"
|
||||||
|
done
|
||||||
|
exit 0
|
||||||
|
'';
|
||||||
|
meta = old.meta // {
|
||||||
|
broken = false;
|
||||||
|
};
|
||||||
|
});
|
||||||
|
# One derivation per entry of `parts`: the attribute name is handed to
# tensorOpBuilder as `part`, alongside that entry's own attributes.
composable_kernel_parts = builtins.mapAttrs (
  partName: partSpec: tensorOpBuilder (partSpec // { part = partName; })
) parts;
|
||||||
|
in
|
||||||
|
|
||||||
|
composable_kernel_base.overrideAttrs (
|
||||||
|
finalAttrs: old: {
|
||||||
|
pname = "composable_kernel${clr.gpuArchSuffix}";
|
||||||
|
parts_dirs = builtins.attrValues composable_kernel_parts;
|
||||||
|
disallowedReferences = builtins.attrValues composable_kernel_parts;
|
||||||
|
preBuild = ''
|
||||||
|
for dir in $parts_dirs; do
|
||||||
|
find "$dir" -type f -name "*.o" | while read -r file; do
|
||||||
|
# Extract the relative path by removing the output directory prefix
|
||||||
|
rel_path="''${file#"$dir/"}"
|
||||||
|
|
||||||
|
# Create parent directory if it doesn't exist
|
||||||
|
mkdir -p "$(dirname "$rel_path")"
|
||||||
|
|
||||||
|
# Copy the file back to its original location, give it a future timestamp
|
||||||
|
# so make treats it as up to date
|
||||||
|
cp --reflink=auto --no-preserve=all "$file" "$rel_path"
|
||||||
|
touch -d "now +10 hours" "$rel_path"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
passthru = old.passthru // {
|
||||||
|
parts = composable_kernel_parts;
|
||||||
|
};
|
||||||
|
meta = old.meta // {
|
||||||
|
# Builds which don't target any gfx9 arch cause cmake errors in dependent projects
|
||||||
|
broken = !finalAttrs.passthru.anyGfx9Target;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
)
|
||||||
513
pkgs/rocm-modules/default.nix
Normal file
513
pkgs/rocm-modules/default.nix
Normal file
@@ -0,0 +1,513 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
config,
|
||||||
|
callPackage,
|
||||||
|
newScope,
|
||||||
|
recurseIntoAttrs,
|
||||||
|
symlinkJoin,
|
||||||
|
fetchFromGitHub,
|
||||||
|
ffmpeg_4,
|
||||||
|
boost179,
|
||||||
|
opencv,
|
||||||
|
libjpeg_turbo,
|
||||||
|
python3Packages,
|
||||||
|
triton-llvm,
|
||||||
|
openmpi,
|
||||||
|
rocmGpuArches ? [ ],
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
outer = lib.makeScope newScope (
|
||||||
|
self:
|
||||||
|
let
|
||||||
|
inherit (self) llvm;
|
||||||
|
pyPackages = python3Packages;
|
||||||
|
openmpi-orig = openmpi;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
inherit rocmGpuArches;
|
||||||
|
buildTests = false;
|
||||||
|
buildBenchmarks = false;
|
||||||
|
stdenv = llvm.rocmClangStdenv;
|
||||||
|
|
||||||
|
rocmPath = self.callPackage ./rocm-path { };
|
||||||
|
rocmUpdateScript = self.callPackage ./update.nix { };
|
||||||
|
|
||||||
|
## ROCm ##
|
||||||
|
llvm = recurseIntoAttrs (
|
||||||
|
callPackage ./llvm/default.nix {
|
||||||
|
inherit (self) rocm-device-libs rocm-runtime;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
inherit (self.llvm) rocm-merged-llvm clang openmp;
|
||||||
|
|
||||||
|
rocm-core = self.callPackage ./rocm-core { };
|
||||||
|
amdsmi = pyPackages.callPackage ./amdsmi {
|
||||||
|
inherit (self) rocmUpdateScript;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocm-cmake = self.callPackage ./rocm-cmake { };
|
||||||
|
|
||||||
|
rocm-smi = pyPackages.callPackage ./rocm-smi {
|
||||||
|
inherit (self) rocmUpdateScript;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocm-device-libs = self.callPackage ./rocm-device-libs {
|
||||||
|
inherit (llvm) rocm-merged-llvm;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocm-runtime = self.callPackage ./rocm-runtime {
|
||||||
|
inherit (llvm) rocm-merged-llvm;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocm-comgr = self.callPackage ./rocm-comgr {
|
||||||
|
inherit (llvm) rocm-merged-llvm;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocminfo = self.callPackage ./rocminfo { };
|
||||||
|
|
||||||
|
# Unfree
|
||||||
|
hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { };
|
||||||
|
|
||||||
|
rdc = self.callPackage ./rdc { };
|
||||||
|
|
||||||
|
rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { };
|
||||||
|
|
||||||
|
hip-common = self.callPackage ./hip-common { };
|
||||||
|
|
||||||
|
# Eventually will be in the LLVM repo
|
||||||
|
hipcc = self.callPackage ./hipcc {
|
||||||
|
inherit (llvm) rocm-merged-llvm;
|
||||||
|
};
|
||||||
|
|
||||||
|
# Replaces hip, opencl-runtime, and rocclr
|
||||||
|
clr = self.callPackage ./clr { };
|
||||||
|
|
||||||
|
aotriton = self.callPackage ./aotriton { };
|
||||||
|
|
||||||
|
hipify = self.callPackage ./hipify {
|
||||||
|
inherit (llvm)
|
||||||
|
clang
|
||||||
|
rocm-merged-llvm
|
||||||
|
;
|
||||||
|
};
|
||||||
|
|
||||||
|
# hsakmt was merged into rocm-runtime
|
||||||
|
hsakmt = self.rocm-runtime;
|
||||||
|
|
||||||
|
rocprofiler = self.callPackage ./rocprofiler {
|
||||||
|
inherit (llvm) clang;
|
||||||
|
};
|
||||||
|
rocprofiler-register = self.callPackage ./rocprofiler-register {
|
||||||
|
inherit (llvm) clang;
|
||||||
|
};
|
||||||
|
|
||||||
|
# Needs GCC
|
||||||
|
roctracer = self.callPackage ./roctracer { };
|
||||||
|
|
||||||
|
rocgdb = self.callPackage ./rocgdb { };
|
||||||
|
|
||||||
|
rocdbgapi = self.callPackage ./rocdbgapi { };
|
||||||
|
|
||||||
|
rocr-debug-agent = self.callPackage ./rocr-debug-agent { };
|
||||||
|
|
||||||
|
rocprim = self.callPackage ./rocprim { };
|
||||||
|
|
||||||
|
rocsparse = self.callPackage ./rocsparse { };
|
||||||
|
|
||||||
|
rocthrust = self.callPackage ./rocthrust { };
|
||||||
|
|
||||||
|
rocrand = self.callPackage ./rocrand { };
|
||||||
|
|
||||||
|
hiprand = self.callPackage ./hiprand { };
|
||||||
|
|
||||||
|
rocfft = self.callPackage ./rocfft { };
|
||||||
|
|
||||||
|
mscclpp = self.callPackage ./mscclpp { };
|
||||||
|
|
||||||
|
rccl = self.callPackage ./rccl { };
|
||||||
|
|
||||||
|
# RCCL with sanitizers and tests
|
||||||
|
# Can't use the sanitizer build as a dep of other packages without
|
||||||
|
# runtime crashes due to ASAN not loading first
|
||||||
|
rccl-tests = self.callPackage ./rccl {
|
||||||
|
buildTests = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
hipcub = self.callPackage ./hipcub { };
|
||||||
|
|
||||||
|
hipsparse = self.callPackage ./hipsparse { };
|
||||||
|
|
||||||
|
hipfort = self.callPackage ./hipfort { };
|
||||||
|
|
||||||
|
hipfft = self.callPackage ./hipfft { };
|
||||||
|
|
||||||
|
hiprt = self.callPackage ./hiprt { };
|
||||||
|
|
||||||
|
tensile = pyPackages.callPackage ./tensile {
|
||||||
|
inherit (self)
|
||||||
|
rocmUpdateScript
|
||||||
|
clr
|
||||||
|
;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocblas = self.callPackage ./rocblas {
|
||||||
|
buildTests = true;
|
||||||
|
buildBenchmarks = true;
|
||||||
|
inherit (self) roctracer;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocsolver = self.callPackage ./rocsolver { };
|
||||||
|
|
||||||
|
rocwmma = self.callPackage ./rocwmma { };
|
||||||
|
|
||||||
|
rocalution = self.callPackage ./rocalution { };
|
||||||
|
|
||||||
|
rocmlir-rock = self.callPackage ./rocmlir {
|
||||||
|
buildRockCompiler = true;
|
||||||
|
};
|
||||||
|
rocmlir = self.rocmlir-rock;
|
||||||
|
|
||||||
|
hipsolver = self.callPackage ./hipsolver { };
|
||||||
|
|
||||||
|
hipblas-common = self.callPackage ./hipblas-common { };
|
||||||
|
|
||||||
|
hipblas = self.callPackage ./hipblas { };
|
||||||
|
|
||||||
|
hipblaslt = self.callPackage ./hipblaslt { };
|
||||||
|
|
||||||
|
# hipTensor - Only supports GFX9
|
||||||
|
|
||||||
|
composable_kernel_base = self.callPackage ./composable_kernel/base.nix { };
|
||||||
|
composable_kernel = self.callPackage ./composable_kernel { };
|
||||||
|
|
||||||
|
ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix {
|
||||||
|
inherit (self) composable_kernel;
|
||||||
|
inherit (llvm) rocm-merged-llvm;
|
||||||
|
};
|
||||||
|
|
||||||
|
half = self.callPackage ./half { };
|
||||||
|
|
||||||
|
miopen = self.callPackage ./miopen {
|
||||||
|
boost = boost179.override { enableStatic = true; };
|
||||||
|
};
|
||||||
|
|
||||||
|
miopen-hip = self.miopen;
|
||||||
|
|
||||||
|
migraphx = self.callPackage ./migraphx { };
|
||||||
|
|
||||||
|
rpp = self.callPackage ./rpp { };
|
||||||
|
|
||||||
|
rpp-hip = self.rpp.override {
|
||||||
|
useOpenCL = false;
|
||||||
|
useCPU = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
rpp-opencl = self.rpp.override {
|
||||||
|
useOpenCL = true;
|
||||||
|
useCPU = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
rpp-cpu = self.rpp.override {
|
||||||
|
useOpenCL = false;
|
||||||
|
useCPU = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
mivisionx = self.callPackage ./mivisionx {
|
||||||
|
opencv = opencv.override { enablePython = true; };
|
||||||
|
# TODO: Remove this pin in ROCm 6.4+
|
||||||
|
# FFMPEG support was improved in https://github.com/ROCm/MIVisionX/pull/1460
|
||||||
|
ffmpeg = ffmpeg_4;
|
||||||
|
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
|
||||||
|
# See: https://github.com/ROCm/MIVisionX/issues/1051
|
||||||
|
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
|
||||||
|
version = "2.0.6.1";
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "rrawther";
|
||||||
|
repo = "libjpeg-turbo";
|
||||||
|
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
|
||||||
|
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
|
||||||
|
};
|
||||||
|
# overwrite all patches, since patches for newer version do not apply
|
||||||
|
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
mivisionx-hip = self.mivisionx.override {
|
||||||
|
rpp = self.rpp-hip;
|
||||||
|
useOpenCL = false;
|
||||||
|
useCPU = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
mivisionx-cpu = self.mivisionx.override {
|
||||||
|
rpp = self.rpp-cpu;
|
||||||
|
useOpenCL = false;
|
||||||
|
useCPU = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
# Even if config.rocmSupport is false we need rocmSupport true
|
||||||
|
# version of ucc/ucx in openmpi in this package set
|
||||||
|
openmpi = openmpi-orig.override (
|
||||||
|
prev:
|
||||||
|
let
|
||||||
|
ucx = prev.ucx.override {
|
||||||
|
enableCuda = false;
|
||||||
|
enableRocm = true;
|
||||||
|
};
|
||||||
|
in
|
||||||
|
{
|
||||||
|
inherit ucx;
|
||||||
|
ucc = prev.ucc.override {
|
||||||
|
enableCuda = false;
|
||||||
|
inherit ucx;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
);
|
||||||
|
mpi = self.openmpi;
|
||||||
|
|
||||||
|
triton-llvm = triton-llvm.overrideAttrs {
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "llvm";
|
||||||
|
repo = "llvm-project";
|
||||||
|
# make sure this matches triton llvm rel branch hash for now
|
||||||
|
# https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt
|
||||||
|
rev = "86b69c31642e98f8357df62c09d118ad1da4e16a";
|
||||||
|
hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE=";
|
||||||
|
};
|
||||||
|
pname = "triton-llvm-rocm";
|
||||||
|
patches = [ ]; # FIXME: https://github.com/llvm/llvm-project/commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase
|
||||||
|
};
|
||||||
|
|
||||||
|
triton = pyPackages.callPackage ./triton { rocmPackages = self; };
|
||||||
|
|
||||||
|
## Meta ##
|
||||||
|
# Emulate common ROCm meta layout
|
||||||
|
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations
|
||||||
|
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
|
||||||
|
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
|
||||||
|
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
|
||||||
|
meta = with self; rec {
|
||||||
|
rocm-developer-tools = symlinkJoin {
|
||||||
|
name = "rocm-developer-tools-meta";
|
||||||
|
paths = [
|
||||||
|
hsa-amd-aqlprofile-bin
|
||||||
|
rocm-core
|
||||||
|
rocr-debug-agent
|
||||||
|
roctracer
|
||||||
|
rocdbgapi
|
||||||
|
rocprofiler
|
||||||
|
rocgdb
|
||||||
|
rocm-language-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-ml-sdk = symlinkJoin {
|
||||||
|
name = "rocm-ml-sdk-meta";
|
||||||
|
paths = [
|
||||||
|
rocm-core
|
||||||
|
miopen-hip
|
||||||
|
rocm-hip-sdk
|
||||||
|
rocm-ml-libraries
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-ml-libraries = symlinkJoin {
|
||||||
|
name = "rocm-ml-libraries-meta";
|
||||||
|
paths = [
|
||||||
|
llvm.clang
|
||||||
|
llvm.mlir
|
||||||
|
llvm.openmp
|
||||||
|
rocm-core
|
||||||
|
miopen-hip
|
||||||
|
rocm-hip-libraries
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-hip-sdk = symlinkJoin {
|
||||||
|
name = "rocm-hip-sdk-meta";
|
||||||
|
paths = [
|
||||||
|
rocprim
|
||||||
|
rocalution
|
||||||
|
hipfft
|
||||||
|
rocm-core
|
||||||
|
hipcub
|
||||||
|
hipblas
|
||||||
|
hipblaslt
|
||||||
|
rocrand
|
||||||
|
rocfft
|
||||||
|
hiprt
|
||||||
|
rocsparse
|
||||||
|
rccl
|
||||||
|
rocthrust
|
||||||
|
rocblas
|
||||||
|
hipsparse
|
||||||
|
hipfort
|
||||||
|
rocwmma
|
||||||
|
hipsolver
|
||||||
|
rocsolver
|
||||||
|
rocm-hip-libraries
|
||||||
|
rocm-hip-runtime-devel
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-hip-libraries = symlinkJoin {
|
||||||
|
name = "rocm-hip-libraries-meta";
|
||||||
|
paths = [
|
||||||
|
rocblas
|
||||||
|
hipfort
|
||||||
|
rocm-core
|
||||||
|
rocsolver
|
||||||
|
rocalution
|
||||||
|
rocrand
|
||||||
|
hipblas
|
||||||
|
hipblaslt
|
||||||
|
rocfft
|
||||||
|
hipfft
|
||||||
|
hiprt
|
||||||
|
rccl
|
||||||
|
rocsparse
|
||||||
|
hipsparse
|
||||||
|
hipsolver
|
||||||
|
rocm-hip-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-openmp-sdk = symlinkJoin {
|
||||||
|
name = "rocm-openmp-sdk-meta";
|
||||||
|
paths = [
|
||||||
|
rocm-core
|
||||||
|
llvm.clang
|
||||||
|
llvm.mlir
|
||||||
|
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
|
||||||
|
rocm-language-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-opencl-sdk = symlinkJoin {
|
||||||
|
name = "rocm-opencl-sdk-meta";
|
||||||
|
paths = [
|
||||||
|
rocm-core
|
||||||
|
rocm-runtime
|
||||||
|
clr
|
||||||
|
clr.icd
|
||||||
|
rocm-opencl-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-opencl-runtime = symlinkJoin {
|
||||||
|
name = "rocm-opencl-runtime-meta";
|
||||||
|
paths = [
|
||||||
|
rocm-core
|
||||||
|
clr
|
||||||
|
clr.icd
|
||||||
|
rocm-language-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-hip-runtime-devel = symlinkJoin {
|
||||||
|
name = "rocm-hip-runtime-devel-meta";
|
||||||
|
paths = [
|
||||||
|
clr
|
||||||
|
rocm-core
|
||||||
|
hipify
|
||||||
|
rocm-cmake
|
||||||
|
llvm.clang
|
||||||
|
llvm.mlir
|
||||||
|
llvm.openmp
|
||||||
|
rocm-runtime
|
||||||
|
rocm-hip-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-hip-runtime = symlinkJoin {
|
||||||
|
name = "rocm-hip-runtime-meta";
|
||||||
|
paths = [
|
||||||
|
rocm-core
|
||||||
|
rocminfo
|
||||||
|
clr
|
||||||
|
rocm-language-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-language-runtime = symlinkJoin {
|
||||||
|
name = "rocm-language-runtime-meta";
|
||||||
|
paths = [
|
||||||
|
rocm-runtime
|
||||||
|
rocm-core
|
||||||
|
rocm-comgr
|
||||||
|
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
|
||||||
|
];
|
||||||
|
};
|
||||||
|
rocm-all = symlinkJoin {
|
||||||
|
name = "rocm-all-meta";
|
||||||
|
paths = [
|
||||||
|
rocm-developer-tools
|
||||||
|
rocm-ml-sdk
|
||||||
|
rocm-ml-libraries
|
||||||
|
rocm-hip-sdk
|
||||||
|
rocm-hip-libraries
|
||||||
|
rocm-openmp-sdk
|
||||||
|
rocm-opencl-sdk
|
||||||
|
rocm-opencl-runtime
|
||||||
|
rocm-hip-runtime-devel
|
||||||
|
rocm-hip-runtime
|
||||||
|
rocm-language-runtime
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
rocm-tests = self.callPackage ./rocm-tests {
|
||||||
|
rocmPackages = self;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// lib.optionalAttrs config.allowAliases {
|
||||||
|
rocm-thunk = throw ''
|
||||||
|
'rocm-thunk' has been removed. It's now part of the ROCm runtime.
|
||||||
|
''; # Added 2025-3-16
|
||||||
|
|
||||||
|
clang-ocl = throw ''
|
||||||
|
'clang-ocl' has been deprecated upstream. Use ROCm's clang directly.
|
||||||
|
''; # Added 2025-3-16
|
||||||
|
|
||||||
|
miopengemm = throw ''
|
||||||
|
'miopengemm' has been deprecated.
|
||||||
|
''; # Added 2024-3-3
|
||||||
|
|
||||||
|
miopen-opencl = throw ''
|
||||||
|
'miopen-opencl' has been deprecated.
|
||||||
|
''; # Added 2024-3-3
|
||||||
|
|
||||||
|
mivisionx-opencl = throw ''
|
||||||
|
'mivisionx-opencl' has been deprecated.
|
||||||
|
Other versions of mivisionx are still available.
|
||||||
|
''; # Added 2024-3-24
|
||||||
|
}
|
||||||
|
);
|
||||||
|
scopeForArches =
|
||||||
|
arches:
|
||||||
|
outer.overrideScope (
|
||||||
|
_final: prev: {
|
||||||
|
clr = prev.clr.override {
|
||||||
|
localGpuTargets = arches;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
);
|
||||||
|
in
|
||||||
|
outer
|
||||||
|
// builtins.listToAttrs (
|
||||||
|
builtins.map (arch: {
|
||||||
|
name = arch;
|
||||||
|
value = scopeForArches [ arch ];
|
||||||
|
}) outer.clr.gpuTargets
|
||||||
|
)
|
||||||
|
// {
|
||||||
|
gfx9 = scopeForArches [
|
||||||
|
"gfx906"
|
||||||
|
"gfx908"
|
||||||
|
"gfx90a"
|
||||||
|
"gfx942"
|
||||||
|
];
|
||||||
|
gfx10 = scopeForArches [
|
||||||
|
"gfx1010"
|
||||||
|
"gfx1030"
|
||||||
|
];
|
||||||
|
gfx11 = scopeForArches [
|
||||||
|
"gfx1100"
|
||||||
|
"gfx1101"
|
||||||
|
"gfx1102"
|
||||||
|
];
|
||||||
|
|
||||||
|
gfx12 = scopeForArches [
|
||||||
|
"gfx1201"
|
||||||
|
];
|
||||||
|
}
|
||||||
39
pkgs/rocm-modules/half/default.nix
Normal file
39
pkgs/rocm-modules/half/default.nix
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
pname = "half";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "half";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
];
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "C++ library for half precision floating point arithmetics";
|
||||||
|
homepage = "https://github.com/ROCm/half";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.unix;
|
||||||
|
};
|
||||||
|
})
|
||||||
45
pkgs/rocm-modules/hip-common/default.nix
Normal file
45
pkgs/rocm-modules/hip-common/default.nix
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
pname = "hip-common";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "HIP";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-2Iekju0t12c6iiqb104j+Lh53FvZwyqYtST12RkkuKc=";
|
||||||
|
};
|
||||||
|
|
||||||
|
dontConfigure = true;
|
||||||
|
dontBuild = true;
|
||||||
|
|
||||||
|
installPhase = ''
|
||||||
|
runHook preInstall
|
||||||
|
|
||||||
|
mkdir -p $out
|
||||||
|
mv * $out
|
||||||
|
|
||||||
|
runHook postInstall
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "C++ Heterogeneous-Compute Interface for Portability";
|
||||||
|
homepage = "https://github.com/ROCm/HIP";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
maintainers = with maintainers; [ lovesegfault ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
})
|
||||||
42
pkgs/rocm-modules/hipblas-common/default.nix
Normal file
42
pkgs/rocm-modules/hipblas-common/default.nix
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
cmake,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocm-cmake,
|
||||||
|
rocmUpdateScript,
|
||||||
|
}:
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
pname = "hipblas-common";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "hipBLAS-common";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-eTwoAXH2HGdSAOLTZHJUFHF+c2wWHixqeMqr60KxJrc=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
rocm-cmake
|
||||||
|
];
|
||||||
|
|
||||||
|
strictDeps = true;
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Common files shared by hipBLAS and hipBLASLt";
|
||||||
|
# Upstream repository for this package (the same repo that src fetches).
homepage = "https://github.com/ROCm/hipBLAS-common";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
})
|
||||||
140
pkgs/rocm-modules/hipblas/default.nix
Normal file
140
pkgs/rocm-modules/hipblas/default.nix
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
fetchpatch,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
clr,
|
||||||
|
gfortran,
|
||||||
|
hipblas-common,
|
||||||
|
rocblas,
|
||||||
|
rocsolver,
|
||||||
|
rocsparse,
|
||||||
|
rocprim,
|
||||||
|
gtest,
|
||||||
|
lapack-reference,
|
||||||
|
buildTests ? false,
|
||||||
|
buildBenchmarks ? false,
|
||||||
|
buildSamples ? false,
|
||||||
|
}:
|
||||||
|
|
||||||
|
# Can also use cuBLAS
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
pname = "hipblas";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
outputs =
|
||||||
|
[
|
||||||
|
"out"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"test"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildBenchmarks [
|
||||||
|
"benchmark"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildSamples [
|
||||||
|
"sample"
|
||||||
|
];
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "hipBLAS";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-lQv8Ik6+0ldqyeJ05CSGB0309nIpzlRL3CRYeQxVfd0=";
|
||||||
|
};
|
||||||
|
|
||||||
|
patches = [
|
||||||
|
# https://github.com/ROCm/hipBLAS/pull/952
|
||||||
|
# (fetchpatch {
|
||||||
|
# name = "transitively-depend-hipblas-common.patch";
|
||||||
|
# url = "https://github.com/ROCm/hipBLAS/commit/54220fdaebf0fb4fd0921ee9e418ace5b143ec8f.patch";
|
||||||
|
# hash = "sha256-MFEhv8Bkrd2zD0FFIDg9oJzO7ztdyMAF+R9oYA0rmwQ=";
|
||||||
|
# })
|
||||||
|
];
|
||||||
|
|
||||||
|
postPatch = ''
|
||||||
|
substituteInPlace library/CMakeLists.txt \
|
||||||
|
--replace-fail "find_package(Git REQUIRED)" ""
|
||||||
|
'';
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
gfortran
|
||||||
|
];
|
||||||
|
|
||||||
|
propagatedBuildInputs = [ hipblas-common ];
|
||||||
|
|
||||||
|
buildInputs =
|
||||||
|
[
|
||||||
|
rocblas
|
||||||
|
rocprim
|
||||||
|
rocsparse
|
||||||
|
rocsolver
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
gtest
|
||||||
|
]
|
||||||
|
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||||
|
lapack-reference
|
||||||
|
];
|
||||||
|
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DCMAKE_CXX_COMPILER=${lib.getExe' clr "hipcc"}"
|
||||||
|
# Upstream is migrating to amdclang++, it is likely this will be correct in next version bump
|
||||||
|
#"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++"
|
||||||
|
# Manually define CMAKE_INSTALL_<DIR>
|
||||||
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||||
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||||
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||||
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||||
|
"-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"-DBUILD_CLIENTS_TESTS=ON"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildBenchmarks [
|
||||||
|
"-DBUILD_CLIENTS_BENCHMARKS=ON"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildSamples [
|
||||||
|
"-DBUILD_CLIENTS_SAMPLES=ON"
|
||||||
|
];
|
||||||
|
|
||||||
|
postInstall =
|
||||||
|
lib.optionalString buildTests ''
|
||||||
|
mkdir -p $test/bin
|
||||||
|
mv $out/bin/hipblas-test $test/bin
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildBenchmarks ''
|
||||||
|
mkdir -p $benchmark/bin
|
||||||
|
mv $out/bin/hipblas-bench $benchmark/bin
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildSamples ''
|
||||||
|
mkdir -p $sample/bin
|
||||||
|
mv $out/bin/example-* $sample/bin
|
||||||
|
''
|
||||||
|
+ lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
|
||||||
|
rmdir $out/bin
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "ROCm BLAS marshalling library";
|
||||||
|
homepage = "https://github.com/ROCm/hipBLAS";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
})
|
||||||
235
pkgs/rocm-modules/hipblaslt/default.nix
Normal file
235
pkgs/rocm-modules/hipblaslt/default.nix
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchpatch,
|
||||||
|
fetchFromGitHub,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
clr,
|
||||||
|
gfortran,
|
||||||
|
gtest,
|
||||||
|
msgpack,
|
||||||
|
libxml2,
|
||||||
|
python3,
|
||||||
|
python3Packages,
|
||||||
|
openmp,
|
||||||
|
hipblas-common,
|
||||||
|
tensile,
|
||||||
|
lapack-reference,
|
||||||
|
ncurses,
|
||||||
|
libffi,
|
||||||
|
zlib,
|
||||||
|
zstd,
|
||||||
|
rocmUpdateScript,
|
||||||
|
buildTests ? false,
|
||||||
|
buildBenchmarks ? false,
|
||||||
|
buildSamples ? false,
|
||||||
|
# hipblaslt supports only devices with MFMA or WMMA
|
||||||
|
# WMMA on gfx1100 may be broken
|
||||||
|
# MFMA on MI100 may be broken
|
||||||
|
# MI200/MI300 known to work
|
||||||
|
gpuTargets ? (
|
||||||
|
clr.localGpuTargets or [
|
||||||
|
# "gfx908" FIXME: confirm MFMA on MI100 works
|
||||||
|
"gfx90a"
|
||||||
|
"gfx942"
|
||||||
|
# "gfx1100" FIXME: confirm WMMA targets work
|
||||||
|
]
|
||||||
|
),
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (
|
||||||
|
finalAttrs:
|
||||||
|
let
|
||||||
|
supportsTargetArches =
|
||||||
|
(builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets)
|
||||||
|
|| (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets);
|
||||||
|
tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs {
|
||||||
|
inherit (finalAttrs) src;
|
||||||
|
sourceRoot = "${finalAttrs.src.name}/tensilelite";
|
||||||
|
};
|
||||||
|
py = python3.withPackages (ps: [
|
||||||
|
ps.pyyaml
|
||||||
|
ps.setuptools
|
||||||
|
ps.packaging
|
||||||
|
]);
|
||||||
|
gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets);
|
||||||
|
compiler = "amdclang++";
|
||||||
|
cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need patched to include this properly
|
||||||
|
in
|
||||||
|
{
|
||||||
|
pname = "hipblaslt${clr.gpuArchSuffix}";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "hipBLASLt";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-ojNa3jt5285gsPwo4icATJD9JdxmbJBjfCF4A1ttCQ4=";
|
||||||
|
};
|
||||||
|
env.CXX = compiler;
|
||||||
|
env.CFLAGS = cFlags;
|
||||||
|
env.CXXFLAGS = cFlags;
|
||||||
|
env.ROCM_PATH = "${clr}";
|
||||||
|
env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
|
||||||
|
env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
|
||||||
|
# Some tensile scripts look for this as an env var rather than a cmake flag
|
||||||
|
env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++";
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
|
||||||
|
outputs =
|
||||||
|
[
|
||||||
|
"out"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"test"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildBenchmarks [
|
||||||
|
"benchmark"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildSamples [
|
||||||
|
"sample"
|
||||||
|
];
|
||||||
|
|
||||||
|
postPatch = ''
|
||||||
|
mkdir -p build/Tensile/library
|
||||||
|
# git isn't needed and we have no .git
|
||||||
|
substituteInPlace cmake/Dependencies.cmake \
|
||||||
|
--replace-fail "find_package(Git REQUIRED)" ""
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "include(virtualenv)" "" \
|
||||||
|
--replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \
|
||||||
|
--replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \
|
||||||
|
--replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \
|
||||||
|
--replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
|
||||||
|
# FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand
|
||||||
|
# Working around for now by disabling the ExtOp libs
|
||||||
|
substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \
|
||||||
|
--replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries'
|
||||||
|
substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \
|
||||||
|
--replace-fail '${"\${rocm_path}"}/bin/' ""
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Apply patches to allow building without a target arch if we need to do that
|
||||||
|
patches = lib.optionals (!supportsTargetArches) [
|
||||||
|
# Add ability to build without specifying any arch.
|
||||||
|
(fetchpatch {
|
||||||
|
sha256 = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk=";
|
||||||
|
url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch";
|
||||||
|
})
|
||||||
|
# Followup to above patch for 6.3.x
|
||||||
|
(fetchpatch {
|
||||||
|
sha256 = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10=";
|
||||||
|
url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch";
|
||||||
|
})
|
||||||
|
];
|
||||||
|
|
||||||
|
doCheck = false;
|
||||||
|
doInstallCheck = false;
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
py
|
||||||
|
clr
|
||||||
|
gfortran
|
||||||
|
# need make to get streaming console output so nix knows build is still running
|
||||||
|
# so deliberately not using ninja
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs =
|
||||||
|
[
|
||||||
|
hipblas-common
|
||||||
|
tensile'
|
||||||
|
openmp
|
||||||
|
libffi
|
||||||
|
ncurses
|
||||||
|
|
||||||
|
# Tensile deps - not optional, building without tensile isn't actually supported
|
||||||
|
msgpack # FIXME: not included in cmake!
|
||||||
|
libxml2
|
||||||
|
python3Packages.msgpack
|
||||||
|
python3Packages.joblib
|
||||||
|
zlib
|
||||||
|
zstd
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
gtest
|
||||||
|
]
|
||||||
|
++ lib.optionals (buildTests || buildBenchmarks) [
|
||||||
|
lapack-reference
|
||||||
|
];
|
||||||
|
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-Wno-dev"
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
||||||
|
"-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
|
||||||
|
"-DTENSILE_USE_HIP=ON"
|
||||||
|
"-DTENSILE_BUILD_CLIENT=OFF"
|
||||||
|
"-DTENSILE_USE_FLOAT16_BUILTIN=ON"
|
||||||
|
"-DCMAKE_CXX_COMPILER=${compiler}"
|
||||||
|
# Manually define CMAKE_INSTALL_<DIR>
|
||||||
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||||
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||||
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||||
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||||
|
"-DHIPBLASLT_ENABLE_MARKER=Off"
|
||||||
|
# FIXME what are the implications of hardcoding this?
|
||||||
|
"-DTensile_CODE_OBJECT_VERSION=V5"
|
||||||
|
"-DTensile_COMPILER=${compiler}"
|
||||||
|
"-DAMDGPU_TARGETS=${gpuTargets'}"
|
||||||
|
"-DGPU_TARGETS=${gpuTargets'}"
|
||||||
|
"-DTensile_LIBRARY_FORMAT=msgpack"
|
||||||
|
]
|
||||||
|
++ lib.optionals (!supportsTargetArches) [
|
||||||
|
"-DBUILD_WITH_TENSILE=OFF"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"-DBUILD_CLIENTS_TESTS=ON"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildBenchmarks [
|
||||||
|
"-DBUILD_CLIENTS_BENCHMARKS=ON"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildSamples [
|
||||||
|
"-DBUILD_CLIENTS_SAMPLES=ON"
|
||||||
|
];
|
||||||
|
|
||||||
|
# Move the optional client binaries out of $out into their dedicated outputs
# ("test", "benchmark", "sample"), which only exist when the matching
# build* flag is enabled.
# hipBLASLt's client executables are prefixed `hipblaslt-`, not `hipblas-`.
postInstall =
  lib.optionalString buildTests ''
    mkdir -p $test/bin
    mv $out/bin/hipblaslt-test $test/bin
  ''
  # Glob so that any additional hipblaslt-bench-* helper binaries move as well.
  + lib.optionalString buildBenchmarks ''
    mkdir -p $benchmark/bin
    mv $out/bin/hipblaslt-bench* $benchmark/bin
  ''
  # NOTE(review): the `example-*` pattern is carried over unchanged; confirm it
  # matches the sample binary names hipBLASLt actually installs.
  + lib.optionalString buildSamples ''
    mkdir -p $sample/bin
    mv $out/bin/example-* $sample/bin
  ''
  # rmdir only succeeds on an empty directory, so this fails the build if an
  # unexpected binary is still left in $out/bin.
  + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
    rmdir $out/bin
  '';
|
||||||
|
# If this is false there are no kernels in the output lib
|
||||||
|
# and it's useless at runtime
|
||||||
|
# so if it's an optional dep it's best to not depend on it
|
||||||
|
# Some packages like torch need hipblaslt to compile
|
||||||
|
# and are fine ignoring it at runtime if it's not supported
|
||||||
|
# so we have to support building an empty hipblaslt
|
||||||
|
passthru.supportsTargetArches = supportsTargetArches;
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner repo;
|
||||||
|
};
|
||||||
|
passthru.tensilelite = tensile';
|
||||||
|
meta = with lib; {
|
||||||
|
description = "hipBLASLt is a library that provides general matrix-matrix operations with a flexible API";
|
||||||
|
homepage = "https://github.com/ROCm/hipBLASlt";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
)
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Luna Nova <git@lunnova.dev>
|
||||||
|
Date: Fri, 11 Oct 2024 02:56:22 -0700
|
||||||
|
Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived
|
||||||
|
platforms
|
||||||
|
|
||||||
|
Fixes UB when hipBinUtilPtr_ is used.
|
||||||
|
---
|
||||||
|
amd/hipcc/src/hipBin_amd.h | 1 -
|
||||||
|
amd/hipcc/src/hipBin_nvidia.h | 1 -
|
||||||
|
2 files changed, 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h
|
||||||
|
index 0a782d1beab9..36cd625ae8bc 100644
|
||||||
|
--- a/src/hipBin_amd.h
|
||||||
|
+++ b/src/hipBin_amd.h
|
||||||
|
@@ -42,7 +42,6 @@ THE SOFTWARE.
|
||||||
|
|
||||||
|
class HipBinAmd : public HipBinBase {
|
||||||
|
private:
|
||||||
|
- HipBinUtil* hipBinUtilPtr_;
|
||||||
|
string hipClangPath_ = "";
|
||||||
|
string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_;
|
||||||
|
PlatformInfo platformInfoAMD_;
|
||||||
|
diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h
|
||||||
|
index ff142cc1cea2..09b7b80979c7 100644
|
||||||
|
--- a/src/hipBin_nvidia.h
|
||||||
|
+++ b/src/hipBin_nvidia.h
|
||||||
|
@@ -31,7 +31,6 @@ THE SOFTWARE.
|
||||||
|
|
||||||
|
class HipBinNvidia : public HipBinBase {
|
||||||
|
private:
|
||||||
|
- HipBinUtil* hipBinUtilPtr_;
|
||||||
|
string cudaPath_ = "";
|
||||||
|
PlatformInfo platformInfoNV_;
|
||||||
|
string hipCFlags_, hipCXXFlags_, hipLdFlags_;
|
||||||
|
--
|
||||||
|
2.46.0
|
||||||
|
|
||||||
47
pkgs/rocm-modules/hipcc/default.nix
Normal file
47
pkgs/rocm-modules/hipcc/default.nix
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
{
  lib,
  stdenv,
  rocm-merged-llvm,
  cmake,
  lsb-release,
}:

# hipcc compiler driver, built from the amd/hipcc subdirectory of the
# ROCm LLVM source tree.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipcc";
  # In-tree with ROCm LLVM, so version and source are taken from there.
  version = rocm-merged-llvm.version;
  src = rocm-merged-llvm.llvm-src;
  sourceRoot = "${finalAttrs.src.name}/amd/hipcc";

  nativeBuildInputs = [ cmake ];
  buildInputs = [ rocm-merged-llvm ];

  patches = [
    # https://github.com/ROCm/llvm-project/pull/183
    # Fixes always-invoked UB in hipcc
    ./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch
  ];

  # Point the hard-coded lsb_release path at the store copy.
  postPatch = ''
    substituteInPlace src/hipBin_amd.h \
      --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release"
  '';

  cmakeFlags = [ "-DCMAKE_BUILD_TYPE=Release" ];

  # Replace the installed $out/hip/bin directory with a symlink to $out/bin.
  postInstall = ''
    rm -r $out/hip/bin
    ln -s $out/bin $out/hip/bin
  '';

  meta = {
    description = "Compiler driver utility that calls clang or nvcc";
    homepage = "https://github.com/ROCm/HIPCC";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
102
pkgs/rocm-modules/hipcub/default.nix
Normal file
102
pkgs/rocm-modules/hipcub/default.nix
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

# hipCUB wrapper library. Built here against rocPRIM; CUB can also be used
# as a backend instead.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipcub";
  version = "6.4.1";

  # Test and benchmark binaries get their own outputs when enabled.
  outputs = [
    "out"
  ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipCUB";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-pwCAsRx5XyuCvppTmZ4VG83iYl9ilAQCZds4oKINhSI=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [
    rocprim
  ]
  ++ lib.optional buildTests gtest
  ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags = [
    "-DHIP_ROOT_DIR=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_TEST=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  # Move client binaries into their dedicated outputs; the final rmdir only
  # succeeds when $out/bin is empty afterwards.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Thin wrapper library on top of rocPRIM or CUB";
    homepage = "https://github.com/ROCm/hipCUB";
    license = [ lib.licenses.bsd3 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
125
pkgs/rocm-modules/hipfft/default.nix
Normal file
125
pkgs/rocm-modules/hipfft/default.nix
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  git,
  rocfft,
  gtest,
  boost,
  fftw,
  fftwFloat,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

let
  # True when any of the optional client programs is being built.
  buildAnyClient = buildTests || buildBenchmarks || buildSamples;
in
# hipFFT marshalling library, built against rocFFT (cuFFT can also be used).
stdenv.mkDerivation (finalAttrs: {
  pname = "hipfft";
  version = "6.4.1";

  # Each enabled client kind is split into its own output.
  outputs = [
    "out"
  ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildBenchmarks "benchmark"
  ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipFFT";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-VA9OC/TvvQyFCVVox/9ihuE2W0Ia87O0R3YsLM4Jzuk=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    clr
    git
    cmake
    rocm-cmake
  ];

  buildInputs = [
    rocfft
  ]
  ++ lib.optionals buildAnyClient [
    gtest
    boost
    fftw
    fftwFloat
    openmp
  ];

  cmakeFlags = [
    "-DCMAKE_C_COMPILER=hipcc"
    "-DCMAKE_CXX_COMPILER=hipcc"
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DHIP_PATH=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_RIDER=ON"
  ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Tests and the rider benchmark are moved out of $out/bin; samples are
  # taken from clients/staging and have their rpath shrunk to store paths.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipfft-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipfft-rider $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/hipfft_* $sample/bin
      patchelf $sample/bin/hipfft_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "FFT marshalling library";
    homepage = "https://github.com/ROCm/hipFFT";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
66
pkgs/rocm-modules/hipfort/default.nix
Normal file
66
pkgs/rocm-modules/hipfort/default.nix
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  gfortran,
}:

# Fortran interfaces for the ROCm libraries, compiled with gfortran.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipfort";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipfort";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-a2YPGAWP+gF2EykpKmkG/fEIW6blm2ChOybmLAHQQdw=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    gfortran
  ];

  cmakeFlags = [
    "-DHIPFORT_COMPILER=${gfortran}/bin/gfortran"
    "-DHIPFORT_AR=${gfortran.cc}/bin/gcc-ar"
    "-DHIPFORT_RANLIB=${gfortran.cc}/bin/gcc-ranlib"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Replace absolute /bin/<tool> references with plain tool names.
  # The bare `--replace` flag is deprecated; `--replace-warn` keeps its
  # tolerant behaviour (warn, do not fail, when a pattern is absent).
  # NOTE(review): tighten to `--replace-fail` once every pattern is confirmed
  # to be present in the pinned sources.
  postPatch = ''
    patchShebangs bin

    substituteInPlace bin/hipfc bin/mymcpu \
      --replace-warn "/bin/cat" "cat"

    substituteInPlace bin/CMakeLists.txt \
      --replace-warn "/bin/mkdir" "mkdir" \
      --replace-warn "/bin/cp" "cp" \
      --replace-warn "/bin/sed" "sed" \
      --replace-warn "/bin/chmod" "chmod" \
      --replace-warn "/bin/ln" "ln"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Fortran interfaces for ROCm libraries";
    homepage = "https://github.com/ROCm/hipfort";
    license = [ lib.licenses.mit ]; # mitx11
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
64
pkgs/rocm-modules/hipify/default.nix
Normal file
64
pkgs/rocm-modules/hipify/default.nix
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  libxml2,
  rocm-merged-llvm,
  zlib,
  zstd,
  perl,
}:

# HIPIFY: tooling that converts CUDA sources to portable HIP C++.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipify";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "HIPIFY";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-uj25WmGCpwouS1yzW9Oil5Vyrbyj5yRITvWF9WaGozM=";
  };

  nativeBuildInputs = [
    cmake
  ];

  buildInputs = [
    libxml2
    rocm-merged-llvm
    zlib
    zstd
    perl
  ];

  # Use the clang passed to this package instead of the one under
  # LLVM_TOOLS_BINARY_DIR, and make the shipped scripts executable.
  # The bare `--replace` flag is deprecated; `--replace-warn` keeps its
  # tolerant behaviour (warn, do not fail, when the pattern is absent).
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-warn "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang"
    chmod +x bin/*
  '';

  # Installed scripts need the executable bit and store-path shebangs.
  postInstall = ''
    chmod +x $out/bin/*
    chmod +x $out/libexec/*
    patchShebangs $out/bin/
    patchShebangs $out/libexec/
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Convert CUDA to Portable C++ Code";
    homepage = "https://github.com/ROCm/HIPIFY";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
79
pkgs/rocm-modules/hiprand/default.nix
Normal file
79
pkgs/rocm-modules/hiprand/default.nix
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  rocrand,
  gtest,
  buildTests ? false,
  gpuTargets ? [ ],
}:

# hipRAND: HIP wrapper around rocRAND (and cuRAND).
stdenv.mkDerivation (finalAttrs: {
  pname = "hiprand";
  version = "6.4.1";

  # Test binaries go to a separate "test" output when enabled.
  outputs = [ "out" ] ++ lib.optional buildTests "test";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipRAND";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-ISl4bVW/JvT81gJ/10JlKliv7ds5WtP2f/Dnc9qvh9Q=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [ rocrand ] ++ lib.optional buildTests gtest;

  cmakeFlags = [
    "-DHIP_ROOT_DIR=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_TEST=ON";

  # Relocate the test binaries and drop the leftover hipRAND directory.
  postInstall = lib.optionalString buildTests ''
    mkdir -p $test/bin
    mv $out/bin/test_* $test/bin
    rm -r $out/bin/hipRAND
    # Fail if bin/ isn't actually empty
    rmdir $out/bin
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "HIP wrapper for rocRAND and cuRAND";
    homepage = "https://github.com/ROCm/hipRAND";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
60
pkgs/rocm-modules/hiprt/default.nix
Normal file
60
pkgs/rocm-modules/hiprt/default.nix
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  clr,
  gcc,
}:

# HIPRT, built from the GPUOpen sources against the ROCm HIP runtime (clr).
stdenv.mkDerivation (finalAttrs: {
  pname = "hiprt";
  version = "2.5.a21e075.3";

  src = fetchFromGitHub {
    owner = "GPUOpen-LibrariesAndSDKs";
    repo = "HIPRT";
    tag = finalAttrs.version;
    # SRI hashes belong in `hash`; `sha256` is the legacy attribute name.
    hash = "sha256-3yGhwIsFHlFMCEzuYnXuXNzs99m7f2LTkYaTGs0GEcI=";
  };

  # Rebuild the easy-encryption helper from source so the prebuilt binary
  # shipped in the repository is not used.
  postPatch = ''
    g++ contrib/easy-encryption/cl.cpp -o contrib/easy-encryption/bin/linux/ee64 #replacing prebuilt binary
  '';

  nativeBuildInputs = [
    gcc # required for replacing easy-encryption binary
    cmake
    clr
  ];

  buildInputs = [
    # TODO: do we need anything here?
  ];

  cmakeFlags = [
    #TODO: mostly copied from the Arch package, verify these:
    "-D CMAKE_BUILD_TYPE=Release"
    "-D HIP_PATH=${clr}"
    "-D BAKE_KERNEL=OFF"
    "-D BAKE_COMPILED_KERNEL=OFF"
    "-D BITCODE=ON"
    "-D PRECOMPILE=ON"
    "-D NO_UNITTEST=ON"
    "-D FORCE_DISABLE_CUDA=ON"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-D CMAKE_INSTALL_BINDIR=bin"
    "-D CMAKE_INSTALL_LIBDIR=lib"
    "-D CMAKE_INSTALL_INCLUDEDIR=include"
  ];

  meta = {
    homepage = "https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT";
    # Previously an empty string, which left the package without a summary.
    description = "Ray tracing library for HIP";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      mksafavi
    ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
119
pkgs/rocm-modules/hipsolver/default.nix
Normal file
119
pkgs/rocm-modules/hipsolver/default.nix
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  rocblas,
  rocsolver,
  rocsparse,
  suitesparse,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
}:

# hipSOLVER marshalling library, built against rocSOLVER (cuSOLVER can also
# be used).
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsolver";
  version = "6.4.1";

  # Each enabled client kind is split into its own output.
  outputs = [
    "out"
  ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildBenchmarks "benchmark"
  ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSOLVER";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-4ig8/P7JQCx3WB+PRHlhSlRhzdbnDo8QrFnWxsxJdwk=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs = [
    rocblas
    rocsolver
    rocsparse
    suitesparse
  ]
  ++ lib.optional buildTests gtest
  ++ lib.optional (buildTests || buildBenchmarks) lapack-reference;

  cmakeFlags = [
    "-DCMAKE_CXX_COMPILER=hipcc"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master
  ]
  ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
  ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Tests and benchmarks are moved out of $out/bin; samples are taken from
  # clients/staging and have their rpath shrunk to store paths.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipsolver-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipsolver-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example-* $sample/bin
      patchelf $sample/bin/example-* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm SOLVER marshalling library";
    homepage = "https://github.com/ROCm/hipSOLVER";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
152
pkgs/rocm-modules/hipsparse/default.nix
Normal file
152
pkgs/rocm-modules/hipsparse/default.nix
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocsparse,
  clr,
  gfortran,
  git,
  gtest,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

# This can also use cuSPARSE as a backend instead of rocSPARSE
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsparse";
  version = "6.4.1";

  # Test and sample binaries get their own outputs when enabled.
  outputs = [
    "out"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildSamples [
    "sample"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSPARSE";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-pRR/3t7YXgtPQwGFb5lA6DI2OTF6AnDcfkydRIEod2Q=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs = [
    rocsparse
    git
  ]
  ++ lib.optionals (buildTests || buildBenchmarks) [
    gtest
  ]
  ++ lib.optionals (buildTests || buildSamples) [
    openmp
  ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
    (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
    (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples)
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ];

  # We have to manually generate the matrices
  # CMAKE_MATRICES_DIR seems to be reset in clients/tests/CMakeLists.txt
  #
  # The matrices directory is addressed as $NIX_BUILD_TOP/$sourceRoot/matrices
  # rather than a hard-coded /build/source path, so the build does not depend
  # on where the builder places its build directory.
  # `--replace-warn` replaces the deprecated bare `--replace` flag and keeps
  # its tolerant behaviour.
  postPatch = lib.optionalString buildTests ''
    mkdir -p matrices

    ln -s ${rocsparse.passthru.matrices.matrix-01}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-02}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-03}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-04}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-05}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-06}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-07}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-08}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-09}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-10}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-11}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-12}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-13}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-14}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-15}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-16}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-17}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-18}/*.mtx matrices
    ln -s ${rocsparse.passthru.matrices.matrix-19}/*.mtx matrices

    # Not used by the original cmake, causes an error
    rm matrices/*_b.mtx

    echo "deps/convert.cpp -> deps/mtx2csr"
    hipcc deps/convert.cpp -O3 -o deps/mtx2csr

    for mat in $(ls -1 matrices | cut -d "." -f 1); do
      echo "mtx2csr: $mat.mtx -> $mat.bin"
      deps/mtx2csr matrices/$mat.mtx matrices/$mat.bin
      unlink matrices/$mat.mtx
    done

    substituteInPlace clients/tests/CMakeLists.txt \
      --replace-warn "\''${PROJECT_BINARY_DIR}/matrices" "$NIX_BUILD_TOP/$sourceRoot/matrices"
  '';

  # Tests (with their converted matrices) and samples are moved to their own
  # outputs; sample binaries get an explicit rpath.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipsparse-test $test/bin
      mv "$NIX_BUILD_TOP/$sourceRoot/matrices" $test
      rmdir $out/bin
    ''
    + lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example_* $sample/bin
      patchelf --set-rpath $out/lib:${
        lib.makeLibraryPath (
          finalAttrs.buildInputs
          ++ [
            clr
            gfortran.cc
          ]
        )
      } $sample/bin/example_*
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm SPARSE marshalling library";
    homepage = "https://github.com/ROCm/hipSPARSE";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
53
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/default.nix
Normal file
53
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/default.nix
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchurl,
  callPackage,
  dpkg,
  rocm-core,
}:

# Repackages the hsa-amd-aqlprofile .deb from repo.radeon.com; nothing is
# built from source.
stdenv.mkDerivation (finalAttrs: {
  pname = "hsa-amd-aqlprofile-bin";
  version = "6.3.3";

  src =
    let
      inherit (finalAttrs) version;
      patch = rocm-core.ROCM_LIBPATCH_VERSION;
      majorMinor = lib.versions.majorMinor version;
      # An X.Y.0 release uses "X.Y" as its pool directory; any other version
      # uses the full version string.
      poolVersion = if majorMinor + ".0" == version then majorMinor else version;
      incremental = "74";
      osRelease = "22.04";
    in
    fetchurl {
      url = "https://repo.radeon.com/rocm/apt/${poolVersion}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${patch}-${incremental}~${osRelease}_amd64.deb";
      hash = "sha256-Lo6gU9ywkujtsKvnOAwL3L8qQNPwjjm0Pm4OyzoUYao=";
    };

  nativeBuildInputs = [ dpkg ];
  dontPatch = true;
  dontConfigure = true;
  dontBuild = true;

  # Copy the unpacked opt/rocm-<version>* tree into $out and mark the shared
  # objects executable.
  installPhase = ''
    runHook preInstall

    mkdir -p $out
    cp -a opt/rocm-${finalAttrs.version}*/* $out
    chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.*
    chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so

    runHook postInstall
  '';

  passthru.updateScript = (callPackage ./update.nix { }) { inherit (finalAttrs) version; };

  meta = {
    description = "AQLPROFILE library for AMD HSA runtime API extension support";
    homepage = "https://rocm.docs.amd.com/en/latest/";
    # The package consists of prebuilt shared objects taken from a .deb.
    sourceProvenance = [ lib.sourceTypes.binaryNativeCode ];
    license = [ lib.licenses.unfree ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
61
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/update.nix
Normal file
61
pkgs/rocm-modules/hsa-amd-aqlprofile-bin/update.nix
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
{
  lib,
  writeScript,
}:

# Takes the currently packaged version and returns the update command as a
# single-element list containing the generated script.
{ version }:

let
  prefix = "hsa-amd-aqlprofile";
  # Version components joined with "0" (e.g. 6.3.3 -> 60303).
  # NOTE(review): for three single-digit components this is 5 characters,
  # while the script below only proceeds when the value parsed from the
  # package name has length 6 - confirm against real package file names.
  extVersion = lib.strings.concatStrings (
    lib.strings.intersperse "0" (lib.versions.splitVersion version)
  );
  major = lib.versions.major version;
  minor = lib.versions.minor version;
  patch = lib.versions.patch version;

  # Scrapes the repo.radeon.com apt pool listing for the newest package and
  # hands the result to update-source-version.
  updateScript = writeScript "update.sh" ''
    #!/usr/bin/env nix-shell
    #!nix-shell -i bash -p curl common-updater-scripts
    apt="https://repo.radeon.com/rocm/apt"
    pool="pool/main/h/${prefix}/"
    url="$apt/latest/$pool"
    res="$(curl -sL "$url")"
    deb="${prefix}$(echo "$res" | grep -o -P "(?<=href=\"${prefix}).*(?=\">)" | tail -1)"
    patch="${patch}"

    # Try up to 10 patch versions
    for i in {1..10}; do
      ((patch++))
      extVersion="$(echo "$deb" | grep -o -P "(?<=\.....).*(?=\..*-)")"

      if (( ''${#extVersion} == 6 )) && (( $extVersion <= ${extVersion} )); then
        url="https://repo.radeon.com/rocm/apt/${major}.${minor}.$patch/pool/main/h/${prefix}/"
        res="$(curl -sL "$url")"
        deb="${prefix}$(echo "$res" | grep -o -P "(?<=href=\"${prefix}).*(?=\">)" | tail -1)"
      else
        break
      fi
    done

    extVersion="$(echo "$deb" | grep -o -P "(?<=\.....).*(?=\..*-)")"
    version="$(echo "$extVersion" | sed "s/0/./1" | sed "s/0/./1")"
    IFS='.' read -r -a version_arr <<< "$version"

    if (( ''${version_arr[0]} > 6 )); then
      echo "'rocmPackages_6.${prefix}-bin' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${prefix}-bin'." 1>&2
      exit 1
    fi

    if (( ''${#extVersion} == 6 )); then
      repoVersion="$version"

      if (( ''${version:4:1} == 0 )); then
        repoVersion=''${version:0:3}
      fi

      update-source-version rocmPackages_6.${prefix}-bin "$version" "" "$apt/$repoVersion/$pool$deb" --ignore-same-hash
    fi
  '';
in
[ updateScript ]
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
diff --git a/cmake/modules/AddClang.cmake b/cmake/modules/AddClang.cmake
|
||||||
|
index 75b0080f6..c895b884c 100644
|
||||||
|
--- a/cmake/modules/AddClang.cmake
|
||||||
|
+++ b/cmake/modules/AddClang.cmake
|
||||||
|
@@ -119,8 +119,8 @@ macro(add_clang_library name)
|
||||||
|
install(TARGETS ${lib}
|
||||||
|
COMPONENT ${lib}
|
||||||
|
${export_to_clangtargets}
|
||||||
|
- LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
|
||||||
|
- ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}
|
||||||
|
+ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}"
|
||||||
|
+ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}"
|
||||||
|
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
|
||||||
|
|
||||||
|
if (NOT LLVM_ENABLE_IDE)
|
||||||
|
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
|
||||||
|
index e6ae4e19e..5ef01aea2 100644
|
||||||
|
--- a/lib/Headers/CMakeLists.txt
|
||||||
|
+++ b/lib/Headers/CMakeLists.txt
|
||||||
|
@@ -337,6 +337,7 @@ set(llvm_libc_wrapper_files
|
||||||
|
|
||||||
|
include(GetClangResourceDir)
|
||||||
|
get_clang_resource_dir(output_dir PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include)
|
||||||
|
+set(header_install_dir ${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION_MAJOR}/include)
|
||||||
|
set(out_files)
|
||||||
|
set(generated_files)
|
||||||
|
|
||||||
|
diff --git a/tools/libclang/CMakeLists.txt b/tools/libclang/CMakeLists.txt
|
||||||
|
index b5b6d2807..6b592d255 100644
|
||||||
|
--- a/tools/libclang/CMakeLists.txt
|
||||||
|
+++ b/tools/libclang/CMakeLists.txt
|
||||||
|
@@ -246,7 +246,7 @@ foreach(PythonVersion ${CLANG_PYTHON_BINDINGS_VERSIONS})
|
||||||
|
COMPONENT
|
||||||
|
libclang-python-bindings
|
||||||
|
DESTINATION
|
||||||
|
- "lib${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages")
|
||||||
|
+ "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/python${PythonVersion}/site-packages")
|
||||||
|
endforeach()
|
||||||
|
if(NOT LLVM_ENABLE_IDE)
|
||||||
|
add_custom_target(libclang-python-bindings)
|
||||||
|
diff --git a/tools/scan-build-py/CMakeLists.txt b/tools/scan-build-py/CMakeLists.txt
|
||||||
|
index 3aca22c0b..3115353e3 100644
|
||||||
|
--- a/tools/scan-build-py/CMakeLists.txt
|
||||||
|
+++ b/tools/scan-build-py/CMakeLists.txt
|
||||||
|
@@ -88,7 +88,7 @@ foreach(lib ${LibScanbuild})
|
||||||
|
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib})
|
||||||
|
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib})
|
||||||
|
install(FILES lib/libscanbuild/${lib}
|
||||||
|
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild
|
||||||
|
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild"
|
||||||
|
COMPONENT scan-build-py)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
@@ -106,7 +106,7 @@ foreach(resource ${LibScanbuildResources})
|
||||||
|
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource})
|
||||||
|
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource})
|
||||||
|
install(FILES lib/libscanbuild/resources/${resource}
|
||||||
|
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild/resources
|
||||||
|
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libscanbuild/resources"
|
||||||
|
COMPONENT scan-build-py)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
@@ -122,7 +122,7 @@ foreach(lib ${LibEar})
|
||||||
|
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib})
|
||||||
|
list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libear/${lib})
|
||||||
|
install(FILES lib/libear/${lib}
|
||||||
|
- DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libear
|
||||||
|
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/libear"
|
||||||
|
COMPONENT scan-build-py)
|
||||||
|
endforeach()
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp
|
||||||
|
index 57368104c914..71c57f72078e 100644
|
||||||
|
--- a/lib/Driver/ToolChains/Linux.cpp
|
||||||
|
+++ b/lib/Driver/ToolChains/Linux.cpp
|
||||||
|
@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
|
||||||
|
return;
|
||||||
|
|
||||||
|
// LOCAL_INCLUDE_DIR
|
||||||
|
+ if (!SysRoot.empty())
|
||||||
|
addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include"));
|
||||||
|
// TOOL_INCLUDE_DIR
|
||||||
|
AddMultilibIncludeArgs(DriverArgs, CC1Args);
|
||||||
|
@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
|
||||||
|
// Add an include of '/include' directly. This isn't provided by default by
|
||||||
|
// system GCCs, but is often used with cross-compiling GCCs, and harmless to
|
||||||
|
// add even when Clang is acting as-if it were a system compiler.
|
||||||
|
+ if (!SysRoot.empty())
|
||||||
|
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include"));
|
||||||
|
|
||||||
|
+ if (!SysRoot.empty())
|
||||||
|
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include"));
|
||||||
|
|
||||||
|
if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl())
|
||||||
40
pkgs/rocm-modules/llvm/clang-log-jobs.diff
Normal file
40
pkgs/rocm-modules/llvm/clang-log-jobs.diff
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
|
||||||
|
index 06f5e7e7e335..8407d664886a 100644
|
||||||
|
--- a/lib/Driver/Compilation.cpp
|
||||||
|
+++ b/lib/Driver/Compilation.cpp
|
||||||
|
@@ -340,6 +340,9 @@ private:
|
||||||
|
void Compilation::ExecuteJobs(const JobList &Jobs,
|
||||||
|
FailingCommandList &FailingCommands,
|
||||||
|
bool LogOnly) const {
|
||||||
|
+ // If there is more than one job, log each job as it finishes so that progress is visible while building many offloads
|
||||||
|
+ const bool logJobs = Jobs.size() > 1;
|
||||||
|
+ auto start_time = std::chrono::steady_clock::now();
|
||||||
|
// According to UNIX standard, driver need to continue compiling all the
|
||||||
|
// inputs on the command line even one of them failed.
|
||||||
|
// In all but CLMode, execute all the jobs unless the necessary inputs for the
|
||||||
|
@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs,
|
||||||
|
|
||||||
|
JS.setJobState(Next, JobScheduler::JS_RUN);
|
||||||
|
auto Work = [&, Next]() {
|
||||||
|
+ auto job_start_time = std::chrono::steady_clock::now();
|
||||||
|
const Command *FailingCommand = nullptr;
|
||||||
|
if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) {
|
||||||
|
FailingCommands.push_back(std::make_pair(Res, FailingCommand));
|
||||||
|
JS.setJobState(Next, JobScheduler::JS_FAIL);
|
||||||
|
} else {
|
||||||
|
+ if (logJobs && Next) {
|
||||||
|
+ auto now = std::chrono::steady_clock::now();
|
||||||
|
+ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count();
|
||||||
|
+ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count();
|
||||||
|
+ if (duration > 10 && job_duration > 0) {
|
||||||
|
+ if (Next->getOutputFilenames().empty())
|
||||||
|
+ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n";
|
||||||
|
+ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true);
|
||||||
|
+ else
|
||||||
|
+ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n";
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
JS.setJobState(Next, JobScheduler::JS_DONE);
|
||||||
|
}
|
||||||
|
};
|
||||||
570
pkgs/rocm-modules/llvm/default.nix
Normal file
570
pkgs/rocm-modules/llvm/default.nix
Normal file
@@ -0,0 +1,570 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
llvmPackages_19,
|
||||||
|
overrideCC,
|
||||||
|
rocm-device-libs,
|
||||||
|
rocm-runtime,
|
||||||
|
fetchFromGitHub,
|
||||||
|
runCommand,
|
||||||
|
symlinkJoin,
|
||||||
|
rdfind,
|
||||||
|
wrapBintoolsWith,
|
||||||
|
emptyDirectory,
|
||||||
|
zstd,
|
||||||
|
zlib,
|
||||||
|
gcc-unwrapped,
|
||||||
|
glibc,
|
||||||
|
replaceVars,
|
||||||
|
libffi,
|
||||||
|
libxml2,
|
||||||
|
removeReferencesTo,
|
||||||
|
fetchpatch,
|
||||||
|
writeShellScript,
|
||||||
|
makeWrapper,
|
||||||
|
# Build compilers and stdenv suitable for profiling
|
||||||
|
# compressed line tables (-g1 -gz) and
|
||||||
|
# frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer)
|
||||||
|
# TODO: Should also apply to downstream packages which use rocmClangStdenv
|
||||||
|
profilableStdenv ? false,
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
llvmPackagesNoBintools = llvmPackages_19.override {
|
||||||
|
bootBintools = null;
|
||||||
|
bootBintoolsNoLibc = null;
|
||||||
|
};
|
||||||
|
useLibcxx = false; # whether the ROCm stdenv uses libc++ (LLVM's C++ standard library) instead of GCC's libstdc++
|
||||||
|
|
||||||
|
llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM;
|
||||||
|
llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv (
|
||||||
|
llvmPackagesNoBintools.libstdcxxClang.override {
|
||||||
|
inherit (llvmPackages_19) bintools;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv;
|
||||||
|
gcc-include = runCommand "gcc-include" { } ''
|
||||||
|
mkdir -p $out
|
||||||
|
ln -s ${gcc-unwrapped}/include/ $out/
|
||||||
|
ln -s ${gcc-unwrapped}/lib/ $out/
|
||||||
|
'';
|
||||||
|
|
||||||
|
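# Toolchain store paths that the built ROCm LLVM outputs must not reference (appended to disallowedReferences below); the GCC prefix used when building rocmcxx is `gcc-prefix`, defined right after this list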
|
||||||
|
disallowedRefsForToolchain = [
|
||||||
|
stdenv.cc
|
||||||
|
stdenv.cc.cc
|
||||||
|
stdenv.cc.bintools
|
||||||
|
gcc-unwrapped
|
||||||
|
stdenvToBuildRocmLlvm
|
||||||
|
];
|
||||||
|
gcc-prefix =
|
||||||
|
let
|
||||||
|
gccPrefixPaths = [
|
||||||
|
gcc-unwrapped
|
||||||
|
gcc-unwrapped.lib
|
||||||
|
glibc.dev
|
||||||
|
];
|
||||||
|
in
|
||||||
|
symlinkJoin {
|
||||||
|
name = "gcc-prefix";
|
||||||
|
paths = gccPrefixPaths ++ [
|
||||||
|
glibc
|
||||||
|
];
|
||||||
|
disallowedRequisites = gccPrefixPaths;
|
||||||
|
postBuild = ''
|
||||||
|
rm -rf $out/{bin,libexec,nix-support,lib64,share,etc}
|
||||||
|
rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h
|
||||||
|
|
||||||
|
# Stage the symlink tree in the builder's temporary directory ($NIX_BUILD_TOP) instead of assuming it is /build, then copy it back dereferenced (-L)
|
||||||
|
mkdir "$NIX_BUILD_TOP/tmpout"
|
||||||
|
mv $out/* "$NIX_BUILD_TOP/tmpout"
|
||||||
|
cp -Lr --no-preserve=mode "$NIX_BUILD_TOP"/tmpout/* $out/
|
||||||
|
set -x
|
||||||
|
versionedIncludePath="$(echo $out/include/c++/*/)"
|
||||||
|
mv $versionedIncludePath/* $out/include/c++/
|
||||||
|
rm -rf $versionedIncludePath/
|
||||||
|
|
||||||
|
find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} +
|
||||||
|
|
||||||
|
ln -s $out $out/x86_64-unknown-linux-gnu
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
version = "6.4.1";
|
||||||
|
# major version of this should be the clang version ROCm forked from
|
||||||
|
rocmLlvmVersion = "19.0.0-${llvmSrc.rev}";
|
||||||
|
usefulOutputs =
|
||||||
|
drv:
|
||||||
|
builtins.filter (x: x != null) [
|
||||||
|
drv
|
||||||
|
(drv.lib or null)
|
||||||
|
(drv.dev or null)
|
||||||
|
];
|
||||||
|
listUsefulOutputs = builtins.concatMap usefulOutputs;
|
||||||
|
# llvmSrc = fetchFromGitHub {
|
||||||
|
# # Performance improvements cherry-picked on top of rocm-6.3.x
|
||||||
|
# # most importantly, amdgpu-early-alwaysinline memory usage fix
|
||||||
|
# owner = "LunNova";
|
||||||
|
# repo = "llvm-project-rocm";
|
||||||
|
# rev = "4182046534deb851753f0d962146e5176f648893";
|
||||||
|
# hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c=";
|
||||||
|
# };
|
||||||
|
llvmSrc = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "llvm-project";
|
||||||
|
# rev = "873e9660026931bbd2cbce41475090039f81f8c7";
|
||||||
|
# hash = "sha256-7B4NQ1LBN3btHjrh9Ht2S+BYYqhKNPAwMoP18qYJw4E=";
|
||||||
|
# hash = "sha256-cIkKG5rB34G+AqonOS76acnhynmI29PIW7TuE0SQzO4=";
|
||||||
|
# hash = "sha256-DtbPZ75KqfPqPsGwmT1sUyeI1HyICDDs3SxwQQl72BM=";
|
||||||
|
rev = "rocm-${version}";
|
||||||
|
# hash = "sha256-h4mD6gu0Gt4zYme7qtlm9QpsqWZ6XoH+XKd3hsQly1I=";
|
||||||
|
# hash = "sha256-jJsmPainHOd4BJ0bQbf1M3Kd4+aLbx3ENxtuzJ9+lLY=";
|
||||||
|
# hash = "sha256-5n3EQby17JEgr3kh1pUNuo/La4hUxMf10O7CckVMS5U=";
|
||||||
|
hash = "sha256-84+ZsKjIhXip2yLU5jpoV53+ejxy2dzgamVU6AcAngU=";
|
||||||
|
# hash = "sha256-4b1d9a2c7f0e8c3f5b6d8c1e4f0b2c5f3a6b7c8d9e0f1a2b3c4d5e6f7g8h9i0j";
|
||||||
|
};
|
||||||
|
llvmSrcFixed = llvmSrc;
|
||||||
|
llvmMajorVersion = lib.versions.major rocmLlvmVersion;
|
||||||
|
# An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree
|
||||||
|
# optionally using LLVM libcxx
|
||||||
|
llvmPackagesRocm = llvmPackages_19.override (_old: {
|
||||||
|
stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv;
|
||||||
|
|
||||||
|
# not setting gitRelease = because that causes patch selection logic to use git patches
|
||||||
|
# ROCm LLVM is closer to 18 official
|
||||||
|
# gitRelease = {
|
||||||
|
# rev-version = rocmLlvmVersion;
|
||||||
|
# };
|
||||||
|
# gitRelease = null;
|
||||||
|
# officialRelease = null;
|
||||||
|
officialRelease = { }; # Set but empty because we're overriding everything from it.
|
||||||
|
version = rocmLlvmVersion;
|
||||||
|
src = llvmSrcFixed;
|
||||||
|
monorepoSrc = llvmSrcFixed;
|
||||||
|
doCheck = false;
|
||||||
|
});
|
||||||
|
sysrootCompiler =
|
||||||
|
cc: name: paths:
|
||||||
|
let
|
||||||
|
linked = symlinkJoin { inherit name paths; };
|
||||||
|
in
|
||||||
|
runCommand name { } ''
|
||||||
|
set -x
|
||||||
|
mkdir -p $out/
|
||||||
|
cp --reflink=auto -rL ${linked}/* $out/
|
||||||
|
chmod -R +rw $out
|
||||||
|
mkdir -p $out/usr
|
||||||
|
ln -s $out/ $out/usr/local
|
||||||
|
mkdir -p $out/nix-support/
|
||||||
|
rm -rf $out/lib64 # we don't need mixed 32 bit
|
||||||
|
echo 'export CC=clang' >> $out/nix-support/setup-hook
|
||||||
|
echo 'export CXX=clang++' >> $out/nix-support/setup-hook
|
||||||
|
mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/
|
||||||
|
ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/
|
||||||
|
file $out/bin/.clang-wrapped
|
||||||
|
file $out/bin/.clang++-wrapped
|
||||||
|
find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} +
|
||||||
|
find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} +
|
||||||
|
|
||||||
|
file $out/bin/.clang-wrapped
|
||||||
|
file $out/bin/.clang++-wrapped
|
||||||
|
# our /include now has more than clang expects, so this specific dir still needs to point to cc.dev
|
||||||
|
# FIXME: could copy into a different subdir?
|
||||||
|
sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake
|
||||||
|
# ${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space
|
||||||
|
'';
|
||||||
|
findClangNostdlibincPatch =
|
||||||
|
x:
|
||||||
|
(
|
||||||
|
(lib.strings.hasSuffix "add-nostdlibinc-flag.patch" (builtins.baseNameOf x))
|
||||||
|
|| (lib.strings.hasSuffix "clang-at-least-16-LLVMgold-path.patch" (builtins.baseNameOf x))
|
||||||
|
);
|
||||||
|
llvmTargetsFlag = "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${
|
||||||
|
{
|
||||||
|
"x86_64" = "X86";
|
||||||
|
"aarch64" = "AArch64";
|
||||||
|
}
|
||||||
|
.${llvmStdenv.targetPlatform.parsed.cpu.name}
|
||||||
|
}";
|
||||||
|
# -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too
|
||||||
|
# FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code,
|
||||||
|
# instead of containing object code for multiple target architectures. This allows users to defer the choice of whether to
|
||||||
|
# use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time.
|
||||||
|
|
||||||
|
tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x); # true when the flag does NOT contain "llvm-tblgen"; passed to builtins.filter on old.cmakeFlags, so flags mentioning llvm-tblgen are dropped
|
||||||
|
addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang;
|
||||||
|
# Extra C/C++ flags passed as CMAKE_{C,CXX}_FLAGS_RELEASE; the x86-only -march/-mtune pair is added only on x86_64 hosts, so the string is unchanged there
|
||||||
|
llvmExtraCflags =
|
||||||
|
"-O3 -DNDEBUG"
|
||||||
|
+ (lib.optionalString stdenv.hostPlatform.isx86_64 " -march=skylake -mtune=znver3")
|
||||||
|
+ (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto")
|
||||||
|
+ (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects")
|
||||||
|
+ (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1");
|
||||||
|
in
|
||||||
|
rec {
|
||||||
|
inherit llvmSrc;
|
||||||
|
inherit (llvmPackagesRocm) libunwind;
|
||||||
|
inherit (llvmPackagesRocm) libcxx;
|
||||||
|
# Pass through original attrs for debugging where non-overridden llvm/clang is getting used
|
||||||
|
# llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig
|
||||||
|
# clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.clang-orig
|
||||||
|
llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: {
|
||||||
|
patches = old.patches ++ [
|
||||||
|
./rocm-llvm-6.4-llvm-gold-plugin-fix-ModuleName.patch
|
||||||
|
];
|
||||||
|
# patches = builtins.filter (
|
||||||
|
# x:
|
||||||
|
# (
|
||||||
|
# !(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x))
|
||||||
|
# && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x))
|
||||||
|
# )
|
||||||
|
# ) old.patches;
|
||||||
|
dontStrip = profilableStdenv;
|
||||||
|
nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ];
|
||||||
|
buildInputs = old.buildInputs ++ [
|
||||||
|
zstd
|
||||||
|
zlib
|
||||||
|
];
|
||||||
|
env.NIX_BUILD_ID_STYLE = "fast";
|
||||||
|
postPatch = ''
|
||||||
|
${old.postPatch or ""}
|
||||||
|
patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh
|
||||||
|
'';
|
||||||
|
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
|
||||||
|
cmakeFlags =
|
||||||
|
(builtins.filter tablegenUsage old.cmakeFlags)
|
||||||
|
++ [
|
||||||
|
llvmTargetsFlag
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
|
||||||
|
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
|
||||||
|
"-DLLVM_ENABLE_THREADS=ON"
|
||||||
|
"-DLLVM_ENABLE_LTO=Thin"
|
||||||
|
"-DLLVM_USE_LINKER=lld"
|
||||||
|
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
|
||||||
|
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
|
||||||
|
]
|
||||||
|
++ lib.optionals addGccLtoCmakeFlags [
|
||||||
|
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
|
||||||
|
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
|
||||||
|
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
|
||||||
|
]
|
||||||
|
++ lib.optionals useLibcxx [
|
||||||
|
"-DLLVM_ENABLE_LTO=Thin"
|
||||||
|
"-DLLVM_USE_LINKER=lld"
|
||||||
|
"-DLLVM_ENABLE_LIBCXX=ON"
|
||||||
|
];
|
||||||
|
preConfigure = ''
|
||||||
|
${old.preConfigure or ""}
|
||||||
|
cmakeFlagsArray+=(
|
||||||
|
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||||
|
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||||
|
)
|
||||||
|
'';
|
||||||
|
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
|
||||||
|
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
|
||||||
|
postFixup = ''
|
||||||
|
${old.postFixup or ""}
|
||||||
|
remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a"
|
||||||
|
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
|
||||||
|
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} +
|
||||||
|
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
|
||||||
|
'';
|
||||||
|
doCheck = false;
|
||||||
|
});
|
||||||
|
lld =
|
||||||
|
(llvmPackagesRocm.lld.override {
|
||||||
|
libllvm = llvm;
|
||||||
|
ninja = emptyDirectory;
|
||||||
|
}).overrideAttrs
|
||||||
|
(old: {
|
||||||
|
patches = builtins.filter (
|
||||||
|
x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x))
|
||||||
|
) old.patches;
|
||||||
|
dontStrip = profilableStdenv;
|
||||||
|
nativeBuildInputs = old.nativeBuildInputs ++ [
|
||||||
|
llvmPackagesNoBintools.lld
|
||||||
|
removeReferencesTo
|
||||||
|
];
|
||||||
|
buildInputs = old.buildInputs ++ [
|
||||||
|
zstd
|
||||||
|
zlib
|
||||||
|
];
|
||||||
|
env.NIX_BUILD_ID_STYLE = "fast";
|
||||||
|
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
|
||||||
|
cmakeFlags =
|
||||||
|
(builtins.filter tablegenUsage old.cmakeFlags)
|
||||||
|
++ [
|
||||||
|
llvmTargetsFlag
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
|
||||||
|
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
|
||||||
|
"-DLLVM_ENABLE_THREADS=ON"
|
||||||
|
"-DLLVM_ENABLE_LTO=Thin"
|
||||||
|
"-DLLVM_USE_LINKER=lld"
|
||||||
|
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
|
||||||
|
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
|
||||||
|
]
|
||||||
|
++ lib.optionals addGccLtoCmakeFlags [
|
||||||
|
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
|
||||||
|
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
|
||||||
|
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
|
||||||
|
]
|
||||||
|
++ lib.optionals useLibcxx [
|
||||||
|
"-DLLVM_ENABLE_LIBCXX=ON"
|
||||||
|
];
|
||||||
|
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
|
||||||
|
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
|
||||||
|
postFixup = ''
|
||||||
|
${old.postFixup or ""}
|
||||||
|
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
|
||||||
|
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
|
||||||
|
'';
|
||||||
|
preConfigure = ''
|
||||||
|
${old.preConfigure or ""}
|
||||||
|
cmakeFlagsArray+=(
|
||||||
|
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||||
|
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||||
|
)
|
||||||
|
'';
|
||||||
|
});
|
||||||
|
clang-unwrapped =
|
||||||
|
(
|
||||||
|
(llvmPackagesRocm.clang-unwrapped.override {
|
||||||
|
libllvm = llvm;
|
||||||
|
ninja = emptyDirectory;
|
||||||
|
}).overrideAttrs
|
||||||
|
(
|
||||||
|
old:
|
||||||
|
let
|
||||||
|
# filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches;
|
||||||
|
filteredPatches = builtins.filter (
|
||||||
|
x:
|
||||||
|
(
|
||||||
|
!(lib.strings.hasSuffix "gnu-install-dirs.patch" (builtins.baseNameOf x))
|
||||||
|
# && !(lib.strings.hasSuffix "gnu-install-dirs-polly.patch" (builtins.baseNameOf x))
|
||||||
|
&& !(findClangNostdlibincPatch x)
|
||||||
|
)
|
||||||
|
) old.patches;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
meta.platforms = [
|
||||||
|
"x86_64-linux"
|
||||||
|
];
|
||||||
|
pname = "${old.pname}-rocm";
|
||||||
|
patches = filteredPatches ++ [
|
||||||
|
./96cbfymn788ssbhmay4sy7h268qg81fl-gnu-install-dirs.patch
|
||||||
|
# Never add FHS include paths
|
||||||
|
./clang-bodge-ignore-systemwide-incls.diff
|
||||||
|
# Prevents builds timing out if a single compiler invocation is very slow but
|
||||||
|
# per-arch jobs are completing by ensuring there's terminal output
|
||||||
|
./clang-log-jobs.diff
|
||||||
|
(fetchpatch {
|
||||||
|
# [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler
|
||||||
|
sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8=";
|
||||||
|
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch";
|
||||||
|
relative = "clang";
|
||||||
|
})
|
||||||
|
# FIXME: Needed due to https://github.com/NixOS/nixpkgs/issues/375431
|
||||||
|
# Once we can switch to overrideScope this can be removed
|
||||||
|
# (replaceVars ./../../../compilers/llvm/common/clang/clang-at-least-16-LLVMgold-path.patch {
|
||||||
|
# libllvmLibdir = "${llvm.lib}/lib";
|
||||||
|
# })
|
||||||
|
];
|
||||||
|
nativeBuildInputs = old.nativeBuildInputs ++ [
|
||||||
|
llvmPackagesNoBintools.lld
|
||||||
|
removeReferencesTo
|
||||||
|
];
|
||||||
|
buildInputs = old.buildInputs ++ [
|
||||||
|
zstd
|
||||||
|
zlib
|
||||||
|
];
|
||||||
|
dontStrip = profilableStdenv;
|
||||||
|
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
|
||||||
|
env = (old.env or { }) // {
|
||||||
|
NIX_BUILD_ID_STYLE = "fast";
|
||||||
|
};
|
||||||
|
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
|
||||||
|
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
|
||||||
|
requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ];
|
||||||
|
# https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11
|
||||||
|
cmakeFlags =
|
||||||
|
(builtins.filter tablegenUsage old.cmakeFlags)
|
||||||
|
++ [
|
||||||
|
llvmTargetsFlag
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
|
||||||
|
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
|
||||||
|
"-DLLVM_ENABLE_THREADS=ON"
|
||||||
|
"-DLLVM_ENABLE_LTO=Thin"
|
||||||
|
"-DLLVM_USE_LINKER=lld"
|
||||||
|
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
|
||||||
|
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
|
||||||
|
]
|
||||||
|
++ lib.optionals addGccLtoCmakeFlags [
|
||||||
|
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
|
||||||
|
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
|
||||||
|
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
|
||||||
|
]
|
||||||
|
++ lib.optionals useLibcxx [
|
||||||
|
"-DLLVM_ENABLE_LTO=Thin"
|
||||||
|
"-DLLVM_ENABLE_LIBCXX=ON"
|
||||||
|
"-DLLVM_USE_LINKER=lld"
|
||||||
|
"-DCLANG_DEFAULT_RTLIB=compiler-rt"
|
||||||
|
];
|
||||||
|
# ++ lib.optionals (!useLibcxx) [
|
||||||
|
# # FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX?
|
||||||
|
# "-DGCC_INSTALL_PREFIX=${gcc-prefix}"
|
||||||
|
# ];
|
||||||
|
postFixup =
|
||||||
|
(old.postFixup or "")
|
||||||
|
+ ''
|
||||||
|
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
|
||||||
|
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
|
||||||
|
'';
|
||||||
|
preConfigure =
|
||||||
|
(old.preConfigure or "")
|
||||||
|
+ ''
|
||||||
|
cmakeFlagsArray+=(
|
||||||
|
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||||
|
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
|
||||||
|
)
|
||||||
|
'';
|
||||||
|
postInstall =
|
||||||
|
(old.postInstall or "")
|
||||||
|
+ ''
|
||||||
|
echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang.cfg
|
||||||
|
echo "--gcc-toolchain=${gcc-prefix}" > $out/bin/clang++.cfg
|
||||||
|
'';
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
// {
|
||||||
|
libllvm = llvm;
|
||||||
|
};
|
||||||
|
# A clang that understands standard include searching in a GNU sysroot and will put GPU libs in include path
|
||||||
|
# in the right order
|
||||||
|
# and expects its libc to be in the sysroot
|
||||||
|
rocmcxx =
|
||||||
|
(sysrootCompiler clang-unwrapped "rocmcxx" (
|
||||||
|
listUsefulOutputs (
|
||||||
|
[
|
||||||
|
clang-unwrapped
|
||||||
|
bintools
|
||||||
|
compiler-rt
|
||||||
|
]
|
||||||
|
++ (lib.optionals useLibcxx [
|
||||||
|
libcxx
|
||||||
|
])
|
||||||
|
++ (lib.optionals (!useLibcxx) [
|
||||||
|
gcc-include
|
||||||
|
glibc
|
||||||
|
glibc.dev
|
||||||
|
])
|
||||||
|
)
|
||||||
|
))
|
||||||
|
// {
|
||||||
|
version = llvmMajorVersion;
|
||||||
|
cc = rocmcxx;
|
||||||
|
libllvm = llvm;
|
||||||
|
isClang = true;
|
||||||
|
isGNU = false;
|
||||||
|
|
||||||
|
};
|
||||||
|
clang-tools = llvmPackagesRocm.clang-tools.override {
|
||||||
|
inherit clang-unwrapped clang;
|
||||||
|
};
|
||||||
|
compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: {
|
||||||
|
patches = old.patches ++ [
|
||||||
|
(fetchpatch {
|
||||||
|
name = "avoid-overload-ambiguity-for-interceptors.patch";
|
||||||
|
url = "https://github.com/ROCm/llvm-project/commit/155b7a12820ec45095988b6aa6e057afaf2bc892.patch";
|
||||||
|
hash = "sha256-pgpN1q1vIQrPXHPxNSZ6zfgV2EflHO5Amzl+2BDjXbs=";
|
||||||
|
relative = "compiler-rt";
|
||||||
|
})
|
||||||
|
];
|
||||||
|
});
|
||||||
|
compiler-rt = compiler-rt-libc;
|
||||||
|
bintools = wrapBintoolsWith {
|
||||||
|
bintools = llvmPackagesRocm.bintools-unwrapped.override {
|
||||||
|
inherit lld llvm;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
clang = rocmcxx;
|
||||||
|
|
||||||
|
# Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects
|
||||||
|
rocm-merged-llvm = symlinkJoin {
|
||||||
|
name = "rocm-llvm-merge";
|
||||||
|
paths =
|
||||||
|
[
|
||||||
|
llvm
|
||||||
|
llvm.dev
|
||||||
|
lld
|
||||||
|
lld.lib
|
||||||
|
lld.dev
|
||||||
|
libunwind
|
||||||
|
libunwind.dev
|
||||||
|
compiler-rt
|
||||||
|
compiler-rt.dev
|
||||||
|
rocmcxx
|
||||||
|
]
|
||||||
|
++ lib.optionals useLibcxx [
|
||||||
|
libcxx
|
||||||
|
libcxx.out
|
||||||
|
libcxx.dev
|
||||||
|
];
|
||||||
|
postBuild = builtins.unsafeDiscardStringContext ''
|
||||||
|
found_files=$(find $out -name '*.cmake')
|
||||||
|
if [ -z "$found_files" ]; then
|
||||||
|
>&2 echo "Error: No CMake files found in $out"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do
|
||||||
|
if grep "$target" $found_files; then
|
||||||
|
>&2 echo "Unexpected ref to $target (clang-unwrapped) found"
|
||||||
|
# exit 1
|
||||||
|
# # FIXME: enable this to reduce closure size
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
inherit version;
|
||||||
|
llvm-src = llvmSrc;
|
||||||
|
};
|
||||||
|
|
||||||
|
rocmClangStdenv = overrideCC (
|
||||||
|
if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv
|
||||||
|
) clang;
|
||||||
|
|
||||||
|
# Projects
|
||||||
|
openmp =
|
||||||
|
(llvmPackagesRocm.openmp.override {
|
||||||
|
stdenv = rocmClangStdenv;
|
||||||
|
llvm = rocm-merged-llvm;
|
||||||
|
targetLlvm = rocm-merged-llvm;
|
||||||
|
clang-unwrapped = clang;
|
||||||
|
}).overrideAttrs
|
||||||
|
(old: {
|
||||||
|
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
|
||||||
|
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ];
|
||||||
|
cmakeFlags =
|
||||||
|
old.cmakeFlags
|
||||||
|
++ [
|
||||||
|
"-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
|
||||||
|
# OMPD support is broken in ROCm 6.3. Haven't investigated why.
|
||||||
|
"-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE"
|
||||||
|
"-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE"
|
||||||
|
]
|
||||||
|
++ lib.optionals addGccLtoCmakeFlags [
|
||||||
|
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
|
||||||
|
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
|
||||||
|
];
|
||||||
|
env.LLVM = "${rocm-merged-llvm}";
|
||||||
|
env.LLVM_DIR = "${rocm-merged-llvm}";
|
||||||
|
buildInputs = old.buildInputs ++ [
|
||||||
|
rocm-device-libs
|
||||||
|
rocm-runtime
|
||||||
|
zlib
|
||||||
|
zstd
|
||||||
|
libxml2
|
||||||
|
libffi
|
||||||
|
];
|
||||||
|
});
|
||||||
|
}
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
From 6c2872afcd9ae8e313621eb6cb7f407e89097304 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <Tom.Rix@amd.com>
|
||||||
|
Date: Sun, 13 Apr 2025 07:41:34 -0700
|
||||||
|
Subject: [PATCH] rocm-llvm: gold-plugin: fix ModuleName
|
||||||
|
|
||||||
|
---
|
||||||
|
llvm/tools/gold/gold-plugin.cpp | 6 ++----
|
||||||
|
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
|
||||||
|
index 0d4ca5299689..dd577206408c 100644
|
||||||
|
--- a/tools/gold/gold-plugin.cpp
|
||||||
|
+++ b/tools/gold/gold-plugin.cpp
|
||||||
|
@@ -1100,10 +1100,8 @@ static std::vector<std::pair<SmallString<128>, bool>> runLTO() {
|
||||||
|
};
|
||||||
|
|
||||||
|
auto AddBuffer = [&](size_t Task, const Twine &moduleName,
|
||||||
|
- std::unique_ptr<MemoryBuffer> MB) {
|
||||||
|
- auto Stream = *AddStream(Task, ModuleName);
|
||||||
|
- Stream->OS << MB->getBuffer();
|
||||||
|
- check(Stream->commit(), "Failed to commit cache");
|
||||||
|
+ std::unique_ptr<MemoryBuffer> MB) {
|
||||||
|
+ *AddStream(Task, moduleName)->OS << MB->getBuffer();
|
||||||
|
};
|
||||||
|
|
||||||
|
FileCache Cache;
|
||||||
|
--
|
||||||
|
2.48.1
|
||||||
194
pkgs/rocm-modules/migraphx/default.nix
Normal file
194
pkgs/rocm-modules/migraphx/default.nix
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
pkg-config,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
clr,
|
||||||
|
openmp,
|
||||||
|
rocblas,
|
||||||
|
hipblas-common,
|
||||||
|
hipblas,
|
||||||
|
hipblaslt,
|
||||||
|
rocmlir,
|
||||||
|
miopen,
|
||||||
|
protobuf,
|
||||||
|
abseil-cpp,
|
||||||
|
half,
|
||||||
|
nlohmann_json,
|
||||||
|
msgpack,
|
||||||
|
sqlite,
|
||||||
|
oneDNN_2,
|
||||||
|
blaze,
|
||||||
|
texliveSmall,
|
||||||
|
doxygen,
|
||||||
|
sphinx,
|
||||||
|
docutils,
|
||||||
|
ghostscript,
|
||||||
|
python3Packages,
|
||||||
|
buildDocs ? false,
|
||||||
|
buildTests ? false,
|
||||||
|
gpuTargets ? clr.gpuTargets,
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
latex = lib.optionalAttrs buildDocs (
|
||||||
|
texliveSmall.withPackages (
|
||||||
|
ps: with ps; [
|
||||||
|
latexmk
|
||||||
|
tex-gyre
|
||||||
|
fncychap
|
||||||
|
wrapfig
|
||||||
|
capt-of
|
||||||
|
framed
|
||||||
|
needspace
|
||||||
|
tabulary
|
||||||
|
varwidth
|
||||||
|
titlesec
|
||||||
|
epstopdf
|
||||||
|
]
|
||||||
|
)
|
||||||
|
);
|
||||||
|
in
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
pname = "migraphx";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
outputs =
|
||||||
|
[
|
||||||
|
"out"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildDocs [
|
||||||
|
"doc"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"test"
|
||||||
|
];
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "AMDMIGraphX";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-PytBEnLBHyp0JzkSLbLRHslqGBk4mabKC62JZoXwHxE=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs =
|
||||||
|
[
|
||||||
|
pkg-config
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
python3Packages.python
|
||||||
|
]
|
||||||
|
++ lib.optionals buildDocs [
|
||||||
|
latex
|
||||||
|
doxygen
|
||||||
|
sphinx
|
||||||
|
docutils
|
||||||
|
ghostscript
|
||||||
|
python3Packages.sphinx-rtd-theme
|
||||||
|
python3Packages.breathe
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
openmp
|
||||||
|
rocblas
|
||||||
|
hipblas-common
|
||||||
|
hipblas
|
||||||
|
hipblaslt
|
||||||
|
rocmlir
|
||||||
|
miopen
|
||||||
|
protobuf
|
||||||
|
half
|
||||||
|
nlohmann_json
|
||||||
|
msgpack
|
||||||
|
sqlite
|
||||||
|
oneDNN_2
|
||||||
|
blaze
|
||||||
|
python3Packages.pybind11
|
||||||
|
python3Packages.onnx
|
||||||
|
];
|
||||||
|
|
||||||
|
LDFLAGS = "-Wl,--allow-shlib-undefined";
|
||||||
|
|
||||||
|
# CMake configuration for MIGraphX: enables the GPU, CPU and FPGA targets,
# builds with the ROCm clang wrappers, and pins the install directories.
cmakeFlags = [
  "-DMIGRAPHX_ENABLE_GPU=ON"
  "-DMIGRAPHX_ENABLE_CPU=ON"
  "-DMIGRAPHX_ENABLE_FPGA=ON"
  "-DMIGRAPHX_ENABLE_MLIR=OFF" # LLVM or rocMLIR mismatch?
  "-DCMAKE_C_COMPILER=amdclang"
  "-DCMAKE_CXX_COMPILER=amdclang++"
  "-DCMAKE_VERBOSE_MAKEFILE=ON"
  "-DEMBED_USE=CArrays" # Fixes error with lld
  # This flag used to be spelled "-DDMIGRAPHX_ENABLE_PYTHON=ON" (doubled "D"),
  # which defines a CMake variable named DMIGRAPHX_ENABLE_PYTHON, so the
  # intended MIGRAPHX_ENABLE_PYTHON option was never set by this flag.
  "-DMIGRAPHX_ENABLE_PYTHON=ON"
  "-DROCM_PATH=${clr}"
  "-DHIP_ROOT_DIR=${clr}"
  # migraphx relies on an incompatible fork of composable_kernel
  # migraphx relies on miopen which relies on current composable_kernel
  # impossible to build with this ON; we can't link both of them even if we package both
  "-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF"
  "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
  "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
  "-DOpenMP_omp_LIBRARY=${openmp}/lib"
  # Manually define CMAKE_INSTALL_<DIR>
  # See: https://github.com/NixOS/nixpkgs/pull/197838
  "-DCMAKE_INSTALL_BINDIR=bin"
  "-DCMAKE_INSTALL_LIBDIR=lib"
  "-DCMAKE_INSTALL_INCLUDEDIR=include"
  # CMake list of GPU architectures, joined with ";" from the gpuTargets argument.
  "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
];
|
||||||
|
|
||||||
|
postPatch =
|
||||||
|
''
|
||||||
|
export CXXFLAGS+=" -w -isystem${rocmlir}/include/rocmlir -I${half}/include -I${abseil-cpp}/include -I${hipblas-common}/include"
|
||||||
|
patchShebangs tools
|
||||||
|
|
||||||
|
# `error: '__clang_hip_runtime_wrapper.h' file not found [clang-diagnostic-error]`
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace "set(MIGRAPHX_TIDY_ERRORS ALL)" ""
|
||||||
|
''
|
||||||
|
+ lib.optionalString (!buildDocs) ''
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace "add_subdirectory(doc)" ""
|
||||||
|
''
|
||||||
|
+ lib.optionalString (!buildTests) ''
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace "add_subdirectory(test)" ""
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Unfortunately, it seems like we have to call make on this manually
|
||||||
|
preInstall = lib.optionalString buildDocs ''
|
||||||
|
export HOME=$(mktemp -d)
|
||||||
|
make -j$NIX_BUILD_CORES doc
|
||||||
|
cd ../doc/pdf
|
||||||
|
make -j$NIX_BUILD_CORES
|
||||||
|
cd -
|
||||||
|
'';
|
||||||
|
|
||||||
|
postInstall =
|
||||||
|
lib.optionalString buildDocs ''
|
||||||
|
mv ../doc/html $out/share/doc/migraphx
|
||||||
|
mv ../doc/pdf/MIGraphX.pdf $out/share/doc/migraphx
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildTests ''
|
||||||
|
mkdir -p $test/bin
|
||||||
|
mv bin/test_* $test/bin
|
||||||
|
patchelf $test/bin/test_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "AMD's graph optimization engine";
|
||||||
|
homepage = "https://github.com/ROCm/AMDMIGraphX";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
})
|
||||||
324
pkgs/rocm-modules/miopen/default.nix
Normal file
324
pkgs/rocm-modules/miopen/default.nix
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
fetchpatch,
|
||||||
|
rocmUpdateScript,
|
||||||
|
runCommand,
|
||||||
|
pkg-config,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
rocblas,
|
||||||
|
rocmlir,
|
||||||
|
rocrand,
|
||||||
|
rocm-runtime,
|
||||||
|
rocm-merged-llvm,
|
||||||
|
hipblas-common,
|
||||||
|
hipblas,
|
||||||
|
hipblaslt,
|
||||||
|
clr,
|
||||||
|
composable_kernel,
|
||||||
|
frugally-deep,
|
||||||
|
rocm-docs-core,
|
||||||
|
half,
|
||||||
|
boost,
|
||||||
|
sqlite,
|
||||||
|
bzip2,
|
||||||
|
lbzip2,
|
||||||
|
nlohmann_json,
|
||||||
|
texliveSmall,
|
||||||
|
doxygen,
|
||||||
|
sphinx,
|
||||||
|
zlib,
|
||||||
|
gtest,
|
||||||
|
rocm-comgr,
|
||||||
|
roctracer,
|
||||||
|
python3Packages,
|
||||||
|
# FIXME: should be able to use all clr targets
|
||||||
|
gpuTargets ? [
|
||||||
|
"gfx900"
|
||||||
|
"gfx906"
|
||||||
|
"gfx908"
|
||||||
|
"gfx90a"
|
||||||
|
"gfx942"
|
||||||
|
"gfx1030"
|
||||||
|
"gfx1100"
|
||||||
|
"gfx1101"
|
||||||
|
"gfx1102"
|
||||||
|
"gfx1201"
|
||||||
|
], # clr.gpuTargets
|
||||||
|
buildDocs ? false, # Needs internet because of rocm-docs-core
|
||||||
|
buildTests ? false,
|
||||||
|
withComposableKernel ? composable_kernel.anyGfx9Target,
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
# FIXME: cmake files need patched to include this properly
|
||||||
|
cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "MIOpen";
|
||||||
|
rev = "rocm-${version}";
|
||||||
|
hash = "sha256-DEcVj2vOwIYYyNKEKFqZ0fb9o+/QRpwiSksxwnmgEMc=";
|
||||||
|
fetchLFS = true;
|
||||||
|
fetchSubmodules = true;
|
||||||
|
# WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream
|
||||||
|
leaveDotGit = true;
|
||||||
|
postFetch = ''
|
||||||
|
export HOME=$(mktemp -d)
|
||||||
|
cd $out
|
||||||
|
set -x
|
||||||
|
git remote add origin $url
|
||||||
|
git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version}
|
||||||
|
git clean -fdx
|
||||||
|
git switch -c rocm-${version} refs/tags/rocm-${version}
|
||||||
|
git config lfs.fetchexclude "none"
|
||||||
|
rm .lfsconfig
|
||||||
|
git lfs install
|
||||||
|
git lfs track "*.kdb.bz2"
|
||||||
|
GIT_TRACE=1 git lfs fetch --include="src/kernels/**"
|
||||||
|
GIT_TRACE=1 git lfs pull --include="src/kernels/**"
|
||||||
|
git lfs checkout
|
||||||
|
|
||||||
|
rm -rf .git
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
latex = lib.optionalAttrs buildDocs (
|
||||||
|
texliveSmall.withPackages (
|
||||||
|
ps: with ps; [
|
||||||
|
latexmk
|
||||||
|
tex-gyre
|
||||||
|
fncychap
|
||||||
|
wrapfig
|
||||||
|
capt-of
|
||||||
|
framed
|
||||||
|
needspace
|
||||||
|
tabulary
|
||||||
|
varwidth
|
||||||
|
titlesec
|
||||||
|
]
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
gfx900 = runCommand "miopen-gfx900.kdb" { preferLocalBuild = true; } ''
|
||||||
|
${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx900.kdb.bz2 > $out
|
||||||
|
'';
|
||||||
|
|
||||||
|
gfx906 = runCommand "miopen-gfx906.kdb" { preferLocalBuild = true; } ''
|
||||||
|
${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx906.kdb.bz2 > $out
|
||||||
|
'';
|
||||||
|
|
||||||
|
gfx908 = runCommand "miopen-gfx908.kdb" { preferLocalBuild = true; } ''
|
||||||
|
${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx908.kdb.bz2 > $out
|
||||||
|
'';
|
||||||
|
|
||||||
|
gfx90a = runCommand "miopen-gfx90a.kdb" { preferLocalBuild = true; } ''
|
||||||
|
${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx90a.kdb.bz2 > $out
|
||||||
|
'';
|
||||||
|
|
||||||
|
gfx1030 = runCommand "miopen-gfx1030.kdb" { preferLocalBuild = true; } ''
|
||||||
|
${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx1030.kdb.bz2 > $out
|
||||||
|
'';
|
||||||
|
gfx1201 = runCommand "miopen-gfx1201.kdb" { preferLocalBuild = true; } ''
|
||||||
|
${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx1201.kdb.bz2 > $out
|
||||||
|
'';
|
||||||
|
in
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
inherit version src;
|
||||||
|
pname = "miopen";
|
||||||
|
|
||||||
|
env.CFLAGS = cFlags;
|
||||||
|
env.CXXFLAGS = cFlags;
|
||||||
|
|
||||||
|
# Find zstd and add to target. Mainly for torch.
|
||||||
|
patches = [
|
||||||
|
./skip-preexisting-dbs.patch
|
||||||
|
./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448
|
||||||
|
(fetchpatch {
|
||||||
|
url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
|
||||||
|
hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
|
||||||
|
})
|
||||||
|
(fetchpatch {
|
||||||
|
url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
|
||||||
|
hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
|
||||||
|
})
|
||||||
|
# FIXME: We need to rebase or drop this arch compat patch
|
||||||
|
# https://github.com/ROCm/MIOpen/issues/3540 suggests that
|
||||||
|
# arch compat patching doesn't work correctly for gfx1031
|
||||||
|
# (fetchpatch {
|
||||||
|
# name = "Extend-MIOpen-ISA-compatibility.patch";
|
||||||
|
# url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
|
||||||
|
# hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
|
||||||
|
# })
|
||||||
|
];
|
||||||
|
|
||||||
|
outputs =
|
||||||
|
[
|
||||||
|
"out"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildDocs [
|
||||||
|
"doc"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"test"
|
||||||
|
];
|
||||||
|
enableParallelBuilding = true;
|
||||||
|
env.ROCM_PATH = clr;
|
||||||
|
env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];
|
||||||
|
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
pkg-config
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs =
|
||||||
|
[
|
||||||
|
hipblas
|
||||||
|
hipblas-common
|
||||||
|
rocblas
|
||||||
|
rocmlir
|
||||||
|
half
|
||||||
|
boost
|
||||||
|
sqlite
|
||||||
|
bzip2
|
||||||
|
nlohmann_json
|
||||||
|
frugally-deep
|
||||||
|
roctracer
|
||||||
|
rocrand
|
||||||
|
hipblaslt
|
||||||
|
]
|
||||||
|
++ lib.optionals withComposableKernel [
|
||||||
|
composable_kernel
|
||||||
|
]
|
||||||
|
++ lib.optionals buildDocs [
|
||||||
|
latex
|
||||||
|
doxygen
|
||||||
|
sphinx
|
||||||
|
rocm-docs-core
|
||||||
|
python3Packages.sphinx-rtd-theme
|
||||||
|
python3Packages.breathe
|
||||||
|
python3Packages.myst-parser
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
gtest
|
||||||
|
zlib
|
||||||
|
];
|
||||||
|
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
"-DMIOPEN_USE_SQLITE_PERFDB=ON"
|
||||||
|
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
||||||
|
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
|
||||||
|
# needs to stream to stdout so bzcat rather than bunzip2
|
||||||
|
"-DUNZIPPER=${bzip2}/bin/bzcat"
|
||||||
|
|
||||||
|
"-DCMAKE_C_COMPILER=amdclang"
|
||||||
|
"-DCMAKE_CXX_COMPILER=amdclang++"
|
||||||
|
"-DROCM_PATH=${clr}"
|
||||||
|
"-DHIP_ROOT_DIR=${clr}"
|
||||||
|
(lib.cmakeBool "MIOPEN_USE_ROCBLAS" true)
|
||||||
|
(lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true)
|
||||||
|
(lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel)
|
||||||
|
(lib.cmakeBool "MIOPEN_USE_HIPRTC" true)
|
||||||
|
(lib.cmakeBool "MIOPEN_USE_COMGR" true)
|
||||||
|
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
|
||||||
|
# Manually define CMAKE_INSTALL_<DIR>
|
||||||
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||||
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||||
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||||
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||||
|
"-DMIOPEN_BACKEND=HIP"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"-DBUILD_TESTS=ON"
|
||||||
|
"-DMIOPEN_TEST_ALL=ON"
|
||||||
|
];
|
||||||
|
|
||||||
|
postPatch = ''
|
||||||
|
substituteInPlace cmake/ClangTidy.cmake \
|
||||||
|
--replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy)
|
||||||
|
endmacro()
|
||||||
|
macro(enable_clang_tidy_unused)' \
|
||||||
|
--replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET)
|
||||||
|
return()'
|
||||||
|
|
||||||
|
patchShebangs test src/composable_kernel fin utils install_deps.cmake
|
||||||
|
|
||||||
|
ln -sf ${gfx900} src/kernels/gfx900.kdb
|
||||||
|
ln -sf ${gfx906} src/kernels/gfx906.kdb
|
||||||
|
ln -sf ${gfx908} src/kernels/gfx908.kdb
|
||||||
|
ln -sf ${gfx90a} src/kernels/gfx90a.kdb
|
||||||
|
ln -sf ${gfx1030} src/kernels/gfx1030.kdb
|
||||||
|
ln -sf ${gfx1201} src/kernels/gfx1201.kdb
|
||||||
|
mkdir -p build/share/miopen/db/
|
||||||
|
ln -sf ${gfx900} build/share/miopen/db/gfx900.kdb
|
||||||
|
ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb
|
||||||
|
ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb
|
||||||
|
ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb
|
||||||
|
ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb
|
||||||
|
ln -sf ${gfx1201} build/share/miopen/db/gfx1201.kdb
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Unfortunately, it seems like we have to call make on these manually
|
||||||
|
postBuild =
|
||||||
|
lib.optionalString buildDocs ''
|
||||||
|
python -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildTests ''
|
||||||
|
make -j$NIX_BUILD_CORES check
|
||||||
|
'';
|
||||||
|
|
||||||
|
postInstall =
|
||||||
|
''
|
||||||
|
rm $out/bin/install_precompiled_kernels.sh
|
||||||
|
ln -sf ${gfx900} $out/share/miopen/db/gfx900.kdb
|
||||||
|
ln -sf ${gfx906} $out/share/miopen/db/gfx906.kdb
|
||||||
|
ln -sf ${gfx908} $out/share/miopen/db/gfx908.kdb
|
||||||
|
ln -sf ${gfx90a} $out/share/miopen/db/gfx90a.kdb
|
||||||
|
ln -sf ${gfx1030} $out/share/miopen/db/gfx1030.kdb
|
||||||
|
ln -sf ${gfx1201} $out/share/miopen/db/gfx1201.kdb
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildDocs ''
|
||||||
|
mv ../doc/html $out/share/doc/miopen-hip
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildTests ''
|
||||||
|
mkdir -p $test/bin
|
||||||
|
mv bin/test_* $test/bin
|
||||||
|
patchelf --set-rpath $out/lib:${
|
||||||
|
lib.makeLibraryPath (
|
||||||
|
finalAttrs.buildInputs
|
||||||
|
++ [
|
||||||
|
clr
|
||||||
|
rocm-comgr
|
||||||
|
]
|
||||||
|
)
|
||||||
|
} $test/bin/*
|
||||||
|
'';
|
||||||
|
|
||||||
|
requiredSystemFeatures = [ "big-parallel" ];
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Machine intelligence library for ROCm";
|
||||||
|
homepage = "https://github.com/ROCm/MIOpen";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
})
|
||||||
31
pkgs/rocm-modules/miopen/fix-isnan.patch
Normal file
31
pkgs/rocm-modules/miopen/fix-isnan.patch
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Sv. Lockal" <lockalsash@gmail.com>
|
||||||
|
Date: Tue, 24 Dec 2024 14:43:10 +0000
|
||||||
|
Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems
|
||||||
|
|
||||||
|
Closes #3441
|
||||||
|
---
|
||||||
|
driver/reducecalculation_driver.hpp | 3 ++-
|
||||||
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp
|
||||||
|
index 8226b3c953..2001969509 100644
|
||||||
|
--- a/driver/reducecalculation_driver.hpp
|
||||||
|
+++ b/driver/reducecalculation_driver.hpp
|
||||||
|
@@ -33,6 +33,7 @@
|
||||||
|
#include "random.hpp"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cfloat>
|
||||||
|
+#include <cmath>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <memory>
|
||||||
|
#include <miopen/miopen.h>
|
||||||
|
@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc,
|
||||||
|
for(size_t i = 0; i < reduce_size; ++i)
|
||||||
|
{
|
||||||
|
Tcheck val = static_cast<Tcheck>(input[input_idx]);
|
||||||
|
- if(nanPropagation && isnan(val))
|
||||||
|
+ if(nanPropagation && std::isnan(val))
|
||||||
|
{
|
||||||
|
val = 0.0f;
|
||||||
|
}
|
||||||
22
pkgs/rocm-modules/miopen/skip-preexisting-dbs.patch
Normal file
22
pkgs/rocm-modules/miopen/skip-preexisting-dbs.patch
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||||
|
index d0ffaf983..0b9ed0952 100644
|
||||||
|
--- a/CMakeLists.txt
|
||||||
|
+++ b/CMakeLists.txt
|
||||||
|
@@ -554,7 +554,7 @@ endif()
|
||||||
|
function(unpack_db db_bzip2_file)
|
||||||
|
get_filename_component(__fname ${db_bzip2_file} NAME_WLE)
|
||||||
|
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}
|
||||||
|
- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
|
||||||
|
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
|
||||||
|
string(REPLACE "." "_" __tname ${__fname})
|
||||||
|
add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname})
|
||||||
|
|
||||||
|
@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file)
|
||||||
|
if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db")
|
||||||
|
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt
|
||||||
|
DEPENDS sqlite2txt generate_${__tname}
|
||||||
|
- COMMAND $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
|
||||||
|
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
|
||||||
|
)
|
||||||
|
add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt)
|
||||||
|
add_dependencies(generate_kernels generate_${__tname}_txt)
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
From f0e66bd446d44df1d30faaad520613f5fb7f5916 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Martin Schwaighofer <mschwaig@users.noreply.github.com>
|
||||||
|
Date: Sat, 30 Mar 2024 15:36:52 +0100
|
||||||
|
Subject: [PATCH] set __STDC_CONSTANT_MACROS to make rocAL compile
|
||||||
|
|
||||||
|
---
|
||||||
|
CMakeLists.txt | 2 ++
|
||||||
|
1 file changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||||
|
index 42b139b6..509915f1 100644
|
||||||
|
--- a/CMakeLists.txt
|
||||||
|
+++ b/CMakeLists.txt
|
||||||
|
@@ -149,6 +149,8 @@ message("-- ${Cyan} -D MIGRAPHX=${MIGRAPHX} [Turn ON/OFF MIGraphX Module (de
|
||||||
|
message("-- ${Cyan} -D BACKEND=${BACKEND} [Select MIVisionX Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}")
|
||||||
|
message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}")
|
||||||
|
|
||||||
|
+add_definitions(-D__STDC_CONSTANT_MACROS)
|
||||||
|
+
|
||||||
|
add_subdirectory(amd_openvx)
|
||||||
|
add_subdirectory(amd_openvx_extensions)
|
||||||
|
add_subdirectory(utilities)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
150
pkgs/rocm-modules/mivisionx/default.nix
Normal file
150
pkgs/rocm-modules/mivisionx/default.nix
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
rocm-device-libs,
|
||||||
|
clr,
|
||||||
|
pkg-config,
|
||||||
|
rpp,
|
||||||
|
rocblas,
|
||||||
|
miopen,
|
||||||
|
migraphx,
|
||||||
|
openmp,
|
||||||
|
protobuf,
|
||||||
|
qtcreator,
|
||||||
|
opencv,
|
||||||
|
ffmpeg,
|
||||||
|
boost,
|
||||||
|
libjpeg_turbo,
|
||||||
|
half,
|
||||||
|
lmdb,
|
||||||
|
rapidjson,
|
||||||
|
rocm-docs-core,
|
||||||
|
python3Packages,
|
||||||
|
useOpenCL ? false,
|
||||||
|
useCPU ? false,
|
||||||
|
buildDocs ? false, # Needs internet
|
||||||
|
gpuTargets ? [ ],
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
# The package name carries the selected backend as a suffix. The conditions
# mirror the BACKEND selection in cmakeFlags: HIP when neither OpenCL nor CPU
# is requested, OpenCL when useOpenCL is set without useCPU, CPU otherwise.
pname =
  "mivisionx-"
  + (
    if (!useOpenCL && !useCPU) then
      "hip"
    # Previously this branch repeated the HIP condition, which made "opencl"
    # unreachable and named OpenCL builds "mivisionx-cpu".
    else if (useOpenCL && !useCPU) then
      "opencl"
    else
      "cpu"
  );
|
||||||
|
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "MIVisionX";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-07MivgCYmKLnhGDjOYsFBfwIxEoQLYNoRbOo3MPpVzE=";
|
||||||
|
};
|
||||||
|
|
||||||
|
patches = [
|
||||||
|
./0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch
|
||||||
|
];
|
||||||
|
|
||||||
|
nativeBuildInputs =
|
||||||
|
[
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
pkg-config
|
||||||
|
]
|
||||||
|
++ lib.optionals buildDocs [
|
||||||
|
rocm-docs-core
|
||||||
|
python3Packages.python
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
miopen
|
||||||
|
migraphx
|
||||||
|
rpp
|
||||||
|
rocblas
|
||||||
|
openmp
|
||||||
|
half
|
||||||
|
protobuf
|
||||||
|
qtcreator
|
||||||
|
opencv
|
||||||
|
ffmpeg
|
||||||
|
boost
|
||||||
|
libjpeg_turbo
|
||||||
|
lmdb
|
||||||
|
rapidjson
|
||||||
|
python3Packages.pybind11
|
||||||
|
python3Packages.numpy
|
||||||
|
python3Packages.torchWithRocm
|
||||||
|
];
|
||||||
|
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-DROCM_PATH=${clr}"
|
||||||
|
"-DAMDRPP_PATH=${rpp}"
|
||||||
|
# Manually define CMAKE_INSTALL_<DIR>
|
||||||
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||||
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||||
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||||
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||||
|
"-DCMAKE_INSTALL_PREFIX_PYTHON=lib"
|
||||||
|
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
|
||||||
|
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
|
||||||
|
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
|
||||||
|
# "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')`
|
||||||
|
]
|
||||||
|
++ lib.optionals (gpuTargets != [ ]) [
|
||||||
|
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
]
|
||||||
|
++ lib.optionals (!useOpenCL && !useCPU) [
|
||||||
|
"-DBACKEND=HIP"
|
||||||
|
]
|
||||||
|
++ lib.optionals (useOpenCL && !useCPU) [
|
||||||
|
"-DBACKEND=OCL"
|
||||||
|
]
|
||||||
|
++ lib.optionals useCPU [
|
||||||
|
"-DBACKEND=CPU"
|
||||||
|
];
|
||||||
|
|
||||||
|
postPatch = ''
|
||||||
|
# We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...`
|
||||||
|
export CXXFLAGS+=" --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
|
||||||
|
# Properly find miopen, fix ffmpeg version detection
|
||||||
|
substituteInPlace amd_openvx_extensions/CMakeLists.txt \
|
||||||
|
--replace-fail "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
|
||||||
|
--replace-fail "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
|
||||||
|
|
||||||
|
# Properly find turbojpeg
|
||||||
|
substituteInPlace cmake/FindTurboJpeg.cmake \
|
||||||
|
--replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
|
||||||
|
--replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
|
||||||
|
'';
|
||||||
|
|
||||||
|
postBuild = lib.optionalString buildDocs ''
|
||||||
|
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Set of comprehensive computer vision and machine intelligence libraries, utilities, and applications";
|
||||||
|
homepage = "https://github.com/ROCm/MIVisionX";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
broken = useOpenCL;
|
||||||
|
};
|
||||||
|
})
|
||||||
42
pkgs/rocm-modules/mscclpp/default.nix
Normal file
42
pkgs/rocm-modules/mscclpp/default.nix
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
fetchFromGitHub,
|
||||||
|
stdenv,
|
||||||
|
cmake,
|
||||||
|
clr,
|
||||||
|
numactl,
|
||||||
|
nlohmann_json,
|
||||||
|
}:
|
||||||
|
stdenv.mkDerivation {
|
||||||
|
pname = "mscclpp";
|
||||||
|
version = "unstable-2024-12-13";
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "microsoft";
|
||||||
|
repo = "mscclpp";
|
||||||
|
rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37";
|
||||||
|
hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig=";
|
||||||
|
};
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
];
|
||||||
|
buildInputs = [
|
||||||
|
clr
|
||||||
|
numactl
|
||||||
|
];
|
||||||
|
postPatch = ''
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "gfx90a gfx941 gfx942" "gfx908 gfx90a gfx942 gfx1030 gfx1100"
|
||||||
|
'';
|
||||||
|
cmakeFlags = [
|
||||||
|
"-DMSCCLPP_BYPASS_GPU_CHECK=ON"
|
||||||
|
"-DMSCCLPP_USE_ROCM=ON"
|
||||||
|
"-DMSCCLPP_BUILD_TESTS=OFF"
|
||||||
|
"-DGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100"
|
||||||
|
"-DAMDGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100"
|
||||||
|
"-DMSCCLPP_BUILD_APPS_NCCL=ON"
|
||||||
|
"-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF"
|
||||||
|
"-DFETCHCONTENT_QUIET=OFF"
|
||||||
|
"-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS"
|
||||||
|
"-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}"
|
||||||
|
];
|
||||||
|
env.ROCM_PATH = clr;
|
||||||
|
}
|
||||||
144
pkgs/rocm-modules/rccl/default.nix
Normal file
144
pkgs/rocm-modules/rccl/default.nix
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
rocm-smi,
|
||||||
|
rocm-core,
|
||||||
|
clr,
|
||||||
|
mscclpp,
|
||||||
|
perl,
|
||||||
|
hipify,
|
||||||
|
gtest,
|
||||||
|
chrpath,
|
||||||
|
rocprofiler,
|
||||||
|
rocprofiler-register,
|
||||||
|
autoPatchelfHook,
|
||||||
|
buildTests ? false,
|
||||||
|
gpuTargets ? (clr.localGpuTargets or [ ]),
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
# Both sanitizers are switched on together with the test build.
useAsan = buildTests;
useUbsan = buildTests;
# Sanitizer flags spliced into CFLAGS/CXXFLAGS/LDFLAGS; empty when neither
# sanitizer is enabled. Each fragment keeps its trailing space because the
# string is interpolated directly in front of the next flag.
# Each sanitizer flag is now gated on its own toggle; previously
# -fsanitize=undefined was emitted whenever either toggle was set.
san = lib.optionalString (useAsan || useUbsan) (
  "-fno-gpu-sanitize "
  + (lib.optionalString useUbsan "-fsanitize=undefined ")
  + (lib.optionalString useAsan "-fsanitize=address -shared-libsan ")
);
|
||||||
|
in
|
||||||
|
# Note: we can't properly test or make use of multi-node collective ops
|
||||||
|
# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support
|
||||||
|
# kfd_peerdirect support which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver
|
||||||
|
# infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
pname = "rccl${clr.gpuArchSuffix}";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
outputs =
|
||||||
|
[
|
||||||
|
"out"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"test"
|
||||||
|
];
|
||||||
|
|
||||||
|
patches = [
|
||||||
|
./fix-mainline-support-and-ub.diff
|
||||||
|
./enable-mscclpp-on-all-gfx9.diff
|
||||||
|
./rccl-test-missing-iomanip.diff
|
||||||
|
];
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "rccl";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-6lQBpoJKszgvt+UpNEKdiw74s3ZhC4zpA4HP+F6u7X4=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
perl
|
||||||
|
hipify
|
||||||
|
autoPatchelfHook # ASAN doesn't add rpath without this
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs =
|
||||||
|
[
|
||||||
|
rocm-smi
|
||||||
|
gtest
|
||||||
|
rocprofiler
|
||||||
|
rocprofiler-register
|
||||||
|
mscclpp
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
chrpath
|
||||||
|
];
|
||||||
|
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
|
||||||
|
"-DCMAKE_BUILD_TYPE=Release"
|
||||||
|
"-DROCM_PATH=${clr}"
|
||||||
|
"-DHIP_COMPILER=${clr}/bin/amdclang++"
|
||||||
|
"-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
|
||||||
|
"-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
|
||||||
|
"-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
|
||||||
|
"-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
|
||||||
|
"-DENABLE_MSCCL_KERNEL=ON"
|
||||||
|
"-DENABLE_MSCCLPP=ON"
|
||||||
|
"-DMSCCLPP_ROOT=${mscclpp}"
|
||||||
|
# Manually define CMAKE_INSTALL_<DIR>
|
||||||
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
||||||
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||||
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||||
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||||
|
]
|
||||||
|
++ lib.optionals (gpuTargets != [ ]) [
|
||||||
|
# AMD can't make up their minds and keep changing which one is used in different projects.
|
||||||
|
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
]
|
||||||
|
++ lib.optionals buildTests [
|
||||||
|
"-DBUILD_TESTS=ON"
|
||||||
|
];
|
||||||
|
|
||||||
|
# -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
|
||||||
|
# Reported upstream
|
||||||
|
env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
|
||||||
|
env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
|
||||||
|
env.LDFLAGS = "${san}";
|
||||||
|
postPatch = ''
|
||||||
|
patchShebangs src tools
|
||||||
|
'';
|
||||||
|
|
||||||
|
postInstall =
|
||||||
|
lib.optionalString useAsan ''
|
||||||
|
patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
|
||||||
|
''
|
||||||
|
+ lib.optionalString buildTests ''
|
||||||
|
mkdir -p $test/bin
|
||||||
|
mv $out/bin/* $test/bin
|
||||||
|
rmdir $out/bin
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "ROCm communication collectives library";
|
||||||
|
homepage = "https://github.com/ROCm/rccl";
|
||||||
|
license = with licenses; [
|
||||||
|
bsd2
|
||||||
|
bsd3
|
||||||
|
];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
})
|
||||||
13
pkgs/rocm-modules/rccl/enable-mscclpp-on-all-gfx9.diff
Normal file
13
pkgs/rocm-modules/rccl/enable-mscclpp-on-all-gfx9.diff
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
diff --git a/src/init.cc b/src/init.cc
|
||||||
|
index 738f756..1b0e4fc 100644
|
||||||
|
--- a/src/init.cc
|
||||||
|
+++ b/src/init.cc
|
||||||
|
@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
|
||||||
|
if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) {
|
||||||
|
hipDeviceProp_t devProp;
|
||||||
|
CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev));
|
||||||
|
- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94");
|
||||||
|
+ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9");
|
||||||
|
if (comm->mscclppCompatible) {
|
||||||
|
bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0);
|
||||||
|
auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId];
|
||||||
178
pkgs/rocm-modules/rccl/fix-mainline-support-and-ub.diff
Normal file
178
pkgs/rocm-modules/rccl/fix-mainline-support-and-ub.diff
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h
|
||||||
|
index 8c5f081..9922b79 100644
|
||||||
|
--- a/src/include/bootstrap.h
|
||||||
|
+++ b/src/include/bootstrap.h
|
||||||
|
@@ -10,11 +10,13 @@
|
||||||
|
#include "nccl.h"
|
||||||
|
#include "comm.h"
|
||||||
|
|
||||||
|
+// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128]
|
||||||
|
struct ncclBootstrapHandle {
|
||||||
|
uint64_t magic;
|
||||||
|
union ncclSocketAddress addr;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID");
|
||||||
|
+static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB");
|
||||||
|
|
||||||
|
ncclResult_t bootstrapNetInit();
|
||||||
|
ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv);
|
||||||
|
diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc
|
||||||
|
index b3063d5..464b80d 100644
|
||||||
|
--- a/src/misc/rocmwrap.cc
|
||||||
|
+++ b/src/misc/rocmwrap.cc
|
||||||
|
@@ -131,9 +131,12 @@ static void initOnceFunc() {
|
||||||
|
//format and store the kernel conf file location
|
||||||
|
snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release);
|
||||||
|
fp = fopen(kernel_conf_file, "r");
|
||||||
|
- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file");
|
||||||
|
+ if (fp == NULL) {
|
||||||
|
+ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled");
|
||||||
|
+ }
|
||||||
|
//look for kernel_opt1 and kernel_opt2 in the conf file and check
|
||||||
|
- while (fgets(buf, sizeof(buf), fp) != NULL) {
|
||||||
|
+ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary.
|
||||||
|
+ while (fp && fgets(buf, sizeof(buf), fp) != NULL) {
|
||||||
|
if (strstr(buf, kernel_opt1) != NULL) {
|
||||||
|
found_opt1 = 1;
|
||||||
|
INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release);
|
||||||
|
@@ -143,11 +146,12 @@ static void initOnceFunc() {
|
||||||
|
INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- if (!found_opt1 || !found_opt2) {
|
||||||
|
+ if (fp && (!found_opt1 || !found_opt2)) {
|
||||||
|
dmaBufSupport = 0;
|
||||||
|
INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release);
|
||||||
|
INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support");
|
||||||
|
}
|
||||||
|
+ if (fp) fclose(fp);
|
||||||
|
|
||||||
|
if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled");
|
||||||
|
else goto error;
|
||||||
|
diff --git a/src/nccl.h.in b/src/nccl.h.in
|
||||||
|
index 1d127b0..6296073 100644
|
||||||
|
--- a/src/nccl.h.in
|
||||||
|
+++ b/src/nccl.h.in
|
||||||
|
@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t;
|
||||||
|
#define NCCL_UNIQUE_ID_BYTES 128
|
||||||
|
/*! @brief Opaque unique id used to initialize communicators
|
||||||
|
@details The ncclUniqueId must be passed to all participating ranks */
|
||||||
|
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
|
||||||
|
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
|
||||||
|
|
||||||
|
/*! @defgroup rccl_result_code Result Codes
|
||||||
|
@details The various result codes that RCCL API calls may return
|
||||||
|
diff --git a/src/proxy.cc b/src/proxy.cc
|
||||||
|
index 50e5437..51bb401 100644
|
||||||
|
--- a/src/proxy.cc
|
||||||
|
+++ b/src/proxy.cc
|
||||||
|
@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool {
|
||||||
|
|
||||||
|
static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) {
|
||||||
|
if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) {
|
||||||
|
- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
|
||||||
|
+ if (pool->pools) {
|
||||||
|
+ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
|
||||||
|
+ } else {
|
||||||
|
+ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1));
|
||||||
|
+ }
|
||||||
|
NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE));
|
||||||
|
pool->banks++;
|
||||||
|
pool->offset = 0;
|
||||||
|
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc
|
||||||
|
index 6d77784..49762d3 100644
|
||||||
|
--- a/src/transport/net_ib.cc
|
||||||
|
+++ b/src/transport/net_ib.cc
|
||||||
|
@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() {
|
||||||
|
// Requires support from NIC driver modules
|
||||||
|
// Use ONLY for debugging!
|
||||||
|
moduleLoaded = 1;
|
||||||
|
- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
|
||||||
|
+ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (moduleLoaded == -1) {
|
||||||
|
@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() {
|
||||||
|
// or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module)
|
||||||
|
const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version",
|
||||||
|
"/sys/kernel/memory_peers/amdkfd/version",
|
||||||
|
- "/sys/memory_peers/amdkfd/version"};
|
||||||
|
+ "/sys/memory_peers/amdkfd/version",
|
||||||
|
+ NULL};
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
while (memory_peers_paths[i]) {
|
||||||
|
if (access(memory_peers_paths[i], F_OK) == 0) {
|
||||||
|
moduleLoaded = 1;
|
||||||
|
- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]);
|
||||||
|
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]);
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
moduleLoaded = 0;
|
||||||
|
@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() {
|
||||||
|
if (moduleLoaded == 0) {
|
||||||
|
// Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms`
|
||||||
|
// if your system uses native OS ib_peer module
|
||||||
|
- char buf[256];
|
||||||
|
- FILE *fp = NULL;
|
||||||
|
- fp = fopen("/proc/kallsyms", "r");
|
||||||
|
+ FILE *fp = fopen("/proc/kallsyms", "r");
|
||||||
|
+ char *line = NULL;
|
||||||
|
+ size_t len = 0;
|
||||||
|
|
||||||
|
if (fp == NULL) {
|
||||||
|
- INFO(NCCL_INIT,"Could not open /proc/kallsyms");
|
||||||
|
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client");
|
||||||
|
} else {
|
||||||
|
- while (fgets(buf, sizeof(buf), fp) != NULL) {
|
||||||
|
- if (strstr(buf, "t ib_register_peer_memory_client") != NULL ||
|
||||||
|
- strstr(buf, "T ib_register_peer_memory_client") != NULL) {
|
||||||
|
+ while (getline(&line, &len, fp) > 0) {
|
||||||
|
+ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) {
|
||||||
|
moduleLoaded = 1;
|
||||||
|
- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms");
|
||||||
|
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+ if (line) free(line);
|
||||||
|
+ if (fp) fclose(fp);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// Check for the nv_peer_mem module being loaded
|
||||||
|
@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() {
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
if (moduleLoaded == 0) {
|
||||||
|
- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
|
||||||
|
+ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
|
||||||
|
return ncclSystemError;
|
||||||
|
}
|
||||||
|
return ncclSuccess;
|
||||||
|
diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h
|
||||||
|
index 2c86c33..5801c61 100755
|
||||||
|
--- a/tools/ib-test/include/nccl.h
|
||||||
|
+++ b/tools/ib-test/include/nccl.h
|
||||||
|
@@ -31,7 +31,7 @@ extern "C" {
|
||||||
|
typedef struct ncclComm* ncclComm_t;
|
||||||
|
|
||||||
|
#define NCCL_UNIQUE_ID_BYTES 128
|
||||||
|
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||||
|
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||||
|
|
||||||
|
/* Error type */
|
||||||
|
typedef enum { ncclSuccess = 0,
|
||||||
|
diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h
|
||||||
|
index 729561b..4e4bdd9 100644
|
||||||
|
--- a/tools/topo_expl/include/nccl.h
|
||||||
|
+++ b/tools/topo_expl/include/nccl.h
|
||||||
|
@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t;
|
||||||
|
#define NCCL_COMM_NULL NULL
|
||||||
|
|
||||||
|
#define NCCL_UNIQUE_ID_BYTES 128
|
||||||
|
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||||
|
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||||
|
|
||||||
|
/*! @brief Error type */
|
||||||
|
typedef enum { ncclSuccess = 0,
|
||||||
10
pkgs/rocm-modules/rccl/rccl-test-missing-iomanip.diff
Normal file
10
pkgs/rocm-modules/rccl/rccl-test-missing-iomanip.diff
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
--- a/test/common/TestBed.cpp
|
||||||
|
+++ b/test/common/TestBed.cpp
|
||||||
|
@@ -4,6 +4,7 @@
|
||||||
|
* See LICENSE.txt for license information
|
||||||
|
************************************************************************/
|
||||||
|
#include <unistd.h>
|
||||||
|
+#include <iomanip>
|
||||||
|
#include "TestBed.hpp"
|
||||||
|
#include <rccl/rccl.h>
|
||||||
|
|
||||||
146
pkgs/rocm-modules/rdc/default.nix
Normal file
146
pkgs/rocm-modules/rdc/default.nix
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
# Nix package expression for ROCm `rdc` (https://github.com/ROCm/rdc).
#
# Optional arguments:
#   buildDocs  - adds the `doc` output and pulls doxygen, graphviz and a
#                LaTeX environment into nativeBuildInputs.
#   buildTests - adds the `test` output, links against gtest, passes
#                -DBUILD_TESTS=ON and moves the `rdctst_tests` binary into
#                `$test/bin`.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  amdsmi,
  rocm-smi,
  rocm-runtime,
  libcap,
  libdrm,
  grpc,
  protobuf,
  openssl,
  doxygen,
  graphviz,
  texliveSmall,
  gtest,
  buildDocs ? true,
  buildTests ? false,
}:

let
  # LaTeX environment used for the documentation build. This binding is only
  # referenced from the `buildDocs` branch of nativeBuildInputs, and Nix
  # evaluates lazily, so no extra guard is needed here.
  latex = texliveSmall.withPackages (
    ps: with ps; [
      changepage
      latexmk
      varwidth
      multirow
      hanging
      adjustbox
      collectbox
      stackengine
      enumitem
      alphalph
      wasysym
      sectsty
      tocloft
      newunicodechar
      etoc
      helvetic
      wasy
      courier
    ]
  );
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rdc";
  version = "6.4.1";

  outputs = [ "out" ] ++ lib.optionals buildDocs [ "doc" ] ++ lib.optionals buildTests [ "test" ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rdc";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-HkGumwag7mDERHiWwZ7cRQz0tzH+vIovY1HgX2g69d4=";
  };

  nativeBuildInputs =
    [
      cmake
      protobuf
    ]
    ++ lib.optionals buildDocs [
      doxygen
      graphviz
      latex
    ];

  buildInputs =
    [
      amdsmi
      rocm-smi
      rocm-runtime
      libcap
      libdrm
      grpc
      openssl
    ]
    ++ lib.optionals buildTests [
      gtest
    ];

  # Put the libcap headers on the C++ include path.
  CXXFLAGS = "-I${libcap.dev}/include";

  cmakeFlags =
    [
      "-DCMAKE_VERBOSE_MAKEFILE=OFF"
      "-DRDC_INSTALL_PREFIX=${placeholder "out"}"
      "-DBUILD_ROCRTEST=ON"
      "-DRSMI_INC_DIR=${rocm-smi}/include"
      "-DRSMI_LIB_DIR=${rocm-smi}/lib"
      "-DGRPC_ROOT=${grpc}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DCMAKE_INSTALL_LIBEXECDIR=libexec"
      "-DCMAKE_INSTALL_DOCDIR=doc"
    ]
    ++ lib.optionals buildTests [
      "-DBUILD_TESTS=ON"
    ];

  # Replace the /etc/os-release lookup in CMakeLists.txt with a fixed distro
  # name. `--replace-warn` keeps the matching semantics of the deprecated
  # `--replace` (a missing pattern only produces a warning) without the
  # deprecation notice.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-warn "file(STRINGS /etc/os-release LINUX_DISTRO LIMIT_COUNT 1 REGEX \"NAME=\")" "set(LINUX_DISTRO \"NixOS\")"
  '';

  postInstall =
    # Shrink the rpath of every installed executable to entries under $NIX_STORE.
    ''
      find $out/bin -executable -type f -exec \
        patchelf {} --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE" \;
    ''
    # The target directory must exist before the move; otherwise `mv` renames
    # the test binary to a regular file called `$test/bin`.
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/rdctst_tests $test/bin/
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Simplifies administration and addresses infrastructure challenges in cluster and datacenter environments";
    homepage = "https://github.com/ROCm/rdc";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
135
pkgs/rocm-modules/rocalution/default.nix
Normal file
135
pkgs/rocm-modules/rocalution/default.nix
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
# Nix package expression for rocALUTION (https://github.com/ROCm/rocALUTION).
#
# Optional arguments:
#   buildTests      - adds the `test` output holding `rocalution-test`.
#   buildBenchmarks - adds the `benchmark` output holding `rocalution-bench`.
#   buildSamples    - adds the `sample` output filled from `clients/staging`.
#   gpuTargets      - list of GPU architectures; when non-empty it is joined
#                     with ";" and passed as AMDGPU_TARGETS and GPU_TARGETS.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocblas,
  rocsparse,
  rocprim,
  rocrand,
  clr,
  git,
  pkg-config,
  openmp,
  openmpi,
  gtest,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900:xnack-" "gfx906:xnack-" ... ]
}:

let
  inherit (lib) optional optionals optionalString;

  # Header directory of the OpenMP dev output, shared by CXXFLAGS and cmakeFlags.
  openmpInclude = "${openmp.dev}/include";

  # ";"-separated architecture list; only forced when gpuTargets is non-empty.
  targetList = lib.concatStringsSep ";" gpuTargets;

  # Render a Nix boolean as the ON/OFF spelling used in the flag below.
  onOff = enabled: if enabled then "ON" else "OFF";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocalution";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ optional buildTests "test"
    ++ optional buildBenchmarks "benchmark"
    ++ optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocALUTION";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-bZx1Cc2jcIfysohKCKzj5mowM3IeCelRhVaBU73KnTo=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    git
    pkg-config
  ];

  buildInputs = [
    rocblas
    rocsparse
    rocprim
    rocrand
    openmp
    openmpi
  ] ++ optional buildTests gtest;

  CXXFLAGS = "-I${openmpInclude}";

  cmakeFlags =
    [
      "-DOpenMP_C_INCLUDE_DIR=${openmpInclude}"
      "-DOpenMP_CXX_INCLUDE_DIR=${openmpInclude}"
      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
      "-DROCM_PATH=${clr}"
      "-DHIP_ROOT_DIR=${clr}"
      "-DSUPPORT_HIP=ON"
      "-DSUPPORT_OMP=ON"
      "-DSUPPORT_MPI=ON"
      "-DBUILD_CLIENTS_SAMPLES=${onOff buildSamples}"
      # CMAKE_INSTALL_<DIR> is pinned by hand here.
      # Background: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${targetList}"
      "-DGPU_TARGETS=${targetList}"
    ]
    ++ optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON";

  # Each optional client is relocated from $out into its own output. The
  # fragments are concatenated in the order: tests, benchmarks, samples,
  # cleanup of the $out/bin directory.
  postInstall = lib.concatStrings [
    (optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/rocalution-test $test/bin
    '')
    (optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/rocalution-bench $benchmark/bin
    '')
    # Samples come from the build tree's staging directory; the test and
    # benchmark binaries are removed from the copy if they are present, then
    # the rpath of every sample is rewritten.
    (optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/* $sample/bin
      rm $sample/bin/rocalution-test || true
      rm $sample/bin/rocalution-bench || true

      patchelf --set-rpath \
        $out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ [ clr ])} \
        $sample/bin/*
    '')
    (optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Iterative sparse solvers for ROCm";
    homepage = "https://github.com/ROCm/rocALUTION";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
200
pkgs/rocm-modules/rocblas/default.nix
Normal file
200
pkgs/rocm-modules/rocblas/default.nix
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
# Nix package expression for rocBLAS (https://github.com/ROCm/rocBLAS).
#
# Optional arguments:
#   buildTensile    - adds tensile and its helper libraries and sets
#                     BUILD_WITH_TENSILE plus the Tensile_* flags.
#   buildTests      - sets BUILD_CLIENTS_TESTS and adds gtest.
#   buildBenchmarks - sets BUILD_CLIENTS_BENCHMARKS and BUILD_CLIENTS_SAMPLES.
#   withHipBlasLt   - sets BUILD_WITH_HIPBLASLT and adds hipblaslt.
#   gpuTargets      - architecture list, joined with ";" for the *_TARGETS flags.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  python3,
  tensile,
  msgpack,
  libxml2,
  gtest,
  gfortran,
  openmp,
  git,
  amd-blis,
  zstd,
  hipblas-common,
  hipblaslt,
  python3Packages,
  rocm-smi,
  libdrm,
  roctracer,
  buildTensile ? true,
  buildTests ? true,
  buildBenchmarks ? true,
  # Tensile issue: https://github.com/ROCm/Tensile/issues/1757
  # These two switches are what let gfx101* users use rocBLAS normally.
  # The original note said to turn them to `true` once the fix had been
  # cherry-picked into a release (backporting that single fix alone is not
  # enough because it depends on earlier commits).
  # NOTE(review): both already default to `true` here, so that instruction
  # looks stale - confirm against the linked issue.
  tensileSepArch ? true,
  tensileLazyLib ? true,
  withHipBlasLt ? true,
  # `gfx940` and `gfx941` are left out because they are early engineering
  # samples; all final MI300 hardware is `gfx942`:
  # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
  #
  # `gfx1012` is left out because the ISA compatibility patches would force
  # every `gfx101*` GPU to run as `gfx1010`, so those GPUs always look for
  # `gfx1010` code objects and a `gfx1012` build would be useless:
  # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx1010"
      # "gfx1012"
      # "gfx1030"
      # "gfx1100"
      # "gfx1101"
      # "gfx1102"
      "gfx1201"
    ]
  ),
}:

let
  # ";"-separated form of gpuTargets, reused by the CMake flags and passthru.
  targetsJoined = lib.concatStringsSep ";" gpuTargets;

  # True when any client program (tests or benchmarks) is being built.
  withClients = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocblas${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs = [ "out" ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-To5V5bydYR5iuUxkwpx79RrNdncvWmR/v/w9VnlKBq4=";
  };

  nativeBuildInputs = [
    cmake
    # ninja is deliberately absent: it buffers console output, and nix times
    # out builds that stay silent for long periods.
    rocm-cmake
    clr
    git
  ] ++ lib.optional buildTensile tensile;

  buildInputs =
    [
      python3
      hipblas-common
    ]
    ++ lib.optional withHipBlasLt hipblaslt
    ++ lib.optionals buildTensile [
      zstd
      msgpack
      libxml2
      python3Packages.msgpack
      python3Packages.zstandard
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optionals withClients [
      gfortran
      openmp
      amd-blis
      rocm-smi
    ]
    ++ lib.optional (buildTensile || withClients) python3Packages.pyyaml;

  dontStrip = true;

  env = {
    CXXFLAGS =
      "-O3 -DNDEBUG -I${hipblas-common}/include -I${roctracer}/include -I${libdrm.dev}/include"
      + lib.optionalString withClients " -I${amd-blis}/include/blis";

    # The tests fail to link unless the amd-blis libraries are added.
    LDFLAGS =
      "-L${roctracer}/lib -L${libdrm.dev}/lib"
      + lib.optionalString withClients " -Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";

    TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
  };

  cmakeFlags =
    [
      (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
      (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
      (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
      (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
      (lib.cmakeFeature "python" "python3")
      (lib.cmakeFeature "SUPPORTED_TARGETS" targetsJoined)
      (lib.cmakeFeature "AMDGPU_TARGETS" targetsJoined)
      (lib.cmakeFeature "GPU_TARGETS" targetsJoined)
      (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
      (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
      (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
      (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
      (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
      (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
      (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
      (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
      # Temporary workaround for an upstream CMakeLists issue; can go once
      # https://github.com/ROCm/rocm-cmake/issues/121 is fixed.
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DCMAKE_INSTALL_LIBDIR=lib"
    ]
    ++ lib.optionals buildTensile [
      "-DCPACK_SET_DESTDIR=OFF"
      "-DLINK_BLIS=ON"
      "-DTensile_CODE_OBJECT_VERSION=default"
      "-DTensile_LOGIC=asm_full"
      "-DTensile_LIBRARY_FORMAT=msgpack"
      (lib.cmakeBool "BUILD_WITH_PIP" false)
      (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
      (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
    ];

  # No patches are applied at the moment. The ISA-compatibility patch below is
  # kept for reference but disabled:
  patches = [
    # (fetchpatch {
    #   name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
    #   url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
    #   hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
    # })
  ];

  # Make Tensile pick up $NIX_BUILD_CORES as its CPU thread count.
  postPatch = ''
    substituteInPlace cmake/build-options.cmake \
      --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
    # substituteInPlace CMakeLists.txt \
    #   --replace-fail "4.42.0" "4.43.0"
  '';

  passthru = {
    amdgpu_targets = targetsJoined;

    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
  };

  enableParallelBuilding = true;
  requiredSystemFeatures = [ "big-parallel" ];

  meta = {
    description = "BLAS implementation for ROCm platform";
    homepage = "https://github.com/ROCm/rocBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
114
pkgs/rocm-modules/rocdbgapi/default.nix
Normal file
114
pkgs/rocm-modules/rocdbgapi/default.nix
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
# Nix package expression for ROCdbgapi (https://github.com/ROCm/ROCdbgapi).
#
# Optional arguments:
#   buildDocs - adds the `doc` output, pulls in LaTeX/doxygen/graphviz, runs
#               `make doc` after the main build and moves the generated HTML
#               into `$doc`.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  git,
  rocm-comgr,
  rocm-runtime,
  hwdata,
  texliveSmall,
  doxygen,
  graphviz,
  buildDocs ? true,
}:

let
  inherit (lib) optionals optionalString;

  # LaTeX packages requested from texliveSmall for the documentation build.
  selectTexPackages =
    ps: with ps; [
      changepage
      latexmk
      varwidth
      multirow
      hanging
      adjustbox
      collectbox
      stackengine
      enumitem
      alphalph
      wasysym
      sectsty
      tocloft
      newunicodechar
      etoc
      helvetic
      wasy
      courier
    ];

  # Evaluates to the TeX environment when buildDocs is set, otherwise to `{ }`.
  latex = lib.optionalAttrs buildDocs (texliveSmall.withPackages selectTexPackages);
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocdbgapi";
  version = "6.4.1";

  outputs = [ "out" ] ++ optionals buildDocs [ "doc" ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCdbgapi";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Rr8+SNeFps0rjk4Jn2+rFmtRJfL42l0tNOz13oZQy+I=";
  };

  nativeBuildInputs =
    [
      cmake
      rocm-cmake
      git
    ]
    ++ optionals buildDocs [
      latex
      doxygen
      graphviz
    ];

  buildInputs = [
    rocm-comgr
    rocm-runtime
    hwdata
  ];

  cmakeFlags = [
    "-DPCI_IDS_PATH=${hwdata}/share/hwdata"
    # CMAKE_INSTALL_<DIR> is pinned by hand here.
    # Background: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # The `doc` target has to be invoked by hand after the regular build.
  # HOME is pointed at a fresh temporary directory first.
  postBuild = optionalString buildDocs ''
    export HOME=$(mktemp -d)
    make -j$NIX_BUILD_CORES doc
  '';

  # Relocate the generated HTML from $out into the doc output and drop the
  # then-empty directory.
  postInstall = optionalString buildDocs ''
    mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html
    rmdir $out/share/html
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Debugger support for control of execution and inspection state";
    homepage = "https://github.com/ROCm/ROCdbgapi";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
174
pkgs/rocm-modules/rocfft/default.nix
Normal file
174
pkgs/rocm-modules/rocfft/default.nix
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  python3,
  rocm-cmake,
  sqlite,
  boost,
  fftw,
  fftwFloat,
  gtest,
  openmp,
  rocrand,
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:

let
  # The library and every client project below are configured with hipcc as
  # both the C and the C++ compiler.
  hipccFlags = [
    "-DCMAKE_C_COMPILER=hipcc"
    "-DCMAKE_CXX_COMPILER=hipcc"
  ];

  # Build-time tools shared by the test, benchmark and samples derivations.
  clientNativeBuildInputs = [
    cmake
    clr
    rocm-cmake
  ];
in
# rocFFT library. The client programs (tests, benchmark, samples) are exposed
# as separate derivations under passthru and link against the finished library.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocfft${clr.gpuArchSuffix}";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocFFT";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-z8O//2lihXeVnYZklR8uUIgCS9RyNNRb+hM5keR5aYA=";
  };

  nativeBuildInputs = [
    cmake
    clr
    python3
    rocm-cmake
  ];

  # FIXME: rocfft_aot_helper runs at the end of the build and has a risk of timing it out
  # due to a long period with no terminal output
  buildInputs = [ sqlite ];

  cmakeFlags =
    hipccFlags
    ++ [
      "-DSQLITE_USE_SYSTEM_PACKAGE=ON"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    # Only restrict the GPU architectures when a non-empty target list was given.
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}";

  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    # Test client, built from clients/tests against the finished library.
    test = stdenv.mkDerivation {
      pname = "${finalAttrs.pname}-test";
      inherit (finalAttrs) version src;

      sourceRoot = "${finalAttrs.src.name}/clients/tests";

      nativeBuildInputs = clientNativeBuildInputs;

      buildInputs = [
        boost
        fftw
        fftwFloat
        finalAttrs.finalPackage
        gtest
        openmp
        rocrand
      ];

      cmakeFlags = hipccFlags;

      # Drop the lib/fftw directory from the output, then the (expected to be
      # empty) lib directory itself.
      postInstall = ''
        rm -r "$out/lib/fftw"
        rmdir "$out/lib"
      '';
    };

    # Benchmark client, built from clients/rider; the perf scripts are shipped
    # alongside the binaries.
    benchmark = stdenv.mkDerivation {
      pname = "${finalAttrs.pname}-benchmark";
      inherit (finalAttrs) version src;

      sourceRoot = "${finalAttrs.src.name}/clients/rider";

      nativeBuildInputs = clientNativeBuildInputs;

      buildInputs = [
        boost
        finalAttrs.finalPackage
        openmp
        (python3.withPackages (ps: [
          ps.pandas
          ps.scipy
        ]))
        rocrand
      ];

      cmakeFlags = hipccFlags;

      postInstall = ''
        cp -a ../../../scripts/perf "$out/bin"
      '';
    };

    # Sample programs, built from clients/samples; installed by copying the
    # build tree's bin directory.
    samples = stdenv.mkDerivation {
      pname = "${finalAttrs.pname}-samples";
      inherit (finalAttrs) version src;

      sourceRoot = "${finalAttrs.src.name}/clients/samples";

      nativeBuildInputs = clientNativeBuildInputs;

      buildInputs = [
        boost
        finalAttrs.finalPackage
        openmp
        rocrand
      ];

      cmakeFlags = hipccFlags;

      installPhase = ''
        runHook preInstall
        mkdir "$out"
        cp -a bin "$out"
        runHook postInstall
      '';
    };

    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
  };

  meta = {
    description = "FFT implementation for ROCm";
    homepage = "https://github.com/ROCm/rocFFT";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
117
pkgs/rocm-modules/rocgdb/default.nix
Normal file
117
pkgs/rocm-modules/rocgdb/default.nix
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  pkg-config,
  texinfo,
  bison,
  flex,
  glibc,
  zlib,
  zstd,
  gmp,
  mpfr,
  ncurses,
  expat,
  rocdbgapi,
  perl,
  python3,
  babeltrace,
  sourceHighlight,
}:

# ROCgdb: GDB built with the additional amdgcn-amd-amdhsa target and linked
# against the AMD debugger API (rocdbgapi).
stdenv.mkDerivation (finalAttrs: {
  pname = "rocgdb";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCgdb";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-evDWg2w2FHv6OU5BQOCAXTlDm7JpwdJ3Wh5a2i5r1gQ=";
  };

  nativeBuildInputs = [
    pkg-config
    texinfo # For makeinfo
    bison
    flex
    perl # used in mkinstalldirs script during installPhase
    python3
  ];

  buildInputs = [
    zlib
    zstd
    gmp
    mpfr
    ncurses
    expat
    rocdbgapi
    python3
    babeltrace
    sourceHighlight
  ];

  configureFlags = [
    # Ensure we build the amdgpu target
    "--enable-targets=${stdenv.targetPlatform.config},amdgcn-amd-amdhsa"
    "--with-amd-dbgapi=yes"

    "--with-iconv-path=${glibc.bin}"
    "--enable-tui"
    "--with-babeltrace=${babeltrace}"
    "--with-python=python3"
    "--with-system-zlib"
    "--with-system-zstd"
    "--enable-64-bit-bfd"
    "--with-gmp=${gmp.dev}"
    "--with-mpfr=${mpfr.dev}"
    "--with-expat=${expat}"

    # So the installed binary is called "rocgdb" instead of plain "gdb"
    "--program-prefix=roc"

    # Disable building many components not used or incompatible with the amdgcn target
    "--disable-sim"
    "--disable-gdbserver"
    "--disable-ld"
    "--disable-gas"
    "--disable-gdbtk"
    "--disable-gprofng"
    "--disable-shared"
  ];

  # Patch the shebangs of the regular files at the top level of the source
  # tree only; subdirectories are deliberately not descended into.
  postPatch = ''
    for file in *; do
      if [ -f "$file" ]; then
        patchShebangs "$file"
      fi
    done
  '';

  # The source directory for ROCgdb (based on upstream GDB) contains multiple projects
  # of GNU's toolchain (binutils and others); we only need to install the GDB part.
  # The pre/post hooks are run explicitly because overriding installPhase
  # replaces the default phase that would otherwise run them.
  installPhase = ''
    runHook preInstall
    make install-gdb
    runHook postInstall
  '';

  env.CFLAGS = "-Wno-switch -Wno-format-nonliteral -I${zstd.dev}/include -I${zlib.dev}/include -I${expat.dev}/include -I${ncurses.dev}/include";
  # C++ sources are compiled with exactly the same extra flags as C sources.
  env.CXXFLAGS = finalAttrs.env.CFLAGS;

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm source-level debugger for Linux, based on GDB";
    homepage = "https://github.com/ROCm/ROCgdb";
    license = licenses.gpl3Plus;
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
38
pkgs/rocm-modules/rocm-cmake/default.nix
Normal file
38
pkgs/rocm-modules/rocm-cmake/default.nix
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  rocm-core,
  cmake,
}:

# CMake helper modules used by the other ROCm packages.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-cmake";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-cmake";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-wAipNWAB66YNf7exLSNPAzg3NgkGD9LPKfKiulL5yak=";
  };

  nativeBuildInputs = [ cmake ];
  buildInputs = [ rocm-core ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "CMake modules for common build tasks for the ROCm stack";
    homepage = "https://github.com/ROCm/rocm-cmake";
    license = lib.licenses.mit;
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.unix;
  };
})
|
||||||
73
pkgs/rocm-modules/rocm-comgr/default.nix
Normal file
73
pkgs/rocm-modules/rocm-comgr/default.nix
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchpatch,
  cmake,
  python3,
  rocm-merged-llvm,
  rocm-device-libs,
  zlib,
  zstd,
  libxml2,
}:

let
  inherit (stdenv) hostPlatform;

  # Name of the LLVM backend matching the host CPU; evaluation aborts on any
  # architecture other than x86_64 and aarch64.
  llvmNativeTarget =
    if hostPlatform.isx86_64 then
      "X86"
    else if hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-comgr";

  # In-tree with ROCm LLVM: version and sources come from rocm-merged-llvm,
  # and only the amd/comgr subdirectory is built.
  version = rocm-merged-llvm.version;
  src = rocm-merged-llvm.llvm-src;
  sourceRoot = "${finalAttrs.src.name}/amd/comgr";

  # Both patches below are currently disabled.
  patches = [
    # [Comgr] Extend ISA compatibility
    # (fetchpatch {
    #   sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k=";
    #   url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch";
    #   relative = "amd/comgr";
    # })
    # [Comgr] Extend ISA compatibility for CCOB
    # (fetchpatch {
    #   sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY=";
    #   url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch";
    #   relative = "amd/comgr";
    # })
  ];

  nativeBuildInputs = [
    cmake
    python3
  ];

  buildInputs = [
    rocm-device-libs
    libxml2
    zlib
    zstd
    rocm-merged-llvm
  ];

  cmakeFlags = [
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
  ];

  meta = {
    description = "APIs for compiling and inspecting AMDGPU code objects";
    homepage = "https://github.com/ROCm/ROCm-CompilerSupport/tree/amd-stg-open/lib/comgr";
    license = lib.licenses.ncsa;
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
53
pkgs/rocm-modules/rocm-core/default.nix
Normal file
53
pkgs/rocm-modules/rocm-core/default.nix
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  writeText,
}:

# rocm-core: reports the ROCm release version. The version identifiers computed
# here are passed to CMake, exported to dependents through the setup hook, and
# exposed via passthru.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-core";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-core";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-dDTCEAbeA88deLgUbdbulaHpHI9zcTze0mZeS49TsAM=";
  };

  nativeBuildInputs = [ cmake ];

  # Major version followed by the minor and patch components, each zero-padded
  # to two digits (6.4.1 -> "60401"). Padding to a fixed width, rather than
  # inserting a literal "0", keeps the layout stable once a component reaches
  # two digits.
  # NOTE(review): the major/2-digit-minor/2-digit-patch layout is assumed to
  # match what upstream expects -- confirm (this was an open FIXME).
  env.ROCM_LIBPATCH_VERSION =
    let
      inherit (lib.versions) major minor patch;
      pad2 = lib.fixedWidthString 2 "0";
    in
    "${major finalAttrs.version}${pad2 (minor finalAttrs.version)}${pad2 (patch finalAttrs.version)}";
  env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}";
  env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}";

  cmakeFlags = [
    "-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
    "-DROCM_VERSION=${finalAttrs.version}"
    "-DBUILD_ID=${finalAttrs.env.BUILD_ID}"
  ];

  # Export the same identifiers into the build environment of every package
  # that depends on rocm-core.
  setupHook = writeText "setupHook.sh" ''
    export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
    export BUILD_ID="${finalAttrs.env.BUILD_ID}"
    export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}"
  '';

  passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION;
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
    page = "tags?per_page=4";
  };

  meta = with lib; {
    description = "Utility for getting the ROCm release version";
    homepage = "https://github.com/ROCm/rocm-core";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
43
pkgs/rocm-modules/rocm-device-libs/cmake.patch
Normal file
43
pkgs/rocm-modules/rocm-device-libs/cmake.patch
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake
|
||||||
|
index 07c60eb..c736b3e 100644
|
||||||
|
--- a/amd/device-libs/cmake/Packages.cmake
|
||||||
|
+++ b/amd/device-libs/cmake/Packages.cmake
|
||||||
|
@@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES
|
||||||
|
IMPORTED_LOCATION \"${target_path}\")")
|
||||||
|
endforeach()
|
||||||
|
configure_file(AMDDeviceLibsConfig.cmake.in
|
||||||
|
- ${PACKAGE_PREFIX}/AMDDeviceLibsConfig.cmake
|
||||||
|
+ lib/cmake/AMDDeviceLibs/AMDDeviceLibsConfig.cmake
|
||||||
|
@ONLY)
|
||||||
|
|
||||||
|
|
||||||
|
set(install_path_suffix "amdgcn/bitcode")
|
||||||
|
|
||||||
|
# Generate the install-tree package.
|
||||||
|
-# We do not know the absolute path to the intall tree until we are installed,
|
||||||
|
-# so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
|
||||||
|
-# relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
|
||||||
|
-set(AMD_DEVICE_LIBS_PREFIX_CODE "
|
||||||
|
+if(IS_ABSOLUTE "${CMAKE_INSTALL_PREFIX}")
|
||||||
|
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "set(AMD_DEVICE_LIBS_PREFIX \"${CMAKE_INSTALL_PREFIX}\")")
|
||||||
|
+else()
|
||||||
|
+ # We do not know the absolute path to the install tree until we are installed,
|
||||||
|
+ # so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
|
||||||
|
+ # relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
|
||||||
|
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "
|
||||||
|
# Derive absolute install prefix from config file path.
|
||||||
|
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)")
|
||||||
|
-string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
|
||||||
|
-foreach(p ${count})
|
||||||
|
- set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
|
||||||
|
+ string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
|
||||||
|
+ foreach(p ${count})
|
||||||
|
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
|
||||||
|
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${AMD_DEVICE_LIBS_PREFIX}\" PATH)")
|
||||||
|
-endforeach()
|
||||||
|
+ endforeach()
|
||||||
|
+endif()
|
||||||
|
+
|
||||||
|
set(AMD_DEVICE_LIBS_TARGET_CODE)
|
||||||
|
foreach(target ${AMDGCN_LIB_LIST})
|
||||||
|
get_target_property(target_name ${target} ARCHIVE_OUTPUT_NAME)
|
||||||
62
pkgs/rocm-modules/rocm-device-libs/default.nix
Normal file
62
pkgs/rocm-modules/rocm-device-libs/default.nix
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
{
  lib,
  stdenv,
  cmake,
  ninja,
  libxml2,
  zlib,
  zstd,
  ncurses,
  rocm-merged-llvm,
  python3,
}:

let
  # Name of the LLVM backend matching the host CPU; evaluation aborts on any
  # architecture other than x86_64 and aarch64.
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then
      "X86"
    else if stdenv.hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation {
  pname = "rocm-device-libs";
  # In-tree with ROCm LLVM
  inherit (rocm-merged-llvm) version;
  src = rocm-merged-llvm.llvm-src;

  # cmake.patch uses paths relative to the LLVM source root
  # (amd/device-libs/...), so it is applied from the root and only afterwards
  # do we enter the device-libs subdirectory for the remaining phases.
  postPatch = ''
    cd amd/device-libs
  '';

  patches = [ ./cmake.patch ];

  nativeBuildInputs = [
    cmake
    ninja
    python3
  ];

  buildInputs = [
    libxml2
    zlib
    zstd
    ncurses
    rocm-merged-llvm
  ];

  cmakeFlags = [
    # CMAKE_BUILD_TYPE is the variable CMake actually reads; the previous
    # CMAKE_RELEASE_TYPE spelling is not a CMake variable.
    "-DCMAKE_BUILD_TYPE=Release"
    "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
  ];

  meta = with lib; {
    description = "Set of AMD-specific device-side language runtime libraries";
    homepage = "https://github.com/ROCm/ROCm-Device-Libs";
    license = licenses.ncsa;
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}
|
||||||
71
pkgs/rocm-modules/rocm-docs-core/default.nix
Normal file
71
pkgs/rocm-modules/rocm-docs-core/default.nix
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
{
  lib,
  fetchFromGitHub,
  gitUpdater,
  buildPythonPackage,
  setuptools,
  beautifulsoup4,
  gitpython,
  pydata-sphinx-theme,
  pygithub,
  sphinx,
  breathe,
  myst-nb,
  myst-parser,
  sphinx-book-theme,
  sphinx-copybutton,
  sphinx-design,
  sphinx-external-toc,
  sphinx-notfound-page,
  pyyaml,
  fastjsonschema,
}:

# FIXME: Move to rocmPackages_common
buildPythonPackage rec {
  pname = "rocm-docs-core";
  version = "1.19.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-docs-core";
    rev = "v${version}";
    hash = "sha256-vmtOf9e8RhWQ0ecL+Sn1HJGK+ILCaQxeQvUkQ8W8YX8=";
  };

  # setuptools is the PEP 517 build backend: it has to run at build time, so
  # it is declared as the build system rather than as a host-platform input.
  build-system = [ setuptools ];

  # Runtime Python dependencies, propagated to users of this package.
  dependencies = [
    beautifulsoup4
    gitpython
    pydata-sphinx-theme
    pygithub
    sphinx
    breathe
    myst-nb
    myst-parser
    sphinx-book-theme
    sphinx-copybutton
    sphinx-design
    sphinx-external-toc
    sphinx-notfound-page
    pyyaml
    fastjsonschema
  ];

  # Smoke test: the installed module must be importable.
  pythonImportsCheck = [ "rocm_docs" ];

  passthru.updateScript = gitUpdater { rev-prefix = "v"; };

  meta = with lib; {
    description = "ROCm Documentation Python package for ReadTheDocs build standardization";
    homepage = "https://github.com/ROCm/rocm-docs-core";
    license = with licenses; [
      mit
      cc-by-40
    ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}
|
||||||
27
pkgs/rocm-modules/rocm-path/default.nix
Normal file
27
pkgs/rocm-modules/rocm-path/default.nix
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
  symlinkJoin,
  linkFarm,
  clr,
  hipblas,
  hipblas-common,
  rocblas,
  rocsolver,
  rocsparse,
  rocm-device-libs,
  rocm-smi,
  llvm,
}:

let
  # Single-entry link farm that exposes the ROCm clang under an "llvm"
  # subdirectory of the joined tree.
  llvmSubdir = linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; };
in
# One merged directory tree containing the listed ROCm packages, versioned
# after clr.
symlinkJoin {
  name = "rocm-path-${clr.version}";

  paths = [
    clr
    hipblas-common
    hipblas
    rocblas
    rocsolver
    rocsparse
    rocm-device-libs
    rocm-smi
    llvmSubdir
  ];
}
|
||||||
115
pkgs/rocm-modules/rocm-runtime/default.nix
Normal file
115
pkgs/rocm-modules/rocm-runtime/default.nix
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  pkg-config,
  cmake,
  ninja,
  xxd,
  rocm-device-libs,
  elfutils,
  libdrm,
  numactl,
  valgrind,
  libxml2,
  rocm-merged-llvm,
}:

# ROCR-Runtime: the ROCm platform runtime, built with the upstream UB fixes
# listed under `patches` and without the unfree aqlprofile dependency.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-runtime";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCR-Runtime";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-LOILnvjGwlLoB99+TdZib7VJsgp45yGJiEPgrlwXItI=";
  };

  # Add the numactl and elfutils headers to the include path and silence all
  # compiler warnings (-w).
  env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
  # C++ sources are compiled with exactly the same extra flags as C sources.
  env.CXXFLAGS = finalAttrs.env.CFLAGS;

  nativeBuildInputs = [
    pkg-config
    cmake
    ninja
    xxd
    rocm-merged-llvm
  ];

  buildInputs = [
    elfutils
    libdrm
    numactl
    # without valgrind, additional work for "kCodeCopyAligned11" is done in the installPhase
    valgrind
    libxml2
  ];

  cmakeFlags = [
    "-DBUILD_SHARED_LIBS=ON"
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  patches = [
    # Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272
    # The first two are currently disabled.
    # (fetchpatch {
    #   # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int
    #   url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch";
    #   hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE=";
    # })
    # (fetchpatch {
    #   # [PATCH] rocr: refactor of runtime.cpp based on Coverity
    #   url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch";
    #   hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0=";
    # })
    (fetchpatch {
      # [PATCH] queues: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch";
      hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U=";
    })
    (fetchpatch {
      # [PATCH] topology: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch";
      hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w=";
    })
    (fetchpatch {
      # [PATCH] kfd_ioctl: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch";
      hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns=";
    })
    ./remove-hsa-aqlprofile-dep.patch
  ];

  # --replace-warn keeps the previous non-fatal behaviour of the deprecated
  # --replace flag (a missing pattern only warns) without the deprecation
  # notice on every build.
  # HIP_DEVICE_LIB_PATH is exported here so that it is set for the phases that
  # follow postPatch.
  postPatch = ''
    patchShebangs --build \
      runtime/hsa-runtime/core/runtime/trap_handler/create_trap_handler_header.sh \
      runtime/hsa-runtime/core/runtime/blit_shaders/create_blit_shader_header.sh \
      runtime/hsa-runtime/image/blit_src/create_hsaco_ascii_file.sh
    patchShebangs --host image core runtime

    substituteInPlace CMakeLists.txt \
      --replace-warn 'hsa/include/hsa' 'include/hsa'

    export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Platform runtime for ROCm";
    homepage = "https://github.com/ROCm/ROCR-Runtime";
    license = with licenses; [ ncsa ];
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
libhsa-amd-aqlprofile64 library is unfree
|
||||||
|
Bug: https://github.com/ROCm/ROCm/issues/1781
|
||||||
|
--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
|
||||||
|
+++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
|
||||||
|
@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
|
||||||
|
setFlag(HSA_EXTENSION_AMD_PC_SAMPLING);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
|
||||||
|
- os::CloseLib(lib);
|
||||||
|
- setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
setFlag(HSA_EXTENSION_AMD_PROFILER);
|
||||||
|
|
||||||
|
break;
|
||||||
|
--- a/runtime/hsa-runtime/core/runtime/hsa.cpp
|
||||||
|
+++ b/runtime/hsa-runtime/core/runtime/hsa.cpp
|
||||||
|
@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v
|
||||||
|
return HSA_STATUS_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) {
|
||||||
|
+ if (0) {
|
||||||
|
if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) {
|
||||||
|
debug_print("aqlprofile API incompatible ver %d, current ver %d\n",
|
||||||
|
version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR);
|
||||||
89
pkgs/rocm-modules/rocm-smi/cmake.patch
Normal file
89
pkgs/rocm-modules/rocm-smi/cmake.patch
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
diff --git a/rocm_smi-backward-compat.cmake b/rocm_smi-backward-compat.cmake
|
||||||
|
index aa8fd9c..59afce5 100644
|
||||||
|
--- a/rocm_smi-backward-compat.cmake
|
||||||
|
+++ b/rocm_smi-backward-compat.cmake
|
||||||
|
@@ -72,7 +72,12 @@ function(generate_wrapper_header)
|
||||||
|
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
|
||||||
|
#set #include statement
|
||||||
|
get_filename_component(file_name ${header_file} NAME)
|
||||||
|
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${ROCM_SMI}/${file_name}\"\n")
|
||||||
|
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
|
||||||
|
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
|
||||||
|
+ else()
|
||||||
|
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
|
||||||
|
+ endif()
|
||||||
|
+ set(include_statements "${include_statements}#include \"${include_dir}/${ROCM_SMI}/${file_name}\"\n")
|
||||||
|
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${RSMI_WRAPPER_INC_DIR}/${file_name})
|
||||||
|
unset(include_guard)
|
||||||
|
unset(include_statements)
|
||||||
|
@@ -90,7 +95,12 @@ function(generate_wrapper_header)
|
||||||
|
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
|
||||||
|
#set #include statement
|
||||||
|
get_filename_component(file_name ${header_file} NAME)
|
||||||
|
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${OAM_TARGET_NAME}/${file_name}\"\n")
|
||||||
|
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
|
||||||
|
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
|
||||||
|
+ else()
|
||||||
|
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
|
||||||
|
+ endif()
|
||||||
|
+ set(include_statements "${include_statements}#include \"${include_dir}/${OAM_TARGET_NAME}/${file_name}\"\n")
|
||||||
|
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${OAM_WRAPPER_INC_DIR}/${file_name})
|
||||||
|
unset(include_guard)
|
||||||
|
unset(include_statements)
|
||||||
|
@@ -123,11 +133,16 @@ function(create_library_symlink)
|
||||||
|
set(library_files "${LIB_RSMI}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
|
||||||
|
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
|
||||||
|
+ else()
|
||||||
|
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
|
||||||
|
+ endif()
|
||||||
|
foreach(file_name ${library_files})
|
||||||
|
add_custom_target(link_${file_name} ALL
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||||
|
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
|
||||||
|
+ ${install_libdir}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
file(MAKE_DIRECTORY ${OAM_WRAPPER_LIB_DIR})
|
||||||
|
@@ -151,11 +166,16 @@ function(create_library_symlink)
|
||||||
|
set(library_files "${LIB_OAM}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
|
||||||
|
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
|
||||||
|
+ else()
|
||||||
|
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
|
||||||
|
+ endif()
|
||||||
|
foreach(file_name ${library_files})
|
||||||
|
add_custom_target(link_${file_name} ALL
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||||
|
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
|
||||||
|
+ ${install_libdir}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
endfunction()
|
||||||
|
diff --git a/rocm_smi/CMakeLists.txt b/rocm_smi/CMakeLists.txt
|
||||||
|
index c594eeb..d3ed39d 100755
|
||||||
|
--- a/rocm_smi/CMakeLists.txt
|
||||||
|
+++ b/rocm_smi/CMakeLists.txt
|
||||||
|
@@ -105,10 +105,15 @@ endif ()
|
||||||
|
#file reorganization changes
|
||||||
|
#rocm_smi.py moved to libexec/rocm_smi. so creating rocm-smi symlink
|
||||||
|
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
|
||||||
|
+if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBEXECDIR})
|
||||||
|
+ set(install_libexecdir "${CMAKE_INSTALL_LIBEXECDIR}")
|
||||||
|
+else()
|
||||||
|
+ set(install_libexecdir "../${CMAKE_INSTALL_LIBEXECDIR}")
|
||||||
|
+endif()
|
||||||
|
add_custom_target(link-rocm-smi ALL
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||||
|
- ../${CMAKE_INSTALL_LIBEXECDIR}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
|
||||||
|
+ ${install_libexecdir}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
|
||||||
|
|
||||||
|
## Add the install directives for the runtime library.
|
||||||
|
install(TARGETS ${ROCM_SMI_TARGET}
|
||||||
58
pkgs/rocm-modules/rocm-smi/default.nix
Normal file
58
pkgs/rocm-modules/rocm-smi/default.nix
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  wrapPython,
  libdrm,
}:

# Builds ROCm/rocm_smi_lib at the tag matching `version` and wraps the
# Python programs it installs.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-smi";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm_smi_lib";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-qshAMVhHJIA06fGOTJx5/l1t8wKv7KbmvdCSKNuEIKM=";
  };

  patches = [ ./cmake.patch ];

  # Build-time tools only.
  nativeBuildInputs = [
    cmake
    wrapPython
  ];

  # libdrm is a library for the host platform, not a build tool, so it
  # belongs in buildInputs (matters for strictDeps / cross builds).
  buildInputs = [ libdrm ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  postInstall = ''
    wrapPythonProgramsIn $out
    # The wrapping step above also replaced these two files with wrappers;
    # move the originals back into place.
    # NOTE(review): presumably because they are imported as modules rather
    # than executed - confirm.
    mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py
    mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "System management interface for AMD GPUs supported by ROCm";
    homepage = "https://github.com/ROCm/rocm_smi_lib";
    license = with licenses; [ mit ];
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = [ "x86_64-linux" ];
  };
})
|
||||||
32
pkgs/rocm-modules/rocm-tests/default.nix
Normal file
32
pkgs/rocm-modules/rocm-tests/default.nix
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
{
  clr,
  ollama,
  python3Packages,
  rocmPackages,
  magma-hip,
  emptyDirectory,
  stdenv,
}:
# Placeholder derivation: it builds nothing useful itself and only exists
# to carry the ROCm-enabled packages below as passthru.tests attributes.
let
  # magma-hip rebuilt against this ROCm package set.
  rocmMagma = magma-hip.override { inherit rocmPackages; };

  # ollama with ROCm acceleration, using this ROCm package set.
  rocmOllama = ollama.override {
    inherit rocmPackages;
    acceleration = "rocm";
  };

  # torch with ROCm enabled and CUDA disabled, using this ROCm package set.
  rocmTorch = python3Packages.torch.override {
    inherit rocmPackages;
    rocmSupport = true;
    cudaSupport = false;
    magma-hip = rocmMagma;
  };
in
stdenv.mkDerivation {
  name = "rocm-tests";
  src = emptyDirectory;
  nativeBuildInputs = [ clr ];

  # Only an (empty) output directory is produced.
  postInstall = "mkdir -p $out";

  passthru = {
    tests = {
      ollama = rocmOllama;
      torch = rocmTorch;
    };
  };
}
|
||||||
65
pkgs/rocm-modules/rocminfo/default.nix
Normal file
65
pkgs/rocm-modules/rocminfo/default.nix
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-runtime,
  busybox,
  python3,
  gnugrep,
  clr, # Only for localGpuTargets
  # rocminfo requires that the calling user have a password and be in
  # the video group. If we let rocm_agent_enumerator rely upon
  # rocminfo's output, then it, too, has those requirements. Instead,
  # we can specify the GPU targets for this system (e.g. "gfx803" for
  # Polaris) such that no system call is needed for downstream
  # compilers to determine the desired target.
  defaultTargets ? (clr.localGpuTargets or [ ]),
}:

stdenv.mkDerivation (finalAttrs: {
  version = "6.4.1";
  pname = "rocminfo";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocminfo";
    rev = "rocm-${finalAttrs.version}";
    # SRI hashes go in `hash`, matching the other ROCm packages.
    hash = "sha256-YscZ5sFsLOVBg98w2X6vTzniTvl9NfCkIE+HAH6vv5Y=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
  ];

  buildInputs = [ rocm-runtime ];
  propagatedBuildInputs = [ python3 ];
  cmakeFlags = [ "-DROCRTST_BLD_TYPE=Release" ];

  prePatch = ''
    patchShebangs rocm_agent_enumerator
    # Point the `lsmod | grep` pipeline in rocminfo.cc at store paths so it
    # does not depend on the caller's PATH.
    sed 's,lsmod | grep ,${busybox}/bin/lsmod | ${gnugrep}/bin/grep ,' -i rocminfo.cc
  '';

  # When targets are given, write them one per line to bin/target.lst.
  postInstall = lib.optionalString (defaultTargets != [ ]) ''
    echo '${lib.concatStringsSep "\n" defaultTargets}' > $out/bin/target.lst
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm Application for Reporting System Info";
    homepage = "https://github.com/ROCm/rocminfo";
    license = licenses.ncsa;
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
165
pkgs/rocm-modules/rocmlir/default.nix
Normal file
165
pkgs/rocm-modules/rocmlir/default.nix
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocminfo,
  clr,
  git,
  libxml2,
  libedit,
  zstd,
  zlib,
  ncurses,
  python3Packages,
  buildRockCompiler ? false,
  buildTests ? false, # `argument of type 'NoneType' is not iterable`
}:

# FIXME: rocmlir has an entire separate LLVM build in a subdirectory this is silly
# It seems to be forked from AMD's own LLVM
# If possible reusing the rocmPackages.llvm build would be better
# Would have to confirm it is compatible with ROCm's tagged LLVM.
# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways
# in subdirs

# Theoretically, we could have our MLIR have an output
# with the source and built objects so that we can just
# use it as the external LLVM repo for this
let
  # Distinguishes the fat librockCompiler variant in the package name.
  suffix = if buildRockCompiler then "-rock" else "";

  # LLVM backend for the host CPU, built alongside AMDGPU.
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then
      "X86"
    else if stdenv.hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocmlir${suffix}";
  version = "6.4.1";

  # The extra "external" output (LLVM libraries, see postInstall) only
  # exists for the non-rock variant.
  outputs =
    [
      "out"
    ]
    ++ lib.optionals (!buildRockCompiler) [
      "external"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocMLIR";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-p/gvr1Z6yZtO5N+ecSouXiCrf520jt1HMOy/tohUHfI=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    python3Packages.python
    python3Packages.tomli
  ];

  buildInputs = [
    git
    libxml2
    libedit
  ];

  propagatedBuildInputs = [
    zstd
    zlib
    ncurses
  ];

  patches = [
    ./initparamdata-sort-const.patch
  ];

  cmakeFlags =
    [
      "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DLLVM_USE_LINKER=lld"
      "-DLLVM_ENABLE_ZSTD=FORCE_ON"
      "-DLLVM_ENABLE_ZLIB=FORCE_ON"
      "-DLLVM_ENABLE_LIBCXX=ON"
      "-DLLVM_ENABLE_TERMINFO=ON"
      "-DROCM_PATH=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      (lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler)
    ]
    ++ lib.optionals (!buildRockCompiler) [
      "-DROCM_TEST_CHIPSET=gfx000"
    ];

  postPatch = ''
    patchShebangs mlir
    patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py

    # Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read'
    substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \
      --replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };"

    # remove when no longer required
    substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \
      --replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin"

    substituteInPlace mlir/utils/performance/common/CMakeLists.txt \
      --replace-fail "/opt/rocm" "${clr}"
  '';

  # The normal build phase is skipped; the selected checkTarget is what
  # gets built, during the check phase.
  dontBuild = true;
  doCheck = true;

  # Certain libs aren't being generated, try enabling tests next update
  checkTarget =
    if buildRockCompiler then
      "librockCompiler"
    else if buildTests then
      "check-rocmlir"
    else
      "check-rocmlir-build-only";

  postInstall =
    let
      libPath = lib.makeLibraryPath [
        zstd
        zlib
        ncurses
        clr
        stdenv.cc.cc
      ];
    in
    # optionalString (not optionals): this is a shell snippet, and the
    # disabled case must be "" rather than an empty list.
    lib.optionalString (!buildRockCompiler) ''
      mkdir -p $external/lib
      cp -a external/llvm-project/llvm/lib/{*.a*,*.so*} $external/lib
      patchelf --set-rpath $external/lib:$out/lib:${libPath} $external/lib/*.so*
      patchelf --set-rpath $out/lib:$external/lib:${libPath} $out/{bin/*,lib/*.so*}
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
    page = "tags?per_page=4";
  };

  meta = with lib; {
    description = "MLIR-based convolution and GEMM kernel generator";
    homepage = "https://github.com/ROCm/rocMLIR";
    license = with licenses; [ asl20 ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
13
pkgs/rocm-modules/rocmlir/initparamdata-sort-const.patch
Normal file
13
pkgs/rocm-modules/rocmlir/initparamdata-sort-const.patch
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
|
||||||
|
index 3f5ee596819a..590d53788822 100644
|
||||||
|
--- a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
|
||||||
|
+++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
|
||||||
|
@@ -209,7 +209,7 @@ private:
|
||||||
|
size_t original_pos;
|
||||||
|
int64_t padding_amount;
|
||||||
|
|
||||||
|
- bool operator<(const InitParamData &rhs) {
|
||||||
|
+ bool operator<(const InitParamData &rhs) const {
|
||||||
|
if (this->padding_amount < rhs.padding_amount) {
|
||||||
|
return true;
|
||||||
|
} else if (this->padding_amount == rhs.padding_amount) {
|
||||||
98
pkgs/rocm-modules/rocprim/default.nix
Normal file
98
pkgs/rocm-modules/rocprim/default.nix
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

let
  # Shell fragment that relocates the test binaries into the "test" output.
  moveTests = ''
    mkdir -p $test/bin
    mv $out/bin/test_* $test/bin
    mv $out/bin/rocprim $test/bin
  '';

  # Shell fragment that relocates the benchmark binaries into the
  # "benchmark" output.
  moveBenchmarks = ''
    mkdir -p $benchmark/bin
    mv $out/bin/benchmark_* $benchmark/bin
  '';

  # After relocation $out/bin is expected to be empty; rmdir fails otherwise.
  dropEmptyBin = ''
    rmdir $out/bin
  '';
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocprim";
  version = "6.4.1";

  # "test" and "benchmark" outputs exist only when the matching flag is set.
  outputs =
    [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocPRIM";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-EP61n4syYMWjTDkjC0dWLj9yau6KL2qu1Bs5IBtw580=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = lib.optional buildTests gtest ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags =
    [
      "-DCMAKE_BUILD_TYPE=Release"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    # Only restrict the GPU architectures when the caller listed some.
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  postInstall = lib.concatStrings [
    (lib.optionalString buildTests moveTests)
    (lib.optionalString buildBenchmarks moveBenchmarks)
    (lib.optionalString (buildTests || buildBenchmarks) dropEmptyBin)
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm parallel primitives";
    homepage = "https://github.com/ROCm/rocPRIM";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
79
pkgs/rocm-modules/rocprofiler-register/default.nix
Normal file
79
pkgs/rocm-modules/rocprofiler-register/default.nix
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
{
  lib,
  stdenv,
  rocm-runtime,
  rocprofiler,
  numactl,
  libpciaccess,
  libxml2,
  elfutils,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  clr,
  python3Packages,
  gpuTargets ? clr.gpuTargets,
}:

# Builds ROCm/rocprofiler-register at the tag matching `version`, with its
# tests and samples switched off.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocprofiler-register";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocprofiler-register";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-HaN4XMHuCRDfKOpfuZ2SkOEQfAZKouh6luqbtATUYm0=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    cmake
    clang
    clr
  ];

  buildInputs = [
    numactl
    libpciaccess
    libxml2
    elfutils
    rocm-runtime

    # Passthru attribute of the rocprofiler package.
    rocprofiler.rocmtoolkit-merged

    python3Packages.lxml
    python3Packages.cppheaderparser
    python3Packages.pyyaml
    python3Packages.barectf
    python3Packages.pandas
  ];
  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    "-DBUILD_TEST=OFF"
    "-DROCPROFILER_BUILD_TESTS=0"
    "-DROCPROFILER_BUILD_SAMPLES=0"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    # Describe this package, not rocprofiler itself.
    description = "Helper library that coordinates rocprofiler's modification of the ROCm runtime intercept API tables";
    homepage = "https://github.com/ROCm/rocprofiler-register";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
|
||||||
|
index 46efbd5..ca2cc3b 100644
|
||||||
|
--- a/test/CMakeLists.txt
|
||||||
|
+++ b/test/CMakeLists.txt
|
||||||
|
@@ -127,10 +127,6 @@ function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
|
||||||
|
DEPENDS ${INPUT_FILE} clang
|
||||||
|
COMMENT "Building ${OUTPUT_FILE}..."
|
||||||
|
VERBATIM)
|
||||||
|
- install(
|
||||||
|
- FILES ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||||
|
- DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests-v1
|
||||||
|
- COMPONENT tests)
|
||||||
|
set(HSACO_TARGET_LIST
|
||||||
|
${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||||
|
PARENT_SCOPE)
|
||||||
142
pkgs/rocm-modules/rocprofiler/default.nix
Normal file
142
pkgs/rocm-modules/rocprofiler/default.nix
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  symlinkJoin,
  cmake,
  clang,
  clr,
  rocm-core,
  rocm-runtime,
  rocm-device-libs,
  roctracer,
  rocdbgapi,
  numactl,
  libpciaccess,
  libxml2,
  elfutils,
  mpi,
  systemd,
  gtest,
  git,
  python3Packages,
  gpuTargets ? clr.gpuTargets,
}:

let
  # Single prefix combining the ROCm components below; propagated to
  # dependents and also exposed as passthru.rocmtoolkit-merged.
  rocmtoolkit-merged = symlinkJoin {
    name = "rocmtoolkit-merged";

    paths = [
      rocm-core
      rocm-runtime
      rocm-device-libs
      roctracer
      rocdbgapi
      clr
    ];

    # Drop the nix-support directory from the merged tree.
    postBuild = ''
      rm -rf $out/nix-support
    '';
  };
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocprofiler";
  version = "6.4.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocprofiler";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-CgW8foM4W3K19kUK/l8IsH2Q9DHi/z88viXTxhNqlHQ=";
    fetchSubmodules = true;
  };

  patches = [
    # These just simply won't build
    ./0000-dont-install-tests-hsaco.patch
    ./optional-aql-in-cmake.patch
  ];

  nativeBuildInputs = [
    cmake
    clang
    clr
    git
    python3Packages.lxml
    python3Packages.cppheaderparser
    python3Packages.pyyaml
    python3Packages.barectf
    python3Packages.pandas
  ];

  buildInputs = [
    numactl
    libpciaccess
    libxml2
    elfutils
    mpi
    systemd
    gtest
  ];

  propagatedBuildInputs = [ rocmtoolkit-merged ];

  # HACK: allow building without aqlprofile, probably explodes at runtime if use profiling
  env.LDFLAGS = "-z nodefs -Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all";
  #HACK: rocprofiler's cmake doesn't add these deps properly
  env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w";

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    "-DBUILD_TEST=OFF"
    "-DROCPROFILER_BUILD_TESTS=0"
    "-DROCPROFILER_BUILD_SAMPLES=0"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  postPatch = ''
    patchShebangs .

    # Make the submodule checkout helper return immediately.
    substituteInPlace cmake_modules/rocprofiler_utils.cmake \
      --replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)
      return()'

    # Stop CMakeLists.txt from forcing the tests on.
    substituteInPlace CMakeLists.txt \
      --replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' ""

    # --replace-warn keeps the non-fatal behaviour of the deprecated bare
    # --replace: a missing pattern only warns.
    substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \
      --replace-warn "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"

    substituteInPlace test/CMakeLists.txt \
      --replace-warn "\''${ROCM_ROOT_DIR}/amdgcn/bitcode" "${rocm-device-libs}/amdgcn/bitcode"
  '';

  postInstall = ''
    # Why do these have the executable bit set?
    chmod -x $out/libexec/rocprofiler/counters/*.xml
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };
  passthru.rocmtoolkit-merged = rocmtoolkit-merged;

  meta = with lib; {
    description = "Profiling with perf-counters and derived metrics";
    homepage = "https://github.com/ROCm/rocprofiler";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})
|
||||||
147
pkgs/rocm-modules/rocprofiler/optional-aql-in-cmake.patch
Normal file
147
pkgs/rocm-modules/rocprofiler/optional-aql-in-cmake.patch
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch
|
||||||
|
diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake
|
||||||
|
index 7b7c472..0aba3ed 100644
|
||||||
|
--- a/cmake_modules/rocprofiler_env.cmake
|
||||||
|
+++ b/cmake_modules/rocprofiler_env.cmake
|
||||||
|
@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE)
|
||||||
|
target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
+set(ROCPROFILER_LD_AQLPROFILE false)
|
||||||
|
# Enable direct loading of AQL-profile HSA extension
|
||||||
|
if(ROCPROFILER_LD_AQLPROFILE)
|
||||||
|
target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1)
|
||||||
|
@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "")
|
||||||
|
message(FATAL_ERROR "ROCM_ROOT_DIR is not found.")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
-find_library(
|
||||||
|
- HSA_AMD_AQLPROFILE_LIBRARY
|
||||||
|
- NAMES hsa-amd-aqlprofile64
|
||||||
|
- HINTS ${CMAKE_PREFIX_PATH}
|
||||||
|
- PATHS ${ROCM_ROOT_DIR}
|
||||||
|
- PATH_SUFFIXES lib REQUIRED)
|
||||||
|
diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt
|
||||||
|
index 61782f0..16c83bf 100644
|
||||||
|
--- a/src/api/CMakeLists.txt
|
||||||
|
+++ b/src/api/CMakeLists.txt
|
||||||
|
@@ -51,15 +51,6 @@ find_file(
|
||||||
|
NO_DEFAULT_PATH REQUIRED)
|
||||||
|
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
|
||||||
|
|
||||||
|
-find_library(
|
||||||
|
- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so"
|
||||||
|
- HINTS ${CMAKE_PREFIX_PATH}
|
||||||
|
- PATHS ${ROCM_PATH}
|
||||||
|
- PATH_SUFFIXES lib)
|
||||||
|
-
|
||||||
|
-if(NOT AQLPROFILE_LIB)
|
||||||
|
- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!")
|
||||||
|
-endif()
|
||||||
|
|
||||||
|
# ########################################################################################
|
||||||
|
# Adding Old Library Files
|
||||||
|
@@ -247,7 +238,7 @@ target_include_directories(
|
||||||
|
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/rocprofiler>
|
||||||
|
PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler)
|
||||||
|
target_link_libraries(
|
||||||
|
- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++
|
||||||
|
+ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++
|
||||||
|
dl rocprofiler::build-flags rocprofiler::memcheck)
|
||||||
|
|
||||||
|
get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME)
|
||||||
|
@@ -325,8 +316,7 @@ target_link_options(
|
||||||
|
-Wl,--no-undefined)
|
||||||
|
target_link_libraries(
|
||||||
|
rocprofiler-v2
|
||||||
|
- PRIVATE ${AQLPROFILE_LIB}
|
||||||
|
- hsa-runtime64::hsa-runtime64
|
||||||
|
+ PRIVATE hsa-runtime64::hsa-runtime64
|
||||||
|
Threads::Threads
|
||||||
|
atomic
|
||||||
|
numa
|
||||||
|
diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp
|
||||||
|
index 2c47186..6b39634 100644
|
||||||
|
--- a/src/util/hsa_rsrc_factory.cpp
|
||||||
|
+++ b/src/util/hsa_rsrc_factory.cpp
|
||||||
|
@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||||
|
if (kern_arg_pool_ == nullptr)
|
||||||
|
CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
|
||||||
|
|
||||||
|
- // Get AqlProfile API table
|
||||||
|
- aqlprofile_api_ = {};
|
||||||
|
-#ifdef ROCP_LD_AQLPROFILE
|
||||||
|
- status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||||
|
-#else
|
||||||
|
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||||
|
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||||
|
- sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||||
|
-#endif
|
||||||
|
- CHECK_STATUS("aqlprofile API table load failed", status);
|
||||||
|
-
|
||||||
|
// Get Loader API table
|
||||||
|
loader_api_ = {};
|
||||||
|
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||||
|
diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp
|
||||||
|
index 0a44d18..fab5b75 100644
|
||||||
|
--- a/test/util/hsa_rsrc_factory.cpp
|
||||||
|
+++ b/test/util/hsa_rsrc_factory.cpp
|
||||||
|
@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||||
|
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
|
||||||
|
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
|
||||||
|
|
||||||
|
- // Get AqlProfile API table
|
||||||
|
- aqlprofile_api_ = {0};
|
||||||
|
-#ifdef ROCP_LD_AQLPROFILE
|
||||||
|
- status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||||
|
-#else
|
||||||
|
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||||
|
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||||
|
- sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||||
|
-#endif
|
||||||
|
- CHECK_STATUS("aqlprofile API table load failed", status);
|
||||||
|
-
|
||||||
|
// Get Loader API table
|
||||||
|
loader_api_ = {0};
|
||||||
|
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||||
|
diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt
|
||||||
|
index 107cb51..0f6d4bf 100644
|
||||||
|
--- a/tests-v2/unittests/core/CMakeLists.txt
|
||||||
|
+++ b/tests-v2/unittests/core/CMakeLists.txt
|
||||||
|
@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES
|
||||||
|
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests")
|
||||||
|
target_link_libraries(
|
||||||
|
runCoreUnitTests
|
||||||
|
- PRIVATE ${AQLPROFILE_LIB}
|
||||||
|
- test_hsatool_library
|
||||||
|
+ PRIVATE test_hsatool_library
|
||||||
|
hsa-runtime64::hsa-runtime64
|
||||||
|
Threads::Threads
|
||||||
|
GTest::gtest GTest::gtest_main
|
||||||
|
@@ -285,4 +284,4 @@ endif()
|
||||||
|
# for the *_FilePlugin tests
|
||||||
|
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
|
||||||
|
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
|
||||||
|
-endif()
|
||||||
|
\ No newline at end of file
|
||||||
|
+endif()
|
||||||
|
diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt
|
||||||
|
index 53180d5..0c4d4a7 100644
|
||||||
|
--- a/tests-v2/unittests/profiler/CMakeLists.txt
|
||||||
|
+++ b/tests-v2/unittests/profiler/CMakeLists.txt
|
||||||
|
@@ -122,7 +122,7 @@ target_compile_definitions(
|
||||||
|
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||||
|
|
||||||
|
target_link_libraries(
|
||||||
|
- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
|
||||||
|
+ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64
|
||||||
|
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl)
|
||||||
|
|
||||||
|
add_dependencies(tests runUnitTests)
|
||||||
|
@@ -158,4 +158,4 @@ endif()
|
||||||
|
# for the *_FilePlugin tests
|
||||||
|
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
|
||||||
|
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
|
||||||
|
-endif()
|
||||||
|
\ No newline at end of file
|
||||||
|
+endif()
|
||||||
59
pkgs/rocm-modules/rocr-debug-agent/default.nix
Normal file
59
pkgs/rocm-modules/rocr-debug-agent/default.nix
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
clr,
|
||||||
|
git,
|
||||||
|
rocdbgapi,
|
||||||
|
elfutils,
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
pname = "rocr-debug-agent";
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "rocr_debug_agent";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-otoxZ2NHkPDIFhvn4/nvaQ/W4LF38Nx9MZ9IYEf1DyY=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs = [
|
||||||
|
cmake
|
||||||
|
clr
|
||||||
|
git
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
rocdbgapi
|
||||||
|
elfutils
|
||||||
|
];
|
||||||
|
|
||||||
|
cmakeFlags = [
|
||||||
|
"-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
|
||||||
|
"-DHIP_ROOT_DIR=${clr}"
|
||||||
|
"-DHIP_PATH=${clr}"
|
||||||
|
];
|
||||||
|
|
||||||
|
# Weird install target
|
||||||
|
postInstall = ''
|
||||||
|
rm -rf $out/src
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Library that provides some debugging functionality for ROCr";
|
||||||
|
homepage = "https://github.com/ROCm/rocr_debug_agent";
|
||||||
|
license = with licenses; [ ncsa ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
})
|
||||||
99
pkgs/rocm-modules/rocrand/default.nix
Normal file
99
pkgs/rocm-modules/rocrand/default.nix
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# rocRAND (ROCm/rocRAND) package definition.
#
# Arguments:
#   buildTests       - also build the test programs and expose them in a
#                      separate `test` output.
#   buildBenchmarks  - also build the benchmark programs and expose them in a
#                      separate `benchmark` output.
#   gpuTargets       - list of GPU architectures passed to CMake as
#                      AMDGPU_TARGETS; defaults to `clr.localGpuTargets` when
#                      that attribute exists, otherwise the flag is omitted.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? clr.localGpuTargets or [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  # The package name carries clr's GPU architecture suffix.
  pname = "rocrand${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildTests [
      "test"
    ]
    ++ lib.optionals buildBenchmarks [
      "benchmark"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocRAND";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-XqGPXx+LqjJs602vtG5u578B1hitGlsopA0izbClHro=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs =
    lib.optionals buildTests [
      gtest
    ]
    ++ lib.optionals buildBenchmarks [
      gbenchmark
    ];

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ]
    ++ lib.optionals buildTests [
      "-DBUILD_TEST=ON"
    ]
    ++ lib.optionals buildBenchmarks [
      "-DBUILD_BENCHMARK=ON"
    ];

  # Move the test and benchmark binaries into their own outputs, then make
  # sure nothing unexpected is left behind in $out/bin.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    ''
    + lib.optionalString (buildTests || buildBenchmarks) ''
      rm -r $out/bin/rocRAND
      # Fail if bin/ isn't actually empty
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    # Use the bare package name rather than `pname`: `pname` includes the GPU
    # architecture suffix, while the sibling rocsolver package passes its
    # plain name here.
    name = "rocrand";
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Generate pseudo-random and quasi-random numbers";
    homepage = "https://github.com/ROCm/rocRAND";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
135
pkgs/rocm-modules/rocsolver/default.nix
Normal file
135
pkgs/rocm-modules/rocsolver/default.nix
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
# rocSOLVER (ROCm/rocSOLVER) package definition.
#
# `buildTests` / `buildBenchmarks` enable the client programs and add a
# matching `test` / `benchmark` output. `gpuTargets` is forwarded to CMake as
# AMDGPU_TARGETS and defaults to `clr.localGpuTargets` when that attribute
# exists, otherwise to the fixed architecture list below.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocblas,
  rocprim,
  rocsparse,
  clr,
  fmt,
  gtest,
  gfortran,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx900"
      "gfx906"
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx1010"
      "gfx1030"
      "gfx1100"
      "gfx1101"
      "gfx1102"
      "gfx1201"
    ]
  ),
}:

let
  # True when any client program (tests or benchmarks) is being built; those
  # additionally pull in gfortran and the reference LAPACK.
  withClients = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocsolver${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocSOLVER";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-u5GRWetMnhEBJ9HZcXoEaqpdO8f0cuSPnq+XawljfUs=";
  };

  nativeBuildInputs = [
    cmake
    # no ninja, it buffers console output and nix times out long periods of no output
    rocm-cmake
    clr
  ] ++ lib.optional withClients gfortran;

  buildInputs =
    [
      # FIXME: rocblas and rocsolver can't build in parallel
      # but rocsolver doesn't need rocblas' offload builds at build time
      # could we build against a rocblas-minimal?
      rocblas
      rocprim
      rocsparse
      fmt
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optional withClients lapack-reference;

  cmakeFlags =
    [
      "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DCMAKE_VERBOSE_MAKEFILE=ON"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON";

  # Relocate the client binaries into their dedicated outputs; the final
  # rmdir only succeeds when $out/bin ends up empty.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/rocsolver-test $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/rocsolver-bench $benchmark/bin
    ''
    + lib.optionalString withClients ''
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = "rocsolver";
    inherit (finalAttrs.src) owner repo;
  };

  requiredSystemFeatures = [ "big-parallel" ];

  meta = {
    description = "ROCm LAPACK implementation";
    homepage = "https://github.com/ROCm/rocSOLVER";
    license = [ lib.licenses.bsd2 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
    timeout = 14400; # 4 hours
    maxSilent = 14400; # 4 hours
  };
})
|
||||||
161
pkgs/rocm-modules/rocsparse/default.nix
Normal file
161
pkgs/rocm-modules/rocsparse/default.nix
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
# rocSPARSE (ROCm/rocSPARSE) package definition.
#
# Arguments:
#   buildTests       - build the client tests; adds a `test` output that also
#                      receives the converted test matrices.
#   buildBenchmarks  - build the client benchmarks; adds a `benchmark` output
#                      and, because the benchmarks seem to depend on the tests,
#                      the `test` output as well.
#   gpuTargets       - GPU architectures forwarded to CMake as AMDGPU_TARGETS.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchzip,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gfortran,
  git,
  gtest,
  boost,
  python3Packages,
  buildTests ? false,
  buildBenchmarks ? false, # Seems to depend on tests
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocsparse${clr.gpuArchSuffix}";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals (buildTests || buildBenchmarks) [
      "test"
    ]
    ++ lib.optionals buildBenchmarks [
      "benchmark"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocSPARSE";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-l7rOuVthfrSO5bnhgm49cjPnRbV/2sFhSRT+mShkBek=";
  };

  nativeBuildInputs = [
    cmake
    # no ninja, it buffers console output and nix times out long periods of no output
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs =
    [
      rocprim
      git
    ]
    ++ lib.optionals (buildTests || buildBenchmarks) [
      gtest
      boost
      python3Packages.python
      python3Packages.pyyaml
    ];

  cmakeFlags =
    [
      "-DCMAKE_BUILD_TYPE=Release"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optionals (gpuTargets != [ ]) [
      "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ]
    ++ lib.optionals (buildTests || buildBenchmarks) [
      "-DBUILD_CLIENTS_TESTS=ON"
      "-Dpython=python3"
    ]
    ++ lib.optionals buildBenchmarks [
      "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ];

  # We have to manually generate the matrices
  postPatch = lib.optionalString (buildTests || buildBenchmarks) ''
    mkdir -p matrices

    ${lib.concatMapStringsSep "\n" (matrix: "ln -s ${matrix}/*.mtx matrices") (
      # Every attribute of passthru.matrices is one fetched matrix archive;
      # attrValues yields them ordered by attribute name (matrix-01 .. matrix-24).
      lib.attrValues finalAttrs.passthru.matrices
    )}

    # Not used by the original cmake, causes an error
    rm matrices/*_b.mtx

    echo "deps/convert.cpp -> deps/mtx2csr"
    hipcc deps/convert.cpp -O3 -o deps/mtx2csr

    for mat in $(ls -1 matrices | cut -d "." -f 1); do
      echo "mtx2csr: $mat.mtx -> $mat.csr"
      deps/mtx2csr matrices/$mat.mtx matrices/$mat.csr
      unlink matrices/$mat.mtx
    done
  '';

  # The matrices directory lives in the unpacked source tree. Its absolute
  # location is only known at build time, so the flag is appended here instead
  # of hard-coding /build/source in cmakeFlags.
  preConfigure = lib.optionalString (buildTests || buildBenchmarks) ''
    cmakeFlagsArray+=("-DCMAKE_MATRICES_DIR=$NIX_BUILD_TOP/$sourceRoot/matrices")
  '';

  # Split the installed client binaries: benchmarks get a copy of everything
  # except rocsparse-test, tests get everything except rocsparse-bench plus
  # the converted matrices. The final rmdir only succeeds on an empty bin/.
  postInstall =
    lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      cp -a $out/bin/* $benchmark/bin
      rm $benchmark/bin/rocsparse-test
    ''
    + lib.optionalString (buildTests || buildBenchmarks) ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rm $test/bin/rocsparse-bench || true
      mv "$NIX_BUILD_TOP/$sourceRoot/matrices" $test
      rmdir $out/bin
    '';

  passthru = {
    matrices = import ./deps.nix {
      inherit fetchzip;
      mirror1 = "https://sparse.tamu.edu/MM";
      mirror2 = "https://www.cise.ufl.edu/research/sparse/MM";
    };

    updateScript = rocmUpdateScript {
      # Use the bare package name rather than `pname`: `pname` includes the
      # GPU architecture suffix, while the sibling rocsolver package passes
      # its plain name here.
      name = "rocsparse";
      inherit (finalAttrs.src) owner repo;
    };
  };

  meta = {
    description = "ROCm SPARSE implementation";
    homepage = "https://github.com/ROCm/rocSPARSE";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
223
pkgs/rocm-modules/rocsparse/deps.nix
Normal file
223
pkgs/rocm-modules/rocsparse/deps.nix
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
# Matrix archives used by the rocSPARSE client tests and benchmarks.
#
# Arguments:
#   fetchzip - fetcher used to download and unpack each archive.
#   mirror1  - base URL of the first mirror to try.
#   mirror2  - base URL of the second mirror to try.
#
# Returns an attribute set `matrix-01` .. `matrix-24`, one fetched archive each.
{
  fetchzip,
  mirror1,
  mirror2,
}:

let
  # Fetch `<path>.tar.gz` (path is "<group>/<name>") relative to both mirrors,
  # listing mirror1 before mirror2, pinned to the given SRI hash.
  fetchMatrix =
    path: sha256:
    fetchzip {
      inherit sha256;

      urls = map (mirror: "${mirror}/${path}.tar.gz") [
        mirror1
        mirror2
      ];
    };
in
{
  matrix-01 = fetchMatrix "SNAP/amazon0312" "sha256-AHur5ZIDZTFRrO2GV0ieXrffq4KUiGWiZ59pv0fUtEQ=";
  matrix-02 = fetchMatrix "Muite/Chebyshev4" "sha256-0rSxaN4lQcdaCLsvlgicG70FXUxXeERPiEmQ4MzbRdE=";
  matrix-03 = fetchMatrix "FEMLAB/sme3Dc" "sha256-hDzDWDUnHEyFedX/tMNq83ZH8uWyM4xtZYUUAD3rizo=";
  matrix-04 = fetchMatrix "Williams/webbase-1M" "sha256-GmN2yOt/MoX01rKe05aTyB3ypUP4YbQGOITZ0BqPmC0=";
  matrix-05 = fetchMatrix "Williams/mac_econ_fwd500" "sha256-gQNjfVyWzNM9RwImJGhkhahRmZz74LzDs1oijL7mI7k=";
  matrix-06 = fetchMatrix "Williams/mc2depi" "sha256-87cdZjntNcTuz5BtO59irhcuRbPllWSbhCEX3Td02qc=";
  matrix-07 = fetchMatrix "Bova/rma10" "sha256-WRamuJX3D8Tm+k0q67RjUDG3DeNAxhKiaPkk5afY5eU=";
  matrix-08 = fetchMatrix "JGD_BIBD/bibd_22_8" "sha256-5dhkm293Mc3lzakKxHy5W5XIn4Rw+gihVh7gyrjEHXo=";
  matrix-09 = fetchMatrix "Hamm/scircuit" "sha256-czjLWCjXAjZCk5TGYHaEkwSAzQu3TQ3QyB6eNKR4G88=";
  matrix-10 = fetchMatrix "Sandia/ASIC_320k" "sha256-bYuLnJViAIcIejAkh69/bsNAVIDU4wfTLtD+nmHd6FM=";
  matrix-11 = fetchMatrix "GHS_psdef/bmwcra_1" "sha256-aDwn8P1khYjo2Agbq5m9ZBInJUxf/knJNvyptt0fak0=";
  matrix-12 = fetchMatrix "HB/nos1" "sha256-8OJqA/byhlAZd869TPUzZFdsOiwOoRGfKyhM+RMjXoY=";
  matrix-13 = fetchMatrix "HB/nos2" "sha256-FS0rKqmg+uHwsM/yGfQLBdd7LH/rUrdutkNGBD/Mh1I=";
  matrix-14 = fetchMatrix "HB/nos3" "sha256-DANnlrNJikrI7Pst9vRedtbuxepyHmCIu2yhltc4Qcs=";
  matrix-15 = fetchMatrix "HB/nos4" "sha256-21mUgqjWGUfYgiWwSrKh9vH8Vdt3xzcefmqYNYRpxiY=";
  matrix-16 = fetchMatrix "HB/nos5" "sha256-FOuXvGqBBFNkVS6cexmkluret54hCfCOdK+DOZllE4c=";
  matrix-17 = fetchMatrix "HB/nos6" "sha256-+7NI1rA/qQxYPpjXKHvAaCZ+LSaAJ4xuJvMRMBEUYxg=";
  matrix-18 = fetchMatrix "HB/nos7" "sha256-q3NxJjbwGGcFiQ9nhWfUKgZmdVwCfPmgQoqy0AqOsNc=";
  matrix-19 = fetchMatrix "DNVS/shipsec1" "sha256-0GAN6qmVfD+tprIigzuUUUwm5KVhkN9X65wMEvFltDY=";
  matrix-20 = fetchMatrix "Cote/mplate" "sha256-f28Du/Urxsiq5NkRmRO10Zz9vvGRjEchquzHzbZpZ7U=";
  matrix-21 = fetchMatrix "Bai/qc2534" "sha256-O+Wy0NfCU1hVUOfNR1dJpvDHLBwwa301IRJDrQJnhak=";
  matrix-22 = fetchMatrix "Chevron/Chevron2" "sha256-oxMnt8U5Cf1ILWcBdU6W9jdSMMm+U6bIVl8nm3n3+OA=";
  matrix-23 = fetchMatrix "Chevron/Chevron3" "sha256-MFD9BxFI/3IS7yatW121BAI04fbqrXpgYDT5UKjeKcU=";
  matrix-24 = fetchMatrix "Chevron/Chevron4" "sha256-ikS8O51pe1nt3BNyhvfvqCbVL0+bg/da9bqGqeBDkTg=";
}
|
||||||
94
pkgs/rocm-modules/rocthrust/default.nix
Normal file
94
pkgs/rocm-modules/rocthrust/default.nix
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
# rocThrust (ROCm/rocThrust) package definition.
#
# `buildTests` / `buildBenchmarks` enable the corresponding CMake options and
# add a `test` / `benchmark` output. A non-empty `gpuTargets` list is passed
# to CMake as AMDGPU_TARGETS.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gtest,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocthrust";
  version = "6.4.1";

  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocThrust";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-bHyqG0pSt6bc6cDMnd1uY+0o+V3cxdp0mUEzWYRdd20=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    rocprim
    clr
  ];

  buildInputs = lib.optional buildTests gtest;

  cmakeFlags =
    [
      "-DHIP_ROOT_DIR=${clr}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildTests "-DBUILD_TEST=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARKS=ON";

  # Move test and benchmark programs into their own outputs and drop whatever
  # remains of $out/bin afterwards.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/{test_*,*.hip} $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    ''
    + lib.optionalString (buildTests || buildBenchmarks) ''
      rm -rf $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm parallel algorithm library";
    homepage = "https://github.com/ROCm/rocThrust";
    license = [ lib.licenses.asl20 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
120
pkgs/rocm-modules/roctracer/default.nix
Normal file
120
pkgs/rocm-modules/roctracer/default.nix
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
# roctracer (ROCm/roctracer) package definition.
#
# Arguments:
#   buildDocs   - add doxygen/graphviz to the build and create a `doc` output.
#   buildTests  - keep the upstream `test` subdirectory in the build and move
#                 the resulting executables into a `test` output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  rocm-device-libs,
  libxml2,
  doxygen,
  graphviz,
  gcc-unwrapped,
  libbacktrace,
  rocm-runtime,
  python3Packages,
  buildDocs ? false, # Nothing seems to be generated, so not making the output
  buildTests ? false,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "roctracer";
  version = "6.4.1";

  outputs =
    [
      "out"
    ]
    ++ lib.optionals buildDocs [
      "doc"
    ]
    ++ lib.optionals buildTests [
      "test"
    ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "roctracer";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Dwk5cBZLysmsVA2kwpQM0FQt2KXOGcaZcAw/d8VUaXw=";
  };

  nativeBuildInputs =
    [
      cmake
      clr
    ]
    ++ lib.optionals buildDocs [
      doxygen
      graphviz
    ];

  buildInputs = [
    libxml2
    libbacktrace
    python3Packages.python
    python3Packages.cppheaderparser
  ];

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  env.NIX_CFLAGS_COMPILE = toString [
    # Needed with GCC 12
    "-Wno-error=array-bounds"
  ];

  postPatch =
    ''
      export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode
    ''
    # Without tests, strip the test subdirectory from the top-level CMake
    # file. --replace-fail (instead of the deprecated --replace) aborts the
    # build if upstream ever changes that line, rather than silently building
    # the tests anyway.
    + lib.optionalString (!buildTests) ''
      substituteInPlace CMakeLists.txt \
        --replace-fail "add_subdirectory(test)" ""
    '';

  # Tests always fail, probably need GPU
  # doCheck = buildTests;

  postInstall =
    lib.optionalString buildDocs ''
      mkdir -p $doc
    ''
    # Collect the installed test executables in the `test` output, drop the
    # helper scripts, and give the binaries an rpath covering this package's
    # libraries plus the listed runtime dependencies.
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      # Not sure why this is an install target
      find $out/test -executable -type f -exec mv {} $test/bin \;
      rm $test/bin/{*.sh,*.py}
      patchelf --set-rpath $out/lib:${
        lib.makeLibraryPath (
          finalAttrs.buildInputs
          ++ [
            clr
            gcc-unwrapped.lib
            rocm-runtime
          ]
        )
      } $test/bin/*
      rm -rf $out/test
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Tracer callback/activity library";
    homepage = "https://github.com/ROCm/roctracer";
    license = [ lib.licenses.mit ]; # mitx11
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
35
pkgs/rocm-modules/rocwmma/0000-dont-fetch-googletest.patch
Normal file
35
pkgs/rocm-modules/rocwmma/0000-dont-fetch-googletest.patch
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
|
||||||
|
index 0d00883..86ce282 100644
|
||||||
|
--- a/test/CMakeLists.txt
|
||||||
|
+++ b/test/CMakeLists.txt
|
||||||
|
@@ -30,30 +30,6 @@ cmake_dependent_option( ROCWMMA_BUILD_VALIDATION_TESTS "Build validation tests"
|
||||||
|
cmake_dependent_option( ROCWMMA_BUILD_BENCHMARK_TESTS "Build benchmarking tests" OFF "ROCWMMA_BUILD_TESTS" OFF )
|
||||||
|
cmake_dependent_option( ROCWMMA_BUILD_EXTENDED_TESTS "Build extended test parameter coverage" OFF "ROCWMMA_BUILD_TESTS" OFF )
|
||||||
|
|
||||||
|
-# Test/benchmark requires additional dependencies
|
||||||
|
-include( FetchContent )
|
||||||
|
-
|
||||||
|
-FetchContent_Declare(
|
||||||
|
- googletest
|
||||||
|
- GIT_REPOSITORY https://github.com/google/googletest.git
|
||||||
|
- GIT_TAG release-1.12.1
|
||||||
|
-)
|
||||||
|
-FetchContent_GetProperties(googletest)
|
||||||
|
-if(NOT googletest_POPULATED)
|
||||||
|
-
|
||||||
|
- # Fetch the content using default details
|
||||||
|
- FetchContent_Populate(googletest)
|
||||||
|
- # Save the shared libs setting, then force to static libs
|
||||||
|
- set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS})
|
||||||
|
- set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE)
|
||||||
|
-
|
||||||
|
- # Add gtest targets as static libs
|
||||||
|
- add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
|
||||||
|
-
|
||||||
|
- # Restore shared libs setting
|
||||||
|
- set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE)
|
||||||
|
-endif()
|
||||||
|
-
|
||||||
|
set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
set(ROCWMMA_COMMON_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/rocwmma_gtest_main.cpp)
|
||||||
121
pkgs/rocm-modules/rocwmma/default.nix
Normal file
121
pkgs/rocm-modules/rocwmma/default.nix
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
# rocWMMA (ROCm/rocWMMA) package definition.
#
# Tests, benchmarks and samples are optional; each enabled group gets its own
# output (`test`, `benchmark`, `sample`). Benchmarks are built as part of the
# test suite, so enabling them also enables the tests and the `test` output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  clr,
  openmp,
  gtest,
  rocblas,
  buildTests ? false, # Will likely fail building because wavefront shifts are not supported for certain archs
  buildExtendedTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

let
  # The upstream test tree is needed for both tests and benchmarks.
  withTestTree = buildTests || buildBenchmarks;

  # Render a boolean as a CMake ON/OFF value.
  onOff = enabled: if enabled then "ON" else "OFF";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocwmma";
  version = "6.4.1";

  outputs =
    [ "out" ]
    ++ lib.optional withTestTree "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocWMMA";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-fjyxMrzt74rE7Gf4v4WawYltuw1fvahwZUpauMIE3qc=";
  };

  # Only applied when the test tree is built: removes the FetchContent block
  # that would download googletest.
  patches = lib.optional withTestTree ./0000-dont-fetch-googletest.patch;

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs =
    [ openmp ]
    ++ lib.optionals withTestTree [
      rocm-smi
      gtest
      rocblas
    ];

  cmakeFlags =
    [
      "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
      "-DOpenMP_omp_LIBRARY=${openmp}/lib"
      "-DROCWMMA_BUILD_TESTS=${onOff withTestTree}"
      "-DROCWMMA_BUILD_SAMPLES=${onOff buildSamples}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    ++ lib.optional (gpuTargets != [ ]) "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    ++ lib.optional buildExtendedTests "-DROCWMMA_BUILD_EXTENDED_TESTS=ON"
    ++ lib.optionals buildBenchmarks [
      "-DROCWMMA_BUILD_BENCHMARK_TESTS=ON"
      "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=ON"
    ];

  # Sort the installed programs into their outputs, then remove $out/bin.
  postInstall =
    lib.optionalString withTestTree ''
      mkdir -p $test/bin
      mv $out/bin/{*_test,*-validate} $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/*-bench $benchmark/bin
    ''
    + lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv $out/bin/sgemmv $sample/bin
      mv $out/bin/simple_gemm $sample/bin
      mv $out/bin/simple_dlrm $sample/bin
    ''
    + lib.optionalString (withTestTree || buildSamples) ''
      rm -rf $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Mixed precision matrix multiplication and accumulation";
    homepage = "https://github.com/ROCm/rocWMMA";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})
|
||||||
104
pkgs/rocm-modules/rpp/default.nix
Normal file
104
pkgs/rocm-modules/rpp/default.nix
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
fetchFromGitHub,
|
||||||
|
rocmUpdateScript,
|
||||||
|
cmake,
|
||||||
|
rocm-cmake,
|
||||||
|
rocm-docs-core,
|
||||||
|
half,
|
||||||
|
clr,
|
||||||
|
openmp,
|
||||||
|
boost,
|
||||||
|
python3Packages,
|
||||||
|
buildDocs ? false, # Needs internet
|
||||||
|
useOpenCL ? false,
|
||||||
|
useCPU ? false,
|
||||||
|
gpuTargets ? [ ],
|
||||||
|
}:
|
||||||
|
|
||||||
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
|
# Package name carries the selected backend, mirroring the BACKEND cmake flag below:
|
||||||
|
# hip when neither useOpenCL nor useCPU is set, opencl when only useOpenCL is set, cpu otherwise.
|
||||||
|
pname =
|
||||||
|
"rpp-"
|
||||||
|
+ (
|
||||||
|
if (!useOpenCL && !useCPU) then
|
||||||
|
"hip"
|
||||||
|
else if (useOpenCL && !useCPU) then
|
||||||
|
"opencl"
|
||||||
|
else
|
||||||
|
"cpu"
|
||||||
|
);
|
||||||
|
|
||||||
|
version = "6.4.1";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "rpp";
|
||||||
|
rev = "rocm-${finalAttrs.version}";
|
||||||
|
hash = "sha256-rccVjSrOVIe4ZDtloCoCCI3u9UIcUqdirHIzS7ffAas=";
|
||||||
|
};
|
||||||
|
|
||||||
|
nativeBuildInputs =
|
||||||
|
[
|
||||||
|
cmake
|
||||||
|
rocm-cmake
|
||||||
|
clr
|
||||||
|
]
|
||||||
|
++ lib.optionals buildDocs [
|
||||||
|
rocm-docs-core
|
||||||
|
python3Packages.python
|
||||||
|
];
|
||||||
|
|
||||||
|
buildInputs = [
|
||||||
|
half
|
||||||
|
openmp
|
||||||
|
boost
|
||||||
|
];
|
||||||
|
|
||||||
|
CFLAGS = "-I${openmp.dev}/include";
|
||||||
|
CXXFLAGS = "-I${openmp.dev}/include";
|
||||||
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
|
||||||
|
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
|
||||||
|
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
|
||||||
|
"-DROCM_PATH=${clr}"
|
||||||
|
]
|
||||||
|
++ lib.optionals (gpuTargets != [ ]) [
|
||||||
|
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||||
|
]
|
||||||
|
++ lib.optionals (!useOpenCL && !useCPU) [
|
||||||
|
"-DBACKEND=HIP"
|
||||||
|
]
|
||||||
|
++ lib.optionals (useOpenCL && !useCPU) [
|
||||||
|
"-DBACKEND=OCL"
|
||||||
|
]
|
||||||
|
++ lib.optionals useCPU [
|
||||||
|
"-DBACKEND=CPU"
|
||||||
|
];
|
||||||
|
|
||||||
|
postPatch = lib.optionalString (!useOpenCL && !useCPU) ''
|
||||||
|
# Bad path
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clr}/bin/hipcc"
|
||||||
|
'';
|
||||||
|
|
||||||
|
postBuild = lib.optionalString buildDocs ''
|
||||||
|
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
|
||||||
|
'';
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = finalAttrs.pname;
|
||||||
|
inherit (finalAttrs.src) owner;
|
||||||
|
inherit (finalAttrs.src) repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Comprehensive high-performance computer vision library for AMD processors";
|
||||||
|
homepage = "https://github.com/ROCm/rpp";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
broken = useOpenCL;
|
||||||
|
};
|
||||||
|
})
|
||||||
115
pkgs/rocm-modules/tensile/default.nix
Normal file
115
pkgs/rocm-modules/tensile/default.nix
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
fetchFromGitHub,
|
||||||
|
fetchpatch,
|
||||||
|
rocmUpdateScript,
|
||||||
|
buildPythonPackage,
|
||||||
|
pytestCheckHook,
|
||||||
|
setuptools,
|
||||||
|
pyyaml,
|
||||||
|
msgpack,
|
||||||
|
simplejson,
|
||||||
|
ujson,
|
||||||
|
distro,
|
||||||
|
orjson,
|
||||||
|
pandas,
|
||||||
|
joblib,
|
||||||
|
filelock,
|
||||||
|
clr,
|
||||||
|
rich,
|
||||||
|
isTensileLite ? false,
|
||||||
|
}:
|
||||||
|
|
||||||
|
buildPythonPackage rec {
|
||||||
|
pname = if isTensileLite then "tensilelite" else "tensile";
|
||||||
|
# Using a specific commit which has code object compression support from after the 6.3 release
|
||||||
|
# Without compression, the resulting packages are too large for Hydra
|
||||||
|
version = "6.4.1";
|
||||||
|
format = "pyproject";
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "ROCm";
|
||||||
|
repo = "Tensile";
|
||||||
|
rev = "1752af518190500891a865379a4569b8abf6ba01";
|
||||||
|
hash = "sha256-Wvz4PVs//3Ox7ykZHpjPzOVwlyATyc+MmVVenfTzWK4=";
|
||||||
|
};
|
||||||
|
|
||||||
|
# TODO: It should be possible to run asm caps test ONCE for all supported arches
|
||||||
|
# We currently disable the test because it's slow and runs each time tensile launches
|
||||||
|
|
||||||
|
postPatch =
|
||||||
|
lib.optionalString (!isTensileLite) ''
|
||||||
|
if grep -F .SafeLoader Tensile/LibraryIO.py; then
|
||||||
|
substituteInPlace Tensile/LibraryIO.py \
|
||||||
|
--replace-fail "yaml.SafeLoader" "yaml.CSafeLoader"
|
||||||
|
fi
|
||||||
|
# See TODO above about asm caps test
|
||||||
|
substituteInPlace Tensile/Common.py \
|
||||||
|
--replace-fail 'if globalParameters["AssemblerPath"] is not None:' "if False:"
|
||||||
|
''
|
||||||
|
+ ''
|
||||||
|
# Add an assert that the fallback 9,0,0 is supported before setting the kernel to it
|
||||||
|
# If it's not detected as supported we have an issue with compiler paths or the compiler is broken
|
||||||
|
# and it's better to stop immediately
|
||||||
|
substituteInPlace Tensile/KernelWriter.py \
|
||||||
|
--replace-fail '= (9,0,0)' '= (9,0,0);assert(globalParameters["AsmCaps"][(9,0,0)]["SupportedISA"])'
|
||||||
|
find . -type f -iname "*.sh" -exec chmod +x {} \;
|
||||||
|
patchShebangs Tensile
|
||||||
|
'';
|
||||||
|
|
||||||
|
buildInputs = [ setuptools ];
|
||||||
|
|
||||||
|
propagatedBuildInputs =
|
||||||
|
[
|
||||||
|
pyyaml
|
||||||
|
msgpack
|
||||||
|
pandas
|
||||||
|
joblib
|
||||||
|
distro
|
||||||
|
]
|
||||||
|
++ lib.optionals (!isTensileLite) [
|
||||||
|
rich
|
||||||
|
]
|
||||||
|
++ lib.optionals isTensileLite [
|
||||||
|
simplejson
|
||||||
|
ujson
|
||||||
|
orjson
|
||||||
|
];
|
||||||
|
|
||||||
|
patches =
|
||||||
|
lib.optional (!isTensileLite) ./tensile-solutionstructs-perf-fix.diff
|
||||||
|
++ lib.optional (!isTensileLite) ./tensile-create-library-dont-copy-twice.diff
|
||||||
|
++ lib.optional (!isTensileLite) (fetchpatch {
|
||||||
|
# [PATCH] Extend Tensile HIP ISA compatibility
|
||||||
|
sha256 = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8=";
|
||||||
|
url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch";
|
||||||
|
})
|
||||||
|
++ lib.optional isTensileLite ./tensilelite-create-library-dont-copy-twice.diff
|
||||||
|
++ lib.optional isTensileLite ./tensilelite-gen_assembly-venv-err-handling.diff;
|
||||||
|
# ++ lib.optional isTensileLite ./tensilelite-compression.diff;
|
||||||
|
|
||||||
|
doCheck = false; # Too many errors, not sure how to set this up properly
|
||||||
|
|
||||||
|
nativeCheckInputs = [
|
||||||
|
pytestCheckHook
|
||||||
|
filelock
|
||||||
|
clr
|
||||||
|
];
|
||||||
|
|
||||||
|
env.ROCM_PATH = "${clr}";
|
||||||
|
|
||||||
|
pythonImportsCheck = [ "Tensile" ];
|
||||||
|
|
||||||
|
passthru.updateScript = rocmUpdateScript {
|
||||||
|
name = pname;
|
||||||
|
inherit (src) owner repo;
|
||||||
|
};
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "GEMMs and tensor contractions";
|
||||||
|
homepage = "https://github.com/ROCm/Tensile";
|
||||||
|
license = with licenses; [ mit ];
|
||||||
|
teams = [ teams.rocm ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||||
|
index a1644606..c6ca2882 100644
|
||||||
|
--- a/Tensile/TensileCreateLibrary.py
|
||||||
|
+++ b/Tensile/TensileCreateLibrary.py
|
||||||
|
@@ -852,9 +852,14 @@ def copyStaticFiles(outputPath=None):
|
||||||
|
"KernelHeader.h",
|
||||||
|
]
|
||||||
|
|
||||||
|
+ import filecmp
|
||||||
|
for fileName in libraryStaticFiles:
|
||||||
|
# copy file
|
||||||
|
- shutil.copy(os.path.join(globalParameters["SourcePath"], fileName), outputPath)
|
||||||
|
+ # no need to copy twice if it has already been copied
|
||||||
|
+ src = os.path.join(globalParameters["SourcePath"], fileName)
|
||||||
|
+ dst = os.path.join(outputPath, os.path.basename(src))
|
||||||
|
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
|
||||||
|
+ shutil.copyfile(src, dst)
|
||||||
|
|
||||||
|
return libraryStaticFiles
|
||||||
|
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
diff --git a/Tensile/SolutionStructs.py b/Tensile/SolutionStructs.py
|
||||||
|
index f663c6f1..17bcf897 100644
|
||||||
|
--- a/Tensile/SolutionStructs.py
|
||||||
|
+++ b/Tensile/SolutionStructs.py
|
||||||
|
@@ -4828,24 +4828,26 @@ class Solution(collections.abc.Mapping):
|
||||||
|
# create a dictionary of lists of parameter values
|
||||||
|
@staticmethod
|
||||||
|
def getSerialNaming(objs):
|
||||||
|
+ valid_params = sorted(validParameters.keys())
|
||||||
|
data = {}
|
||||||
|
- for objIdx in range(0, len(objs)):
|
||||||
|
- obj = objs[objIdx]
|
||||||
|
- for paramName in sorted(obj.keys()):
|
||||||
|
- if paramName in list(validParameters.keys()):
|
||||||
|
- paramValue = obj[paramName]
|
||||||
|
- if paramName in data:
|
||||||
|
- if paramValue not in data[paramName]:
|
||||||
|
- data[paramName].append(paramValue)
|
||||||
|
- else:
|
||||||
|
- data[paramName] = [ paramValue ]
|
||||||
|
- maxObjs = 1
|
||||||
|
- for paramName in data:
|
||||||
|
- if not isinstance(data[paramName][0],dict):
|
||||||
|
- data[paramName] = sorted(data[paramName])
|
||||||
|
- maxObjs *= len(data[paramName])
|
||||||
|
- numDigits = len(str(maxObjs))
|
||||||
|
- return [ data, numDigits ]
|
||||||
|
+
|
||||||
|
+ objs = [getattr(obj, "_state", obj) for obj in objs]
|
||||||
|
+
|
||||||
|
+ for param in valid_params:
|
||||||
|
+ d = []
|
||||||
|
+ for obj in objs:
|
||||||
|
+ if param in obj:
|
||||||
|
+ v = obj[param]
|
||||||
|
+ if v not in d:
|
||||||
|
+ d.append(v)
|
||||||
|
+ if len(d):
|
||||||
|
+ if not isinstance(d[0], dict): d.sort()
|
||||||
|
+ data[param] = d
|
||||||
|
+
|
||||||
|
+ # Calculate max objects using prod() from math module
|
||||||
|
+ max_objs = math.prod(len(values) for values in data.values())
|
||||||
|
+ num_digits = len(str(max_objs))
|
||||||
|
+ return data, num_digits
|
||||||
|
|
||||||
|
########################################
|
||||||
|
# Get Name Serial
|
||||||
345
pkgs/rocm-modules/tensile/tensilelite-compression.diff
Normal file
345
pkgs/rocm-modules/tensile/tensilelite-compression.diff
Normal file
@@ -0,0 +1,345 @@
|
|||||||
|
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||||
|
index b8cea84558..1bc24bd1dd 100644
|
||||||
|
--- a/Tensile/TensileCreateLibrary.py
|
||||||
|
+++ b/Tensile/TensileCreateLibrary.py
|
||||||
|
@@ -41,6 +41,7 @@
|
||||||
|
from .SolutionLibrary import MasterSolutionLibrary
|
||||||
|
from .SolutionStructs import Solution
|
||||||
|
from .CustomYamlLoader import load_logic_gfx_arch
|
||||||
|
+from .Utilities.Profile import profile
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import collections
|
||||||
|
@@ -1233,7 +1234,7 @@ def validateLibrary(masterLibraries: MasterSolutionLibrary,
|
||||||
|
################################################################################
|
||||||
|
# Tensile Create Library
|
||||||
|
################################################################################
|
||||||
|
-@timing
|
||||||
|
+@profile
|
||||||
|
def TensileCreateLibrary():
|
||||||
|
print1("")
|
||||||
|
print1(HR)
|
||||||
|
@@ -1558,7 +1559,6 @@ def param(key, value):
|
||||||
|
|
||||||
|
print1("# Check if generated files exists.")
|
||||||
|
|
||||||
|
- @timing
|
||||||
|
def checkFileExistence(files):
|
||||||
|
for filePath in files:
|
||||||
|
if not os.path.exists(filePath):
|
||||||
|
diff --git a/Tensile/Utilities/Profile.py b/Tensile/Utilities/Profile.py
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..cc3c7eb44c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/Tensile/Utilities/Profile.py
|
||||||
|
@@ -0,0 +1,77 @@
|
||||||
|
+################################################################################
|
||||||
|
+#
|
||||||
|
+# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
+#
|
||||||
|
+# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
+# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
+# in the Software without restriction, including without limitation the rights
|
||||||
|
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
+# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
+# furnished to do so, subject to the following conditions:
|
||||||
|
+#
|
||||||
|
+# The above copyright notice and this permission notice shall be included in
|
||||||
|
+# all copies or substantial portions of the Software.
|
||||||
|
+#
|
||||||
|
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
+# SOFTWARE.
|
||||||
|
+#
|
||||||
|
+################################################################################
|
||||||
|
+
|
||||||
|
+import cProfile
|
||||||
|
+import pstats
|
||||||
|
+import os
|
||||||
|
+
|
||||||
|
+from pathlib import Path
|
||||||
|
+from datetime import datetime, timezone
|
||||||
|
+from typing import Callable, Tuple
|
||||||
|
+
|
||||||
|
+PROFILE_ENV_VAR = "TENSILE_PROFILE"
|
||||||
|
+
|
||||||
|
+def profile(func: Callable) -> Callable:
|
||||||
|
+ """Profiling decorator.
|
||||||
|
+
|
||||||
|
+ Add ``@profile`` to mark a function for profiling; set the environment variable
|
||||||
|
+ TENSILE_PROFILE=ON to enable profiling decorated functions.
|
||||||
|
+ """
|
||||||
|
+ if not envVariableIsSet(PROFILE_ENV_VAR):
|
||||||
|
+ return func
|
||||||
|
+ def wrapper(*args, **kwargs):
|
||||||
|
+ path, filename = initProfileArtifacts(func.__name__)
|
||||||
|
+
|
||||||
|
+ prof = cProfile.Profile()
|
||||||
|
+ output = prof.runcall(func, *args, **kwargs)
|
||||||
|
+ result = pstats.Stats(prof)
|
||||||
|
+ result.sort_stats(pstats.SortKey.TIME)
|
||||||
|
+ result.dump_stats(path/filename)
|
||||||
|
+
|
||||||
|
+ return output
|
||||||
|
+ return wrapper
|
||||||
|
+
|
||||||
|
+def envVariableIsSet(varName: str) -> bool:
|
||||||
|
+ """Checks if the provided environment variable is set to "ON", "TRUE", or "1"
|
||||||
|
+ Args:
|
||||||
|
+ varName: Environment variable name.
|
||||||
|
+ Returns:
|
||||||
|
+ True if the environment variable is set, otherwise False.
|
||||||
|
+ """
|
||||||
|
+ value = os.environ.get(varName, "").upper()
|
||||||
|
+ return True if value in ["ON", "TRUE", "1"] else False
|
||||||
|
+
|
||||||
|
+def initProfileArtifacts(funcName: str) -> Tuple[Path, str]:
|
||||||
|
+ """Initializes filenames and paths for profiling artifacts based on the current datetime
|
||||||
|
+ Args:
|
||||||
|
+ funcName: The name of the function being profiled, nominally passed via func.__name__
|
||||||
|
+ Returns:
|
||||||
|
+ A tuple (path, filename) where the path is the artifact directory and filename is
|
||||||
|
+ a .prof file with the profiling results.
|
||||||
|
+ """
|
||||||
|
+ dt = datetime.now(timezone.utc)
|
||||||
|
+ filename = f"{funcName}-{dt.strftime('%Y-%m-%dT%H-%M-%SZ')}.prof"
|
||||||
|
+ path = Path().cwd()/f"profiling-results-{dt.strftime('%Y-%m-%d')}"
|
||||||
|
+ path.mkdir(exist_ok=True)
|
||||||
|
+ return path, filename
|
||||||
|
|
||||||
|
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||||
|
index e62b0072df..2c843ba936 100644
|
||||||
|
--- a/Tensile/TensileCreateLibrary.py
|
||||||
|
+++ b/Tensile/TensileCreateLibrary.py
|
||||||
|
@@ -56,7 +56,7 @@
|
||||||
|
import sys
|
||||||
|
from timeit import default_timer as timer
|
||||||
|
from pathlib import Path
|
||||||
|
-from typing import Sequence, List
|
||||||
|
+from typing import Sequence, List, Union
|
||||||
|
|
||||||
|
def timing(func):
|
||||||
|
def wrapper(*args, **kwargs):
|
||||||
|
@@ -90,87 +90,142 @@ def processKernelSource(kernel, kernelWriterAssembly, ti):
|
||||||
|
|
||||||
|
return (err, src, header, kernelName, filename)
|
||||||
|
|
||||||
|
-def getAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath):
|
||||||
|
- destDir = ensurePath(os.path.join(outputPath, 'library'))
|
||||||
|
- asmDir = kernelWriterAssembly.getAssemblyDirectory()
|
||||||
|
- archs = collections.defaultdict(list)
|
||||||
|
+def linkIntoCodeObject(
|
||||||
|
+ objFiles: List[str], coPathDest: Union[Path, str], kernelWriterAssembly: KernelWriterAssembly
|
||||||
|
+):
|
||||||
|
+ """Links object files into a code object file.
|
||||||
|
|
||||||
|
- for k in filter(lambda k: k['KernelLanguage'] == 'Assembly', kernels):
|
||||||
|
- archs[tuple(k['ISA'])].append(k)
|
||||||
|
+ Args:
|
||||||
|
+ objectFiles: A list of object files to be linked.
|
||||||
|
+ coPathDest: The destination path for the code object file.
|
||||||
|
+ kernelWriterAssembly: An instance of KernelWriterAssembly to get link arguments.
|
||||||
|
|
||||||
|
- coFiles = []
|
||||||
|
+ Raises:
|
||||||
|
+ RuntimeError: If linker invocation fails.
|
||||||
|
+ """
|
||||||
|
+ if os.name == "nt":
|
||||||
|
+ # On Windows, the objectFiles list command line (including spaces)
|
||||||
|
+ # exceeds the limit of 8191 characters, so using response file
|
||||||
|
+
|
||||||
|
+ responseFile = os.path.join('/tmp', 'clangArgs.txt')
|
||||||
|
+ with open(responseFile, 'wt') as file:
|
||||||
|
+ file.write(" ".join(objFiles))
|
||||||
|
+ file.flush()
|
||||||
|
+
|
||||||
|
+ args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', coFileRaw, '@clangArgs.txt']
|
||||||
|
+ subprocess.check_call(args, cwd=asmDir)
|
||||||
|
+ else:
|
||||||
|
+ numObjFiles = len(objFiles)
|
||||||
|
+ maxObjFiles = 10000
|
||||||
|
+
|
||||||
|
+ if numObjFiles > maxObjFiles:
|
||||||
|
+ batchedObjFiles = [objFiles[i:i+maxObjFiles] for i in range(0, numObjFiles, maxObjFiles)]
|
||||||
|
+ batchSize = int(math.ceil(numObjFiles / maxObjFiles))
|
||||||
|
+
|
||||||
|
+ newObjFiles = [str(coPathDest) + "." + str(i) for i in range(0, batchSize)]
|
||||||
|
+ newObjFilesOutput = []
|
||||||
|
+
|
||||||
|
+ for batch, filename in zip(batchedObjFiles, newObjFiles):
|
||||||
|
+ if len(batch) > 1:
|
||||||
|
+ args = [globalParameters["ROCmLdPath"], "-r"] + batch + [ "-o", filename]
|
||||||
|
+ print2(f"Linking object files into fewer object files: {' '.join(args)}")
|
||||||
|
+ subprocess.check_call(args)
|
||||||
|
+ newObjFilesOutput.append(filename)
|
||||||
|
+ else:
|
||||||
|
+ newObjFilesOutput.append(batch[0])
|
||||||
|
+
|
||||||
|
+ args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjFilesOutput, str(coPathDest))
|
||||||
|
+ print2(f"Linking object files into code object: {' '.join(args)}")
|
||||||
|
+ subprocess.check_call(args)
|
||||||
|
+ else:
|
||||||
|
+ args = kernelWriterAssembly.getLinkCodeObjectArgs(objFiles, str(coPathDest))
|
||||||
|
+ print2(f"Linking object files into code object: {' '.join(args)}")
|
||||||
|
+ subprocess.check_call(args)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def compressCodeObject(
|
||||||
|
+ coPathSrc: Union[Path, str], coPathDest: Union[Path, str], gfx: str, bundler: str
|
||||||
|
+):
|
||||||
|
+ """Compresses a code object file using the provided bundler.
|
||||||
|
+
|
||||||
|
+ Args:
|
||||||
|
+ coPathSrc: The source path of the code object file to be compressed.
|
||||||
|
+ coPathDest: The destination path for the compressed code object file.
|
||||||
|
+ gfx: The target GPU architecture.
|
||||||
|
+ bundler: The path to the Clang Offload Bundler executable.
|
||||||
|
+
|
||||||
|
+ Raises:
|
||||||
|
+ RuntimeError: If compressing the code object file fails.
|
||||||
|
+ """
|
||||||
|
+ args = [
|
||||||
|
+ bundler,
|
||||||
|
+ "--compress",
|
||||||
|
+ "--type=o",
|
||||||
|
+ "--bundle-align=4096",
|
||||||
|
+ f"--targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--{gfx}",
|
||||||
|
+ "--input=/dev/null",
|
||||||
|
+ f"--input={str(coPathSrc)}",
|
||||||
|
+ f"--output={str(coPathDest)}",
|
||||||
|
+ ]
|
||||||
|
+
|
||||||
|
+ print2(f"Bundling/compressing code objects: {' '.join(args)}")
|
||||||
|
+ try:
|
||||||
|
+ out = subprocess.check_output(args, stderr=subprocess.STDOUT)
|
||||||
|
+ print2(f"Output: {out}")
|
||||||
|
+ except subprocess.CalledProcessError as err:
|
||||||
|
+ raise RuntimeError(
|
||||||
|
+ f"Error compressing code object via bundling: {err.output}\nFailed command: {' '.join(args)}"
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+def buildAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath):
|
||||||
|
+
|
||||||
|
+ isAsm = lambda k: k["KernelLanguage"] == "Assembly"
|
||||||
|
+
|
||||||
|
+ extObj = ".o"
|
||||||
|
+ extCo = ".co"
|
||||||
|
+ extCoRaw = ".co.raw"
|
||||||
|
|
||||||
|
- for arch, archKernels in archs.items():
|
||||||
|
+ destDir = Path(ensurePath(os.path.join(outputPath, 'library')))
|
||||||
|
+ asmDir = Path(kernelWriterAssembly.getAssemblyDirectory())
|
||||||
|
+
|
||||||
|
+ archKernelMap = collections.defaultdict(list)
|
||||||
|
+ for k in filter(isAsm, kernels):
|
||||||
|
+ archKernelMap[tuple(k['ISA'])].append(k)
|
||||||
|
+
|
||||||
|
+ coFiles = []
|
||||||
|
+ for arch, archKernels in archKernelMap.items():
|
||||||
|
if len(archKernels) == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
- archName = getGfxName(arch)
|
||||||
|
+ gfx = getGfxName(arch)
|
||||||
|
|
||||||
|
if globalParameters["MergeFiles"] or globalParameters["NumMergedFiles"] > 1 or globalParameters["LazyLibraryLoading"]:
|
||||||
|
- objectFiles = [kernelWriterAssembly.getKernelFileBase(k) + '.o' for k in archKernels if 'codeObjectFile' not in k]
|
||||||
|
+ objectFiles = [str(asmDir / (kernelWriterAssembly.getKernelFileBase(k) + extObj)) for k in archKernels if 'codeObjectFile' not in k]
|
||||||
|
|
||||||
|
- #Group kernels from placeholder libraries
|
||||||
|
coFileMap = collections.defaultdict(list)
|
||||||
|
+
|
||||||
|
if len(objectFiles):
|
||||||
|
- coFileMap[os.path.join(destDir, "TensileLibrary_"+archName+".co")] = objectFiles
|
||||||
|
+ coFileMap[asmDir / ("TensileLibrary_"+ gfx + extCoRaw)] = objectFiles
|
||||||
|
|
||||||
|
for kernel in archKernels:
|
||||||
|
coName = kernel.get("codeObjectFile", None)
|
||||||
|
if coName:
|
||||||
|
- coFileMap[os.path.join(destDir, coName+".co")] += [kernelWriterAssembly.getKernelFileBase(kernel) + '.o']
|
||||||
|
+ coFileMap[asmDir / (coName + extCoRaw)].append(str(asmDir / (kernelWriterAssembly.getKernelFileBase(kernel) + extObj)))
|
||||||
|
|
||||||
|
- for coFile, objectFiles in coFileMap.items():
|
||||||
|
- if os.name == "nt":
|
||||||
|
- # On Windows, the objectFiles list command line (including spaces)
|
||||||
|
- # exceeds the limit of 8191 characters, so using response file
|
||||||
|
+ for coFileRaw, objFiles in coFileMap.items():
|
||||||
|
|
||||||
|
- responseArgs = objectFiles
|
||||||
|
- responseFile = os.path.join(asmDir, 'clangArgs.txt')
|
||||||
|
- with open(responseFile, 'wt') as file:
|
||||||
|
- file.write( " ".join(responseArgs) )
|
||||||
|
- file.flush()
|
||||||
|
-
|
||||||
|
- args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', coFile, '@clangArgs.txt']
|
||||||
|
- subprocess.check_call(args, cwd=asmDir)
|
||||||
|
- else:
|
||||||
|
- numOfObjectFiles = len(objectFiles)
|
||||||
|
- splitFiles = 10000
|
||||||
|
- if numOfObjectFiles > splitFiles:
|
||||||
|
- slicedObjectFilesList = [objectFiles[x:x+splitFiles] for x in range(0, numOfObjectFiles, splitFiles)]
|
||||||
|
- objectFileBasename = os.path.split(coFile)[-1].split('.')[0]
|
||||||
|
- numOfOneSliceOfObjectFiles = int(math.ceil(numOfObjectFiles / splitFiles))
|
||||||
|
- newObjectFiles = [ objectFileBasename + "_" + str(i) + ".o" for i in range(0, numOfOneSliceOfObjectFiles)]
|
||||||
|
- newObjectFilesOutput = []
|
||||||
|
- for slicedObjectFiles, objectFile in zip(slicedObjectFilesList, newObjectFiles):
|
||||||
|
- if len(slicedObjectFiles) > 1:
|
||||||
|
- args = [globalParameters["ROCmLdPath"], "-r"] + slicedObjectFiles + [ "-o", objectFile ]
|
||||||
|
- if globalParameters["PrintCodeCommands"]:
|
||||||
|
- print(asmDir)
|
||||||
|
- print(' '.join(args))
|
||||||
|
- subprocess.check_call(args, cwd=asmDir)
|
||||||
|
- newObjectFilesOutput.append(objectFile)
|
||||||
|
- else:
|
||||||
|
- newObjectFilesOutput.append(slicedObjectFiles[0])
|
||||||
|
- args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjectFilesOutput, coFile)
|
||||||
|
- if globalParameters["PrintCodeCommands"]:
|
||||||
|
- print(asmDir)
|
||||||
|
- print(' '.join(args))
|
||||||
|
- subprocess.check_call(args, cwd=asmDir)
|
||||||
|
- else:
|
||||||
|
- args = kernelWriterAssembly.getLinkCodeObjectArgs(objectFiles, coFile)
|
||||||
|
- if globalParameters["PrintCodeCommands"]:
|
||||||
|
- print(asmDir)
|
||||||
|
- print(' '.join(args))
|
||||||
|
- subprocess.check_call(args, cwd=asmDir)
|
||||||
|
+ linkIntoCodeObject(objFiles, coFileRaw, kernelWriterAssembly)
|
||||||
|
+ coFile = destDir / coFileRaw.name.replace(extCoRaw, extCo)
|
||||||
|
+ compressCodeObject(coFileRaw, coFile, gfx, globalParameters["ClangOffloadBundlerPath"])
|
||||||
|
|
||||||
|
coFiles.append(coFile)
|
||||||
|
else:
|
||||||
|
# no mergefiles
|
||||||
|
def newCoFileName(kName):
|
||||||
|
if globalParameters["PackageLibrary"]:
|
||||||
|
- return os.path.join(destDir, archName, kName + '.co')
|
||||||
|
+ return os.path.join(destDir, gfx, kName + '.co')
|
||||||
|
else:
|
||||||
|
- return os.path.join(destDir, kName + '_' + archName + '.co')
|
||||||
|
+ return os.path.join(destDir, kName + '_' + gfx + '.co')
|
||||||
|
|
||||||
|
def orgCoFileName(kName):
|
||||||
|
return os.path.join(asmDir, kName + '.co')
|
||||||
|
@@ -179,6 +234,8 @@ def orgCoFileName(kName):
|
||||||
|
map(lambda k: kernelWriterAssembly.getKernelFileBase(k), archKernels)), "Copying code objects"):
|
||||||
|
shutil.copyfile(src, dst)
|
||||||
|
coFiles.append(dst)
|
||||||
|
+ printWarning("Code object files are not compressed in `--no-merge-files` build mode.")
|
||||||
|
+
|
||||||
|
return coFiles
|
||||||
|
|
||||||
|
def which(p):
|
||||||
|
@@ -645,7 +702,7 @@ def success(kernel):
|
||||||
|
|
||||||
|
if not globalParameters["GenerateSourcesAndExit"]:
|
||||||
|
codeObjectFiles += buildSourceCodeObjectFiles(CxxCompiler, kernelFiles, outputPath)
|
||||||
|
- codeObjectFiles += getAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath)
|
||||||
|
+ codeObjectFiles += buildAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath)
|
||||||
|
|
||||||
|
Common.popWorkingPath() # build_tmp
|
||||||
|
Common.popWorkingPath() # workingDir
|
||||||
|
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
|
||||||
|
index 2b9da394..b001fa7c 100644
|
||||||
|
--- a/Tensile/TensileCreateLibrary.py
|
||||||
|
+++ b/Tensile/TensileCreateLibrary.py
|
||||||
|
@@ -808,10 +808,13 @@ def copyStaticFiles(outputPath=None):
|
||||||
|
"ReductionTemplate.h",
|
||||||
|
"memory_gfx.h" ]
|
||||||
|
|
||||||
|
+ import filecmp
|
||||||
|
for fileName in libraryStaticFiles:
|
||||||
|
- # copy file
|
||||||
|
- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \
|
||||||
|
- outputPath )
|
||||||
|
+ src = os.path.join(globalParameters["SourcePath"], fileName)
|
||||||
|
+ dst = os.path.join(outputPath, os.path.basename(src))
|
||||||
|
+ # no need to copy twice if it has already been copied
|
||||||
|
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
|
||||||
|
+ shutil.copyfile(src, dst)
|
||||||
|
|
||||||
|
return libraryStaticFiles
|
||||||
|
|
||||||
|
@@ -1417,9 +1420,13 @@ def TensileCreateLibrary():
|
||||||
|
writeCMake(outputPath, solutionFiles, sourceKernelFiles, staticFiles, masterLibraries)
|
||||||
|
|
||||||
|
# Make sure to copy the library static files.
|
||||||
|
+ import filecmp
|
||||||
|
for fileName in staticFiles:
|
||||||
|
- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \
|
||||||
|
- outputPath )
|
||||||
|
+ src = os.path.join(globalParameters["SourcePath"], fileName)
|
||||||
|
+ dst = os.path.join(outputPath, os.path.basename(src))
|
||||||
|
+ # no need to copy twice if it has already been copied
|
||||||
|
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
|
||||||
|
+ shutil.copyfile(src, dst)
|
||||||
|
|
||||||
|
# write solutions and kernels
|
||||||
|
codeObjectFiles = writeSolutionsAndKernels(outputPath, CxxCompiler, None, solutions,
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
diff --git a/Tensile/Ops/gen_assembly.sh b/Tensile/Ops/gen_assembly.sh
|
||||||
|
index 0b21b6c6..609f1dd1 100755
|
||||||
|
--- a/Tensile/Ops/gen_assembly.sh
|
||||||
|
+++ b/Tensile/Ops/gen_assembly.sh
|
||||||
|
@@ -23,6 +23,8 @@
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
archStr=$1
|
||||||
|
dst=$2
|
||||||
|
venv=$3
|
||||||
|
@@ -35,7 +37,13 @@ fi
|
||||||
|
|
||||||
|
toolchain=${rocm_path}/llvm/bin/clang++
|
||||||
|
|
||||||
|
-. ${venv}/bin/activate
|
||||||
|
+if ! [ -z ${TENSILE_GEN_ASSEMBLY_TOOLCHAIN+x} ]; then
|
||||||
|
+ toolchain="${TENSILE_GEN_ASSEMBLY_TOOLCHAIN}"
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+if [ -f ${venv}/bin/activate ]; then
|
||||||
|
+ . ${venv}/bin/activate
|
||||||
|
+fi
|
||||||
|
|
||||||
|
IFS=';' read -r -a archs <<< "$archStr"
|
||||||
|
|
||||||
|
@@ -77,4 +85,6 @@ for arch in "${archs[@]}"; do
|
||||||
|
python3 ./ExtOpCreateLibrary.py --src=$dst --co=$dst/extop_$arch.co --output=$dst --arch=$arch
|
||||||
|
done
|
||||||
|
|
||||||
|
-deactivate
|
||||||
|
+if [ -f ${venv}/bin/activate ]; then
|
||||||
|
+ deactivate
|
||||||
|
+fi
|
||||||
56
pkgs/rocm-modules/triton/default.nix
Normal file
56
pkgs/rocm-modules/triton/default.nix
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
{
|
||||||
|
triton-no-cuda,
|
||||||
|
rocmPackages,
|
||||||
|
fetchFromGitHub,
|
||||||
|
}:
|
||||||
|
(triton-no-cuda.override (_old: {
|
||||||
|
inherit rocmPackages;
|
||||||
|
rocmSupport = true;
|
||||||
|
stdenv = rocmPackages.llvm.rocmClangStdenv;
|
||||||
|
llvm = rocmPackages.triton-llvm;
|
||||||
|
})).overridePythonAttrs
|
||||||
|
(old: {
|
||||||
|
doCheck = false;
|
||||||
|
stdenv = rocmPackages.llvm.rocmClangStdenv;
|
||||||
|
version = "3.2.0";
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "triton-lang";
|
||||||
|
repo = "triton";
|
||||||
|
rev = "9641643da6c52000c807b5eeed05edaec4402a67"; # "release/3.2.x";
|
||||||
|
hash = "sha256-V1lpARwOLn28ZHfjiWR/JJWGw3MB34c+gz6Tq1GOVfo=";
|
||||||
|
};
|
||||||
|
buildInputs = old.buildInputs ++ [
|
||||||
|
rocmPackages.clr
|
||||||
|
];
|
||||||
|
dontStrip = true;
|
||||||
|
env = old.env // {
|
||||||
|
CXXFLAGS = "-O3 -I${rocmPackages.clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include";
|
||||||
|
TRITON_OFFLINE_BUILD = 1;
|
||||||
|
};
|
||||||
|
patches = [ ];
|
||||||
|
postPatch = ''
|
||||||
|
# Remove nvidia backend so we don't depend on unfree nvidia headers
|
||||||
|
# when we only want to target ROCm
|
||||||
|
rm -rf third_party/nvidia
|
||||||
|
substituteInPlace CMakeLists.txt \
|
||||||
|
--replace-fail "add_subdirectory(test)" ""
|
||||||
|
sed -i '/nvidia\|NVGPU\|registerConvertTritonGPUToLLVMPass\|mlir::test::/Id' bin/RegisterTritonDialects.h
|
||||||
|
sed -i '/TritonTestAnalysis/Id' bin/CMakeLists.txt
|
||||||
|
substituteInPlace python/setup.py \
|
||||||
|
--replace-fail 'backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]' \
|
||||||
|
'backends = [*BackendInstaller.copy(["amd"]), *BackendInstaller.copy_externals()]'
|
||||||
|
find . -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
|
||||||
|
find . -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +
|
||||||
|
# remove any downloads
|
||||||
|
substituteInPlace python/setup.py \
|
||||||
|
--replace-fail "[get_json_package_info()]" "[]"\
|
||||||
|
--replace-fail "[get_llvm_package_info()]" "[]"\
|
||||||
|
--replace-fail "curr_version != version" "False"
|
||||||
|
# Don't fetch googletest
|
||||||
|
substituteInPlace cmake/AddTritonUnitTest.cmake \
|
||||||
|
--replace-fail 'include(''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)' "" \
|
||||||
|
--replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)"
|
||||||
|
substituteInPlace third_party/amd/backend/compiler.py \
|
||||||
|
--replace-fail '"/opt/rocm/llvm/bin/ld.lld"' "os.environ['ROCM_PATH']"' + "/llvm/bin/ld.lld"'
|
||||||
|
'';
|
||||||
|
})
|
||||||
62
pkgs/rocm-modules/update.nix
Normal file
62
pkgs/rocm-modules/update.nix
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# Builds the update script for a ROCm package that is released on GitHub.
#
# Outer arguments:
#   lib         - used for the string prefix helpers that derive the attribute name.
#   writeScript - used to turn the bash text below into an executable script.
#
# Inner arguments:
#   name   - package name; a leading "rocm-llvm-" is rewritten to the "llvm." attribute prefix.
#   owner  - GitHub repository owner, interpolated into the API URL.
#   repo   - GitHub repository name, interpolated into the API URL.
#   page   - API sub-path under the repository (default "releases").
#   filter - jq program applied to the (array-wrapped) API response; it must print the
#            bare version string.
#
# Result: a one-element list containing the generated script.
{
  lib,
  writeScript,
}:

{
  name ? "",
  owner ? "",
  repo ? "",
  page ? "releases",
  # input: array of [ { tag_name: "rocm-6.x.x", }, ... ]. some entries may have bad names like rocm-test-date we want to skip
  # output: first tag_name/name that's a proper version if any
  filter ? "map(.tag_name // .name) | map(select(test(\"^rocm-[0-9]+\\\\.[0-9]+(\\\\.[0-9]+)?$\"))) | first | ltrimstr(\"rocm-\")",
}:

let
  # Attribute name below rocmPackages_6: "rocm-llvm-foo" becomes "llvm.foo",
  # every other name is used unchanged.
  pname =
    if lib.hasPrefix "rocm-llvm-" name then "llvm.${lib.removePrefix "rocm-llvm-" name}" else name;

  updateScript = writeScript "update.sh" ''
    #!/usr/bin/env nix-shell
    #!nix-shell -i bash -p curl jq common-updater-scripts
    set -euo pipefail

    # Print the raw GitHub API response for the configured page on stdout.
    # The request URL is logged on stderr.
    fetch_releases() {
      local api_url="https://api.github.com/repos/${owner}/${repo}/${page}"
      if [ "${page}" = "releases" ]; then
        api_url="$api_url?per_page=4"
      fi
      # Quoted: the URL may contain "?", which the shell would otherwise treat as a glob.
      >&2 echo "$api_url"
      # Pass GITHUB_TOKEN as the basic-auth password only when it is set and non-empty.
      curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} -sL "$api_url"
    }

    # Run the jq filter over the response given as $1 and print its result.
    find_valid_version() {
      local releases="$1"
      >&2 echo "$releases"
      # Wrap in array if not already an array to make handling specific release or tags page the same
      >&2 echo jq -r 'if type == "array" then . else [.] end | ${filter}'
      echo "$releases" | jq -r 'if type == "array" then . else [.] end | ${filter}'
    }

    releases="$(fetch_releases)"
    version="$(find_valid_version "$releases")"

    # jq -r prints the literal string "null" when the filter selects nothing,
    # so treat that the same as an empty result.
    if [ -z "$version" ] || [ "$version" = "null" ]; then
      echo "No valid version found in the fetched release(s)." >&2
      exit 1
    fi

    IFS='.' read -ra version_arr <<< "$version"

    # Log every parsed component, not just the first array element.
    >&2 echo parsed version "''${version_arr[*]}" from "$version"

    # Refuse to move this package set past major version 6.
    if (( ''${version_arr[0]} > 6 )); then
      echo "'rocmPackages_6.${pname}' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." >&2
      exit 1
    fi

    update-source-version rocmPackages_6.${pname} "$version" --ignore-same-hash
  '';
in
[ updateScript ]
|
||||||
Reference in New Issue
Block a user