diff options
author | Andrew Ammerlaan <andrewammerlaan@gentoo.org> | 2022-03-05 21:49:57 +0100 |
---|---|---|
committer | Andrew Ammerlaan <andrewammerlaan@gentoo.org> | 2022-03-05 21:51:52 +0100 |
commit | db81ee2a9a65c0aaebb4df461f8be388c66cdd16 (patch) | |
tree | 866c78b9cd619c0df9c1aa147b1e3504955529a9 /dev-util/Tensile | |
parent | dev-util/hip: fix compile, add missing dep (diff) | |
download | gentoo-db81ee2a9a65c0aaebb4df461f8be388c66cdd16.tar.gz gentoo-db81ee2a9a65c0aaebb4df461f8be388c66cdd16.tar.bz2 gentoo-db81ee2a9a65c0aaebb4df461f8be388c66cdd16.zip |
dev-util/Tensile: add new dependency of rocBLAS
Package-Manager: Portage-3.0.30, Repoman-3.0.3
Signed-off-by: Andrew Ammerlaan <andrewammerlaan@gentoo.org>
Diffstat (limited to 'dev-util/Tensile')
-rw-r--r-- | dev-util/Tensile/Manifest | 1 | ||||
-rw-r--r-- | dev-util/Tensile/Tensile-4.3.0.ebuild | 74 | ||||
-rw-r--r-- | dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch | 96 | ||||
-rw-r--r-- | dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch | 23 | ||||
-rw-r--r-- | dev-util/Tensile/metadata.xml | 11 |
5 files changed, 205 insertions, 0 deletions
diff --git a/dev-util/Tensile/Manifest b/dev-util/Tensile/Manifest new file mode 100644 index 000000000000..fd4cac044af8 --- /dev/null +++ b/dev-util/Tensile/Manifest @@ -0,0 +1 @@ +DIST rocm-Tensile-4.3.0.tar.gz 14250149 BLAKE2B 030138eaca2a0aadd96801e6dbd72e510716dd90553ef3795c5e04e00a34a05ecae82b24f755e4033a4acfcdb1cf26291da1e7902bb090f89a010d403e832beb SHA512 126db0b413c716fba8d5be9bff7a44fd1badacbf32f3db8d0db649819177db37ebd56fd22dd3c809655f5d29675be115e698cd10bc3d0b4b23878ae3726fce47 diff --git a/dev-util/Tensile/Tensile-4.3.0.ebuild b/dev-util/Tensile/Tensile-4.3.0.ebuild new file mode 100644 index 000000000000..cf33fb19cf10 --- /dev/null +++ b/dev-util/Tensile/Tensile-4.3.0.ebuild @@ -0,0 +1,74 @@ +# Copyright 1999-2022 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +PYTHON_COMPAT=( python3_{8..10} ) +DISTUTILS_USE_PEP517=setuptools +inherit distutils-r1 + +DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions" +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/Tensile" +SRC_URI="https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz" +S="${WORKDIR}/${PN}-rocm-${PV}" + +LICENSE="MIT" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" + +# Not compatible with recent versions of pytest +RESTRICT="test" + +RDEPEND="${PYTHON_DEPS} + dev-python/pyyaml[${PYTHON_USEDEP}] + dev-python/msgpack[${PYTHON_USEDEP}] +" +DEPEND="${RDEPEND} + dev-util/hip:${SLOT} +" +BDEPEND="test? ( + dev-util/rocminfo:${SLOT} +)" + +PATCHES=( + "${FILESDIR}/Tensile-${PV}-hsaco-compile-specified-arch.patch" # backported from upstream, should remove after 4.3.0 + "${FILESDIR}/Tensile-4.3.0-output-commands.patch" +) + +CMAKE_USE_DIR="${WORKDIR}/Source" + +distutils_enable_tests pytest + +src_prepare() { + distutils-r1_src_prepare + + mv ${PN}/Source "${WORKDIR}"/ || die + sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \ + -i "${WORKDIR}"/Source/cmake/FindROCmSMI.cmake || die + sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \ + -i "${WORKDIR}"/Source/CMakeLists.txt || die + + sed -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \ + -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \ + -i ${PN}/Common.py || die + + sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile\", end='')|" \ + -i ${PN}/__init__.py || die +} + +src_test() { + ROCM_PATH="${EPREFIX}/usr/" distutils-r1_src_test +} + +src_install() { + distutils-r1_src_install + + # Move the cmake files to the correct directory + mkdir -p "${ED}/usr/$(get_libdir)/cmake/${PN}" || die + mv "${ED}/usr/cmake/"* "${ED}/usr/$(get_libdir)/cmake/${PN}" || die + rm -r "${ED}/usr/cmake" || die + + insinto /usr/share/${PN} + doins -r "${WORKDIR}"/Source/* + dosym . /usr/share/${PN}/Source +} diff --git a/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch new file mode 100644 index 000000000000..8e6753781a2a --- /dev/null +++ b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch @@ -0,0 +1,96 @@ +https://github.com/ROCmSoftwarePlatform/Tensile/issues/1395 +https://github.com/ROCmSoftwarePlatform/Tensile/pull/1398 + +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -136,6 +136,35 @@ def which(p): + return candidate + return None + ++def splitArchs(): ++ # Helper for architecture ++ def isSupported(arch): ++ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ ++ globalParameters["AsmCaps"][arch]["SupportedSource"] ++ ++ if ";" in globalParameters["Architecture"]: ++ wantedArchs = globalParameters["Architecture"].split(";") ++ else: ++ wantedArchs = globalParameters["Architecture"].split("_") ++ archs = [] ++ cmdlineArchs = [] ++ if "all" in wantedArchs: ++ for arch in globalParameters['SupportedISA']: ++ if isSupported(arch): ++ if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): ++ if (arch == (9,0,10)): ++ archs += [gfxName(arch) + '-xnack+'] ++ cmdlineArchs += [gfxName(arch) + ':xnack+'] ++ archs += [gfxName(arch) + '-xnack-'] ++ cmdlineArchs += [gfxName(arch) + ':xnack-'] ++ else: ++ archs += [gfxName(arch)] ++ cmdlineArchs += [gfxName(arch)] ++ else: ++ for arch in wantedArchs: ++ archs += [re.sub(":", "-", arch)] ++ cmdlineArchs += [arch] ++ return archs, cmdlineArchs + + def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): + buildPath = ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp')) +@@ -149,24 +178,8 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): + objectFilename = base + '.o' + soFilename = base + '.so' + +- def isSupported(arch): +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ +- globalParameters["AsmCaps"][arch]["SupportedSource"] +- + if (CxxCompiler == "hipcc"): +- archs = [] +- cmdlineArchs = [] +- for arch in globalParameters['SupportedISA']: +- if isSupported(arch): +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): +- if (arch == (9,0,10)): +- archs += [gfxName(arch) + '-xnack+'] +- cmdlineArchs += [gfxName(arch) + ':xnack+'] +- archs += [gfxName(arch) + '-xnack-'] +- cmdlineArchs += [gfxName(arch) + ':xnack-'] +- else: +- archs += [gfxName(arch)] +- cmdlineArchs += [gfxName(arch)] ++ archs, cmdlineArchs = splitArchs() + + archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs] + +@@ -1063,11 +1076,6 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl + sourceKernels = list([k for k in kernels if k['KernelLanguage'] == 'Source']) + asmKernels = list([k for k in kernels if k['KernelLanguage'] == 'Assembly']) + +- # Helper for architecture +- def isSupported(arch): +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ +- globalParameters["AsmCaps"][arch]["SupportedSource"] +- + # Build a list of kernel object names. + for kernel in sourceKernels: + sourceKernelNames += [kernelWriterSource.getKernelFileBase(kernel)] +@@ -1081,15 +1089,7 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl + + # Source based kernels are built for all supported architectures + if (cxxCompiler == 'hipcc'): +- sourceArchs = [] +- for arch in globalParameters['SupportedISA']: +- if isSupported(arch): +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): +- if (arch == (9,0,10)): +- sourceArchs += [gfxName(arch) + '-xnack+'] +- sourceArchs += [gfxName(arch) + '-xnack-'] +- else: +- sourceArchs += [gfxName(arch)] ++ sourceArchs, _ = splitArchs() + else: + raise RuntimeError("Unknown compiler %s" % cxxCompiler) + diff --git a/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch new file mode 100644 index 000000000000..be5a4db21429 --- /dev/null +++ b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch @@ -0,0 +1,23 @@ +diff --color -uprN orig/Tensile/cmake/TensileConfig.cmake Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake +--- orig/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:00.115478470 +0800 ++++ Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:49.963478801 +0800 +@@ -234,6 +234,7 @@ function(TensileCreateLibraryFiles + COMMENT "Generating Tensile Libraries" + OUTPUT ${Tensile_EMBED_LIBRARY_SOURCE};${Tensile_MANIFEST_CONTENTS} + COMMAND ${CommandLine} ++ USES_TERMINAL + ) + + set("${Tensile_VAR_PREFIX}_ALL_FILES" ${Tensile_MANIFEST_CONTENTS} PARENT_SCOPE) +diff --color -uprN orig/Tensile/Common.py Tensile-rocm-4.3.0/Tensile/Common.py +--- orig/Tensile/Common.py 2021-08-18 17:48:00.075478470 +0800 ++++ Tensile-rocm-4.3.0/Tensile/Common.py 2021-08-18 17:48:23.287478624 +0800 +@@ -179,7 +179,7 @@ globalParameters["PrintTensorD"] = 0 + globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both + globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info + globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest +-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc) ++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc) + globalParameters["DumpTensors"] = False # If True, dump tensors to binary files instead of printing them. + + # TODO - remove this when NewClient is mainstream diff --git a/dev-util/Tensile/metadata.xml b/dev-util/Tensile/metadata.xml new file mode 100644 index 000000000000..9bbebec502ca --- /dev/null +++ b/dev-util/Tensile/metadata.xml @@ -0,0 +1,11 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE pkgmetadata SYSTEM 'http://www.gentoo.org/dtd/metadata.dtd'> +<pkgmetadata> + <maintainer type="project"> + <email>sci@gentoo.org</email> + <name>Science Project</name> + </maintainer> + <upstream> + <remote-id type="github">ROCmSoftwarePlatform/Tensile</remote-id> + </upstream> +</pkgmetadata> |