* [gentoo-commits] repo/gentoo:master commit in: sci-ml/caffe2/files/, sci-ml/caffe2/
@ 2025-04-05 12:55 Alfredo Tupone
0 siblings, 0 replies; 6+ messages in thread
From: Alfredo Tupone @ 2025-04-05 12:55 UTC (permalink / raw)
To: gentoo-commits
commit: 0b45f8a6d7fae82791efe2caf4934b25d6138f4f
Author: Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Thu Apr 3 18:54:54 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Sat Apr 5 12:52:52 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=0b45f8a6
sci-ml/caffe2: fix build for ROCm
* Revised the fix for `-fclang-abi-compat=17`.
* Added fixes for libc++
Closes: https://bugs.gentoo.org/953101
Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Closes: https://github.com/gentoo/gentoo/pull/41448
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
sci-ml/caffe2/Manifest | 1 +
...ffe2-2.6.0-r2.ebuild => caffe2-2.6.0-r3.ebuild} | 64 +++++++++++++---------
.../files/caffe2-2.6.0-rocm-fix-std-cpp17.patch | 50 +++++++++++++++++
3 files changed, 89 insertions(+), 26 deletions(-)
diff --git a/sci-ml/caffe2/Manifest b/sci-ml/caffe2/Manifest
index afeccbf65514..9edc8e9aadc7 100644
--- a/sci-ml/caffe2/Manifest
+++ b/sci-ml/caffe2/Manifest
@@ -1,4 +1,5 @@
DIST caffe2-patches-20240809.tar.gz 15242 BLAKE2B 77503c61487e7d85cca5afcab9a6e638f9833a70861845638cf1b62bc492d7b6650e6db81d53ebb2f39c6313509250d339f725f04d03ec6dd23dd0cf70843d8c SHA512 74b3b0b6671b655ecac93f7436c4ed7cb0157a83aafbf6afcc0811e11cef341cd8f638db1a111bcbb01e1a6dd4daf3a36b96d7a8ce90f04c2fa091bd6e3a142b
+DIST composable_kernel-50ee4267.tar.gz 4194795 BLAKE2B b3c97d98a0c9e4620fdae3d30006edf55cc60ffa7f8518f6acb8d808647bc4de362c2e2b7e974686503fa2c7f359b6981cfbda74e40cc1bad4d351c5d2ff92e1 SHA512 9fc6f5f15556f020414b4567520329ef762209a82411a246c2bc1240a9fed2669f7fcb982cf773e3e9561bf9a2c557dba82b8b469d2e5844e679e2f5ab7c3e17
DIST pytorch-2.4.1.tar.gz 115029469 BLAKE2B c2909ff27d527bc57cba56b780d3b8cd07a043ab045caa6c6b27857a16f9ad10aaab2116b26226b1e46ee08ffb44007965d914464418e4ae14ca48c3f3f383bb SHA512 7e9b4485e242eaf0d648765c6621d73d95e7107b766646a098175436d1ab2e2b864badd0757a3bab6b7c318233f2120bad9ac07b39bb9e357897919580c87631
DIST pytorch-2.5.1.tar.gz 116091366 BLAKE2B 7838b17562b94ffc7d798031348689db607dd5eae2a3c35be365972e2b52a2c1b12067068d5aca5ab00cf0977d9c2c3c9ae5337d69534c864c732e6256cbeef6 SHA512 a913a466324a65fa3d79c5e9ad4d605fc7976f0134fda2f81aaa3cea29d56926604999b8a238759646d211e63b47bbb446cdffa86ca8defd8159f11e30301289
DIST pytorch-2.6.0.tar.gz 119594438 BLAKE2B 3152eb341cf42295e147e59625beb9c06608aa4b78f9618c1c0024b10c1c767715d07fe8c4be52d029ac47f808cd0d5e65c9530ec90d951a64b993083b4067ad SHA512 a70da80ff09d226085e18228132cf6bb236ad8cc47eed52375d0d2a615f09dd33849da947270b5670c184eab60cb8e2adf11d801babfbda7aa621400501d07b0
diff --git a/sci-ml/caffe2/caffe2-2.6.0-r2.ebuild b/sci-ml/caffe2/caffe2-2.6.0-r3.ebuild
similarity index 81%
rename from sci-ml/caffe2/caffe2-2.6.0-r2.ebuild
rename to sci-ml/caffe2/caffe2-2.6.0-r3.ebuild
index b9747ad9eed4..37c752237aa1 100644
--- a/sci-ml/caffe2/caffe2-2.6.0-r2.ebuild
+++ b/sci-ml/caffe2/caffe2-2.6.0-r3.ebuild
@@ -10,10 +10,17 @@ inherit python-single-r1 cmake cuda flag-o-matic prefix rocm toolchain-funcs
MYPN=pytorch
MYP=${MYPN}-${PV}
+# caffe2-2.6.0 depends on a future version of composable_kernel
+# TODO: replace it with RDEPEND in the future
+CK_COMMIT=50ee4267e27b875d149e642f4cebd47be1dc3b57
+CK_P=composable_kernel-${CK_COMMIT:0:8}
+
DESCRIPTION="A deep learning framework"
HOMEPAGE="https://pytorch.org/"
-SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
- -> ${MYP}.tar.gz"
+SRC_URI="
+ https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz -> ${MYP}.tar.gz
+ rocm? ( https://github.com/ROCm/composable_kernel/archive/${CK_COMMIT}.tar.gz -> ${CK_P}.tar.gz )
+"
S="${WORKDIR}"/${MYP}
@@ -68,24 +75,19 @@ RDEPEND="
sci-ml/gemmlowp
)
rocm? (
- =dev-util/hip-6.1*
- =dev-libs/rccl-6.1*[${ROCM_USEDEP}]
- =sci-libs/rocThrust-6.1*[${ROCM_USEDEP}]
- =sci-libs/rocPRIM-6.1*[${ROCM_USEDEP}]
- =sci-libs/hipBLAS-6.1*[${ROCM_USEDEP}]
- =sci-libs/hipFFT-6.1*[${ROCM_USEDEP}]
- =sci-libs/hipSPARSE-6.1*[${ROCM_USEDEP}]
- =sci-libs/hipRAND-6.1*[${ROCM_USEDEP}]
- =sci-libs/hipCUB-6.1*[${ROCM_USEDEP}]
- =sci-libs/hipSOLVER-6.1*[${ROCM_USEDEP}]
- =sci-libs/miopen-6.1*[${ROCM_USEDEP}]
- =dev-util/roctracer-6.1*[${ROCM_USEDEP}]
-
- =sci-libs/hipBLASLt-6.1*
- amdgpu_targets_gfx90a? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx90a] )
- amdgpu_targets_gfx940? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx940] )
- amdgpu_targets_gfx941? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx941] )
- amdgpu_targets_gfx942? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx942] )
+ >=dev-libs/rccl-6.1 <dev-libs/rccl-6.4
+ >=dev-util/hip-6.1 <dev-util/hip-6.4
+ >=dev-util/roctracer-6.1 <dev-util/roctracer-6.4
+ >=sci-libs/hipBLAS-6.1 <sci-libs/hipBLAS-6.4
+ >=sci-libs/hipBLASLt-6.1 <sci-libs/hipBLASLt-6.4
+ >=sci-libs/hipCUB-6.1 <sci-libs/hipCUB-6.4
+ >=sci-libs/hipFFT-6.1 <sci-libs/hipFFT-6.4
+ >=sci-libs/hipRAND-6.1 <sci-libs/hipRAND-6.4
+ >=sci-libs/hipSOLVER-6.1 <sci-libs/hipSOLVER-6.4
+ >=sci-libs/hipSPARSE-6.1 <sci-libs/hipSPARSE-6.4
+ >=sci-libs/miopen-6.1 <sci-libs/miopen-6.4
+ >=sci-libs/rocPRIM-6.1 <sci-libs/rocPRIM-6.4
+ >=sci-libs/rocThrust-6.1 <sci-libs/rocThrust-6.4
)
distributed? (
sci-ml/tensorpipe[cuda?]
@@ -122,6 +124,7 @@ PATCHES=(
"${FILESDIR}"/${PN}-2.4.0-cpp-httplib.patch
"${FILESDIR}"/${PN}-2.5.1-glog-0.6.0.patch
"${FILESDIR}"/${PN}-2.5.1-newfix-functorch-install.patch
+ "${FILESDIR}"/${PN}-2.6.0-rocm-fix-std-cpp17.patch
)
src_prepare() {
@@ -178,9 +181,22 @@ src_prepare() {
if use rocm; then
sed -e "s:/opt/rocm:/usr:" \
-e "s:lib/cmake:$(get_libdir)/cmake:g" \
- -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
-i cmake/public/LoadHIP.cmake || die
+ # TODO: delete, when caffe2 depends on systemwide composable_kernel
+ sed -e "s:third_party/composable_kernel:../composable_kernel-${CK_COMMIT}:g" \
+ -i aten/src/ATen/CMakeLists.txt || die
+
+ if tc-is-clang; then
+ # Systemwide gcc (for absl and at::TensorBase) + hipcc (llvm>=18) need abi-compat=17.
+ # But systemwide clang>=18 + hipcc (>=llvm-18) need opposite!
+ # See also: https://github.com/llvm/llvm-project/issues/102443#issuecomment-2329726287
+ sed '/-fclang-abi-compat=17/d' -i cmake/Dependencies.cmake || die
+ fi
+
+ # Workaround for libc++ issue https://github.com/llvm/llvm-project/issues/100802
+ sed 's/std::memcpy/memcpy/g' -i c10/util/Half.h || die
+
ebegin "HIPifying cuda sources"
${EPYTHON} tools/amd_build/build_amd.py || die
eend $?
@@ -275,15 +291,11 @@ src_configure() {
mycmakeargs+=(
-DUSE_NCCL=ON
-DUSE_SYSTEM_NCCL=ON
+ -DCMAKE_REQUIRE_FIND_PACKAGE_HIP=ON
)
# ROCm libraries produce too much warnings
append-cxxflags -Wno-deprecated-declarations -Wno-unused-result
-
- if tc-is-clang; then
- # fix mangling in LLVM: https://github.com/llvm/llvm-project/issues/85656
- append-cxxflags -fclang-abi-compat=17
- fi
fi
if use onednn; then
diff --git a/sci-ml/caffe2/files/caffe2-2.6.0-rocm-fix-std-cpp17.patch b/sci-ml/caffe2/files/caffe2-2.6.0-rocm-fix-std-cpp17.patch
new file mode 100644
index 000000000000..1b8084ac187f
--- /dev/null
+++ b/sci-ml/caffe2/files/caffe2-2.6.0-rocm-fix-std-cpp17.patch
@@ -0,0 +1,50 @@
+Fix for CXX=clang USE=rocm error: invalid argument '-std=c++17' not allowed with 'C'
+https://github.com/pytorch/pytorch/issues/103222
+--- a/c10/hip/CMakeLists.txt
++++ b/c10/hip/CMakeLists.txt
+@@ -36,6 +36,7 @@ if(NOT BUILD_LIBTORCHLESS)
+
+ # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
+ target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
++ set_target_properties(c10_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+
+ # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
+ # minimal. I'm not sure if we need hip_hcc or not; for now leave it out
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -1684,6 +1684,7 @@ if(USE_ROCM)
+
+ # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
+ target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS}) # experiment
++ set_target_properties(torch_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+
+ target_link_libraries(torch_hip PUBLIC c10_hip)
+
+@@ -1886,6 +1887,7 @@ if(BUILD_TEST)
+ target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
+ target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
++ set_target_properties(${test_name} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+ if(INSTALL_TEST)
+ set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -1043,7 +1043,6 @@ if(USE_ROCM)
+ list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
+ list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
+ list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
+- list(APPEND HIP_CXX_FLAGS -std=c++17)
+ list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
+ if(HIP_NEW_TYPE_ENUMS)
+ list(APPEND HIP_CXX_FLAGS -DHIP_NEW_TYPE_ENUMS)
+--- a/cmake/public/utils.cmake
++++ b/cmake/public/utils.cmake
+@@ -267,6 +267,7 @@ function(caffe2_hip_binary_target target_name_or_src)
+ caffe2_binary_target(${target_name_or_src})
+
+ target_compile_options(${__target} PRIVATE ${HIP_CXX_FLAGS})
++ set_target_properties(${__target} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDE})
+ endfunction()
+
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-ml/caffe2/files/, sci-ml/caffe2/
@ 2025-06-26 10:27 Alfredo Tupone
0 siblings, 0 replies; 6+ messages in thread
From: Alfredo Tupone @ 2025-06-26 10:27 UTC (permalink / raw)
To: gentoo-commits
commit: ef5f44a9103abd57cc5b568d9de39496d28f016b
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Thu Jun 26 10:25:51 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Thu Jun 26 10:26:26 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=ef5f44a9
sci-ml/caffe2: fix build with glog-0.7.1
Closes: https://bugs.gentoo.org/956629
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
sci-ml/caffe2/caffe2-2.7.0-r2.ebuild | 1 +
sci-ml/caffe2/files/caffe2-2.7.0-glog-0.7.1.patch | 11 +++++++++++
2 files changed, 12 insertions(+)
diff --git a/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild b/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild
index 9c49aacddb11..3e0bdbbb2574 100644
--- a/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild
+++ b/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild
@@ -127,6 +127,7 @@ PATCHES=(
"${FILESDIR}"/${PN}-2.5.1-newfix-functorch-install.patch
"${FILESDIR}"/${PN}-2.6.0-rocm-fix-std-cpp17.patch
"${FILESDIR}"/${P}-cmake.patch
+ "${FILESDIR}"/${P}-glog-0.7.1.patch
)
src_prepare() {
diff --git a/sci-ml/caffe2/files/caffe2-2.7.0-glog-0.7.1.patch b/sci-ml/caffe2/files/caffe2-2.7.0-glog-0.7.1.patch
new file mode 100644
index 000000000000..7d4b5ba0244a
--- /dev/null
+++ b/sci-ml/caffe2/files/caffe2-2.7.0-glog-0.7.1.patch
@@ -0,0 +1,11 @@
+--- a/c10/util/Logging.cpp 2025-06-26 11:47:58.901240691 +0200
++++ b/c10/util/Logging.cpp 2025-06-26 11:51:56.959290731 +0200
+@@ -352,7 +352,7 @@
+
+ void ShowLogInfoToStderr() {
+ FLAGS_logtostderr = 1;
+- FLAGS_minloglevel = std::min(FLAGS_minloglevel, google::GLOG_INFO);
++ FLAGS_minloglevel = std::min(FLAGS_minloglevel, int(google::GLOG_INFO));
+ }
+ } // namespace c10
+
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-ml/caffe2/files/, sci-ml/caffe2/
@ 2025-06-29 19:32 Alfredo Tupone
0 siblings, 0 replies; 6+ messages in thread
From: Alfredo Tupone @ 2025-06-29 19:32 UTC (permalink / raw)
To: gentoo-commits
commit: f4ae275eb61101b9cbdbdcfa5cffec42f938219a
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Sun Jun 29 19:25:12 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Sun Jun 29 19:25:40 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=f4ae275e
sci-ml/caffe2: fix llvm build
Closes: https://bugs.gentoo.org/953366
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
sci-ml/caffe2/caffe2-2.7.0-r2.ebuild | 5 ++++-
sci-ml/caffe2/files/caffe2-2.7.0-llvm.patch | 15 +++++++++++++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild b/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild
index f969e5ff404d..fffdae3035c6 100644
--- a/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild
+++ b/sci-ml/caffe2/caffe2-2.7.0-r2.ebuild
@@ -139,10 +139,13 @@ PATCHES=(
"${FILESDIR}"/${PN}-2.6.0-rocm-fix-std-cpp17.patch
"${FILESDIR}"/${P}-cmake.patch
"${FILESDIR}"/${P}-glog-0.7.1.patch
+ "${FILESDIR}"/${P}-llvm.patch
)
src_prepare() {
- use flash && mv "${WORKDIR}"/${FLASH_P}/* third_party/${FLASH_PN}/ || die
+ if use flash; then
+ mv "${WORKDIR}"/${FLASH_P}/* third_party/${FLASH_PN}/ || die
+ fi
filter-lto #bug 862672
# Unbundle fmt
diff --git a/sci-ml/caffe2/files/caffe2-2.7.0-llvm.patch b/sci-ml/caffe2/files/caffe2-2.7.0-llvm.patch
new file mode 100644
index 000000000000..e0818fa31e71
--- /dev/null
+++ b/sci-ml/caffe2/files/caffe2-2.7.0-llvm.patch
@@ -0,0 +1,15 @@
+--- a/c10/util/strong_type.h 2025-06-29 10:28:19.365533325 +0200
++++ b/c10/util/strong_type.h 2025-06-29 10:28:40.944598046 +0200
+@@ -1604,12 +1604,6 @@
+ return hash<T>::operator()(value_of(tt));
+ }
+ };
+-template <typename T, typename Tag, typename ... M>
+-struct is_arithmetic<::strong::type<T, Tag, M...>>
+- : is_base_of<::strong::arithmetic::modifier<::strong::type<T, Tag, M...>>,
+- ::strong::type<T, Tag, M...>>
+-{
+-};
+
+ #if STRONG_HAS_STD_FORMAT
+ template<typename T, typename Tag, typename... M, typename Char>
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-ml/caffe2/files/, sci-ml/caffe2/
@ 2025-07-02 6:13 Alfredo Tupone
0 siblings, 0 replies; 6+ messages in thread
From: Alfredo Tupone @ 2025-07-02 6:13 UTC (permalink / raw)
To: gentoo-commits
commit: 26296e4b172b355097bdb4816ebceeaa0755bb74
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Wed Jul 2 06:12:05 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Wed Jul 2 06:12:49 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=26296e4b
sci-ml/caffe2: drop 2.4.1-r7
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
sci-ml/caffe2/Manifest | 2 -
sci-ml/caffe2/caffe2-2.4.1-r7.ebuild | 305 -----------------------
sci-ml/caffe2/files/caffe2-2.4.0-cstdint.patch | 10 -
sci-ml/caffe2/files/caffe2-2.4.0-libfmt-11.patch | 44 ----
4 files changed, 361 deletions(-)
diff --git a/sci-ml/caffe2/Manifest b/sci-ml/caffe2/Manifest
index 8d81a58682be..fda2a4a29e01 100644
--- a/sci-ml/caffe2/Manifest
+++ b/sci-ml/caffe2/Manifest
@@ -1,8 +1,6 @@
-DIST caffe2-patches-20240809.tar.gz 15242 BLAKE2B 77503c61487e7d85cca5afcab9a6e638f9833a70861845638cf1b62bc492d7b6650e6db81d53ebb2f39c6313509250d339f725f04d03ec6dd23dd0cf70843d8c SHA512 74b3b0b6671b655ecac93f7436c4ed7cb0157a83aafbf6afcc0811e11cef341cd8f638db1a111bcbb01e1a6dd4daf3a36b96d7a8ce90f04c2fa091bd6e3a142b
DIST composable_kernel-50ee4267.tar.gz 4194795 BLAKE2B b3c97d98a0c9e4620fdae3d30006edf55cc60ffa7f8518f6acb8d808647bc4de362c2e2b7e974686503fa2c7f359b6981cfbda74e40cc1bad4d351c5d2ff92e1 SHA512 9fc6f5f15556f020414b4567520329ef762209a82411a246c2bc1240a9fed2669f7fcb982cf773e3e9561bf9a2c557dba82b8b469d2e5844e679e2f5ab7c3e17
DIST composable_kernel-8086bbe3.tar.gz 4418862 BLAKE2B b710e3d4586899443ec01044dad19fd2f992c351e2f65ba526dfcc47cc65c095beaf8ac21a8f71c02a0eb524d364e817b27241a9198884f2bdae9924b51e24e4 SHA512 8410b5a1c864d71f3034ef0d9d1245078856d09cc191faec59856c229bf11d89ae291036d735cb5cec4f1d72e6e9e8f6921833147f9619d30cfab8722d3a9f63
DIST flash-attention-2.7.4.gh.tar.gz 5841323 BLAKE2B 432999d763f2b3d732580ddfea5d3e01370351db0656546259a5e500a07516dd03c98828bfb55855dabe4adc651033b5d97ea4725ca46158b9970f0fbc662710 SHA512 05a4afb09e666f7404d6a3f8b5256e7bed6eba60a6f1bde2b7dbb96d318975f0b458c2521c7a38d88e97b6e4c27f29077cf787849daf82586e33f43a3d9a84b3
-DIST pytorch-2.4.1.tar.gz 115029469 BLAKE2B c2909ff27d527bc57cba56b780d3b8cd07a043ab045caa6c6b27857a16f9ad10aaab2116b26226b1e46ee08ffb44007965d914464418e4ae14ca48c3f3f383bb SHA512 7e9b4485e242eaf0d648765c6621d73d95e7107b766646a098175436d1ab2e2b864badd0757a3bab6b7c318233f2120bad9ac07b39bb9e357897919580c87631
DIST pytorch-2.5.1.tar.gz 116091366 BLAKE2B 7838b17562b94ffc7d798031348689db607dd5eae2a3c35be365972e2b52a2c1b12067068d5aca5ab00cf0977d9c2c3c9ae5337d69534c864c732e6256cbeef6 SHA512 a913a466324a65fa3d79c5e9ad4d605fc7976f0134fda2f81aaa3cea29d56926604999b8a238759646d211e63b47bbb446cdffa86ca8defd8159f11e30301289
DIST pytorch-2.6.0.tar.gz 119594438 BLAKE2B 3152eb341cf42295e147e59625beb9c06608aa4b78f9618c1c0024b10c1c767715d07fe8c4be52d029ac47f808cd0d5e65c9530ec90d951a64b993083b4067ad SHA512 a70da80ff09d226085e18228132cf6bb236ad8cc47eed52375d0d2a615f09dd33849da947270b5670c184eab60cb8e2adf11d801babfbda7aa621400501d07b0
DIST pytorch-2.7.0.tar.gz 50197290 BLAKE2B 2a317d1e9b0d8876f1593382246cd9f786eff3c1b8602353c5e0010dc8414720c5de61886361843a0c33268830c784963a89b410b361e1b67636e652f6a6a2eb SHA512 63eb0363ea68d23567f5524ee8b51756d9302bbe1cbefa367335ab5ebe652523dba75fa417ea3e7eedfc67aa4bef1434c8b7e3dfde2152061b91b6e489763a55
diff --git a/sci-ml/caffe2/caffe2-2.4.1-r7.ebuild b/sci-ml/caffe2/caffe2-2.4.1-r7.ebuild
deleted file mode 100644
index b51bab3da130..000000000000
--- a/sci-ml/caffe2/caffe2-2.4.1-r7.ebuild
+++ /dev/null
@@ -1,305 +0,0 @@
-# Copyright 2022-2025 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-PYTHON_COMPAT=( python3_{10..13} )
-ROCM_VERSION=6.1
-inherit python-single-r1 cmake cuda flag-o-matic prefix rocm toolchain-funcs
-
-MYPN=pytorch
-MYP=${MYPN}-${PV}
-
-DESCRIPTION="A deep learning framework"
-HOMEPAGE="https://pytorch.org/"
-SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
- -> ${MYP}.tar.gz
- https://dev.gentoo.org/~tupone/distfiles/${PN}-patches-20240809.tar.gz"
-
-S="${WORKDIR}"/${MYP}
-
-LICENSE="BSD"
-SLOT="0"
-KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm flash gloo mkl mpi nnpack +numpy onednn openblas opencl openmp qnnpack rocm xnnpack"
-RESTRICT="test"
-REQUIRED_USE="
- ${PYTHON_REQUIRED_USE}
- mpi? ( distributed )
- gloo? ( distributed )
- ?? ( cuda rocm )
- rocm? (
- || ( ${ROCM_REQUIRED_USE} )
- !flash
- )
-"
-
-# CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122
-RDEPEND="
- ${PYTHON_DEPS}
- dev-cpp/abseil-cpp:=
- dev-cpp/gflags:=
- >=dev-cpp/glog-0.5.0
- dev-libs/cpuinfo
- dev-libs/libfmt:=
- dev-cpp/opentelemetry-cpp
- dev-libs/protobuf:=
- dev-libs/pthreadpool
- dev-libs/sleef[cpu_flags_x86_avx512f(+),cpu_flags_x86_avx(+)]
- dev-libs/sleef[cpu_flags_x86_sse3(+),cpu_flags_x86_ssse3(+)]
- dev-libs/sleef[cpu_flags_x86_sse4_1(+),cpu_flags_x86_sse4_2(+)]
- virtual/lapack
- sci-ml/onnx
- sci-ml/foxi
- cuda? (
- dev-libs/cudnn
- >=sci-ml/cudnn-frontend-1.0.3:0/8
- <dev-util/nvidia-cuda-toolkit-12.5:=[profiler]
- )
- fbgemm? ( sci-ml/FBGEMM )
- gloo? ( sci-ml/gloo[cuda?] )
- mpi? ( virtual/mpi )
- nnpack? ( sci-ml/NNPACK )
- numpy? ( $(python_gen_cond_dep '
- dev-python/numpy[${PYTHON_USEDEP}]
- ') )
- onednn? ( sci-ml/oneDNN )
- opencl? ( virtual/opencl )
- qnnpack? (
- !sci-libs/QNNPACK
- sci-ml/gemmlowp
- )
- rocm? (
- =dev-util/hip-6.1*
- =dev-libs/rccl-6.1*
- =sci-libs/rocThrust-6.1*
- =sci-libs/rocPRIM-6.1*
- =sci-libs/hipBLAS-6.1*
- =sci-libs/hipFFT-6.1*
- =sci-libs/hipSPARSE-6.1*
- =sci-libs/hipRAND-6.1*
- =sci-libs/hipCUB-6.1*
- =sci-libs/hipSOLVER-6.1*
- =sci-libs/miopen-6.1*
- =dev-util/roctracer-6.1*
-
- =sci-libs/hipBLASLt-6.1*
- amdgpu_targets_gfx90a? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx90a] )
- amdgpu_targets_gfx940? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx940] )
- amdgpu_targets_gfx941? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx941] )
- amdgpu_targets_gfx942? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx942] )
- )
- distributed? (
- sci-ml/tensorpipe[cuda?]
- dev-cpp/cpp-httplib
- )
- xnnpack? ( sci-ml/XNNPACK )
- mkl? ( sci-libs/mkl )
- openblas? ( sci-libs/openblas )
-"
-DEPEND="
- ${RDEPEND}
- dev-libs/clog
- dev-libs/psimd
- dev-libs/FXdiv
- dev-libs/pocketfft
- dev-libs/flatbuffers
- sci-ml/FP16
- sci-ml/kineto
- $(python_gen_cond_dep '
- dev-python/pybind11[${PYTHON_USEDEP}]
- dev-python/pyyaml[${PYTHON_USEDEP}]
- dev-python/typing-extensions[${PYTHON_USEDEP}]
- ')
- cuda? ( <=dev-libs/cutlass-3.4.1 )
- onednn? ( sci-ml/ideep )
-"
-
-PATCHES=(
- ../patches/${PN}-2.4.0-gentoo.patch
- ../patches/${PN}-2.4.0-install-dirs.patch
- ../patches/${PN}-1.12.0-glog-0.6.0.patch
- ../patches/${PN}-1.13.1-tensorpipe.patch
- ../patches/${PN}-2.3.0-cudnn_include_fix.patch
- ../patches/${PN}-2.1.2-fix-rpath.patch
- ../patches/${PN}-2.4.0-fix-openmp-link.patch
- ../patches/${PN}-2.4.0-rocm-fix-std-cpp17.patch
- ../patches/${PN}-2.2.2-musl.patch
- ../patches/${PN}-2.4.0-exclude-aotriton.patch
- ../patches/${PN}-2.3.0-fix-rocm-gcc14-clamp.patch
- ../patches/${PN}-2.3.0-fix-libcpp.patch
- "${FILESDIR}"/${PN}-2.4.0-libfmt-11.patch
- "${FILESDIR}"/${PN}-2.4.0-cpp-httplib.patch
- "${FILESDIR}"/${PN}-2.4.0-cstdint.patch
-)
-
-src_prepare() {
- filter-lto #bug 862672
- sed -i \
- -e "/third_party\/gloo/d" \
- cmake/Dependencies.cmake \
- || die
- cmake_src_prepare
- pushd torch/csrc/jit/serialization || die
- flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die
- popd
- # prefixify the hardcoded paths, after all patches are applied
- hprefixify \
- aten/CMakeLists.txt \
- caffe2/CMakeLists.txt \
- cmake/Metal.cmake \
- cmake/Modules/*.cmake \
- cmake/Modules_CUDA_fix/FindCUDNN.cmake \
- cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake \
- cmake/Modules_CUDA_fix/upstream/FindPackageHandleStandardArgs.cmake \
- cmake/public/LoadHIP.cmake \
- cmake/public/cuda.cmake \
- cmake/Dependencies.cmake \
- torch/CMakeLists.txt \
- CMakeLists.txt
-
- if use rocm; then
- sed -e "s:/opt/rocm:/usr:" \
- -e "s:lib/cmake:$(get_libdir)/cmake:g" \
- -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
- -i cmake/public/LoadHIP.cmake || die
-
- ebegin "HIPifying cuda sources"
- ${EPYTHON} tools/amd_build/build_amd.py || die
- eend $?
- fi
-}
-
-src_configure() {
- if use cuda && [[ -z ${TORCH_CUDA_ARCH_LIST} ]]; then
- ewarn "WARNING: caffe2 is being built with its default CUDA compute capabilities: 3.5 and 7.0."
- ewarn "These may not be optimal for your GPU."
- ewarn ""
- ewarn "To configure caffe2 with the CUDA compute capability that is optimal for your GPU,"
- ewarn "set TORCH_CUDA_ARCH_LIST in your make.conf, and re-emerge caffe2."
- ewarn "For example, to use CUDA capability 7.5 & 3.5, add: TORCH_CUDA_ARCH_LIST=7.5 3.5"
- ewarn "For a Maxwell model GPU, an example value would be: TORCH_CUDA_ARCH_LIST=Maxwell"
- ewarn ""
- ewarn "You can look up your GPU's CUDA compute capability at https://developer.nvidia.com/cuda-gpus"
- ewarn "or by running /opt/cuda/extras/demo_suite/deviceQuery | grep 'CUDA Capability'"
- fi
-
- local mycmakeargs=(
- -DBUILD_CUSTOM_PROTOBUF=OFF
- -DBUILD_SHARED_LIBS=ON
-
- -DUSE_CCACHE=OFF
- -DUSE_CUDA=$(usex cuda)
- -DUSE_DISTRIBUTED=$(usex distributed)
- -DUSE_MPI=$(usex mpi)
- -DUSE_FAKELOWP=OFF
- -DUSE_FBGEMM=$(usex fbgemm)
- -DUSE_FLASH_ATTENTION=$(usex flash)
- -DUSE_MEM_EFF_ATTENTION=OFF
- -DUSE_GFLAGS=ON
- -DUSE_GLOG=ON
- -DUSE_GLOO=$(usex gloo)
- -DUSE_KINETO=OFF # TODO
- -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
- -DUSE_MKLDNN=$(usex onednn)
- -DUSE_NNPACK=$(usex nnpack)
- -DUSE_XNNPACK=$(usex xnnpack)
- -DUSE_SYSTEM_XNNPACK=$(usex xnnpack)
- -DUSE_TENSORPIPE=$(usex distributed)
- -DUSE_PYTORCH_QNNPACK=$(usex qnnpack)
- -DUSE_NUMPY=$(usex numpy)
- -DUSE_OPENCL=$(usex opencl)
- -DUSE_OPENMP=$(usex openmp)
- -DUSE_ROCM=$(usex rocm)
- -DUSE_SYSTEM_CPUINFO=ON
- -DUSE_SYSTEM_PYBIND11=ON
- -DUSE_UCC=OFF
- -DUSE_VALGRIND=OFF
- -DPython_EXECUTABLE="${PYTHON}"
- -DUSE_ITT=OFF
- -DUSE_SYSTEM_PTHREADPOOL=ON
- -DUSE_SYSTEM_PSIMD=ON
- -DUSE_SYSTEM_FXDIV=ON
- -DUSE_SYSTEM_FP16=ON
- -DUSE_SYSTEM_GLOO=ON
- -DUSE_SYSTEM_ONNX=ON
- -DUSE_SYSTEM_SLEEF=ON
- -DUSE_PYTORCH_METAL=OFF
- -DUSE_XPU=OFF
-
- -Wno-dev
- -DTORCH_INSTALL_LIB_DIR="${EPREFIX}"/usr/$(get_libdir)
- -DLIBSHM_INSTALL_LIB_SUBDIR="${EPREFIX}"/usr/$(get_libdir)
- )
-
- if use mkl; then
- mycmakeargs+=(-DBLAS=MKL)
- elif use openblas; then
- mycmakeargs+=(-DBLAS=OpenBLAS)
- else
- mycmakeargs+=(-DBLAS=Generic -DBLAS_LIBRARIES=)
- fi
-
- if use cuda; then
- addpredict "/dev/nvidiactl" # bug 867706
- addpredict "/dev/char"
- addpredict "/proc/self/task" # bug 926116
-
- mycmakeargs+=(
- -DUSE_CUDNN=ON
- -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
- -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
- -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
- )
- elif use rocm; then
- export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
-
- mycmakeargs+=(
- -DUSE_NCCL=ON
- -DUSE_SYSTEM_NCCL=ON
- )
-
- # ROCm libraries produce too much warnings
- append-cxxflags -Wno-deprecated-declarations -Wno-unused-result
-
- if tc-is-clang; then
- # fix mangling in LLVM: https://github.com/llvm/llvm-project/issues/85656
- append-cxxflags -fclang-abi-compat=17
- fi
- fi
-
- if use onednn; then
- mycmakeargs+=(
- -DUSE_MKLDNN=ON
- -DMKLDNN_FOUND=ON
- -DMKLDNN_LIBRARIES=dnnl
- -DMKLDNN_INCLUDE_DIR="${ESYSROOT}/usr/include/oneapi/dnnl"
- )
- fi
-
- cmake_src_configure
-
- # do not rerun cmake and the build process in src_install
- sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
-}
-
-python_install() {
- python_domodule python/caffe2
- python_domodule python/torch
- ln -s ../../../../../include/torch \
- "${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269
-}
-
-src_install() {
- cmake_src_install
-
- # Used by pytorch ebuild
- insinto "/var/lib/${PN}"
- doins "${BUILD_DIR}"/CMakeCache.txt
-
- rm -rf python
- mkdir -p python/torch/include || die
- mv "${ED}"/usr/lib/python*/site-packages/caffe2 python/ || die
- cp torch/version.py python/torch/ || die
- python_install
-}
diff --git a/sci-ml/caffe2/files/caffe2-2.4.0-cstdint.patch b/sci-ml/caffe2/files/caffe2-2.4.0-cstdint.patch
deleted file mode 100644
index f248ab031eb0..000000000000
--- a/sci-ml/caffe2/files/caffe2-2.4.0-cstdint.patch
+++ /dev/null
@@ -1,10 +0,0 @@
---- a/caffe2/utils/string_utils.cc 2024-09-05 08:29:06.930438069 +0200
-+++ b/caffe2/utils/string_utils.cc 2024-09-05 08:29:28.398137596 +0200
-@@ -3,6 +3,7 @@
- #include <algorithm>
- #include <sstream>
- #include <vector>
-+#include <cstdint>
-
- namespace caffe2 {
-
diff --git a/sci-ml/caffe2/files/caffe2-2.4.0-libfmt-11.patch b/sci-ml/caffe2/files/caffe2-2.4.0-libfmt-11.patch
deleted file mode 100644
index 9f6740a07f1f..000000000000
--- a/sci-ml/caffe2/files/caffe2-2.4.0-libfmt-11.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-Fix build against libfmt-11
-
-https://github.com/pytorch/pytorch/commit/83eedf66b9e7f52323d9f45c5dfaa64472452595
-https://github.com/pytorch/pytorch/pull/130628
-
-From 83eedf66b9e7f52323d9f45c5dfaa64472452595 Mon Sep 17 00:00:00 2001
-From: Aaron Gokaslan <aaronGokaslan@gmail.com>
-Date: Tue, 16 Jul 2024 06:12:08 +0000
-Subject: [PATCH] Update libfmt submodule to 11.0.1 (#130628)
-
-Update libfmt to 11.0.1 reopen of https://github.com/pytorch/pytorch/pull/129962. Requires a kineto update and moves fmt::join into a separate include so added it where necessary.
-
-Pull Request resolved: https://github.com/pytorch/pytorch/pull/130628
-Approved by: https://github.com/aaronenyeshi
---- a/torch/csrc/distributed/c10d/socket.cpp
-+++ b/torch/csrc/distributed/c10d/socket.cpp
-@@ -32,6 +32,7 @@ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdeprecated")
- #include <fmt/chrono.h>
- C10_DIAGNOSTIC_POP()
- #include <fmt/format.h>
-+#include <fmt/ranges.h>
-
- #include <torch/csrc/distributed/c10d/error.h>
- #include <torch/csrc/distributed/c10d/exception.h>
---- a/torch/csrc/profiler/standalone/execution_trace_observer.cpp
-+++ b/torch/csrc/profiler/standalone/execution_trace_observer.cpp
-@@ -10,6 +10,7 @@
- #endif // _WIN32
-
- #include <fmt/format.h>
-+#include <fmt/ranges.h>
- #include <chrono>
- #include <cmath>
- #include <fstream>
---- a/torch/csrc/profiler/util.cpp
-+++ b/torch/csrc/profiler/util.cpp
-@@ -5,6 +5,7 @@
- #include <c10/util/ArrayRef.h>
- #include <c10/util/irange.h>
- #include <fmt/format.h>
-+#include <fmt/ranges.h>
-
- #ifdef USE_KINETO
- #include <libkineto.h>
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-ml/caffe2/files/, sci-ml/caffe2/
@ 2025-07-11 10:05 Alfredo Tupone
0 siblings, 0 replies; 6+ messages in thread
From: Alfredo Tupone @ 2025-07-11 10:05 UTC (permalink / raw)
To: gentoo-commits
commit: 24b6befbdc895b920d307417a5067bfb90e5d5fa
Author: Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Fri Jul 11 07:37:55 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Fri Jul 11 10:04:59 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=24b6befb
sci-ml/caffe2: fix gfx101x compilation and memefficient linkage
Closes: https://bugs.gentoo.org/959808
Bug: https://bugs.gentoo.org/956674
Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Part-of: https://github.com/gentoo/gentoo/pull/42956
Closes: https://github.com/gentoo/gentoo/pull/42956
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
...ffe2-2.7.1-r2.ebuild => caffe2-2.7.1-r3.ebuild} | 6 +
.../caffe2/files/caffe2-2.7.1-aotriton-fixes.patch | 27 ++++
.../files/composable-kernel-6.4.1-expand-isa.patch | 141 +++++++++++++++++++++
3 files changed, 174 insertions(+)
diff --git a/sci-ml/caffe2/caffe2-2.7.1-r2.ebuild b/sci-ml/caffe2/caffe2-2.7.1-r3.ebuild
similarity index 97%
rename from sci-ml/caffe2/caffe2-2.7.1-r2.ebuild
rename to sci-ml/caffe2/caffe2-2.7.1-r3.ebuild
index 4ccb6c07061c..c314b266cdc3 100644
--- a/sci-ml/caffe2/caffe2-2.7.1-r2.ebuild
+++ b/sci-ml/caffe2/caffe2-2.7.1-r3.ebuild
@@ -147,6 +147,7 @@ PATCHES=(
"${FILESDIR}"/${PN}-2.7.0-glog-0.7.1.patch
"${FILESDIR}"/${PN}-2.7.0-llvm.patch
"${FILESDIR}"/${PN}-2.7.1-ck-config.patch
+ "${FILESDIR}"/${PN}-2.7.1-aotriton-fixes.patch
)
src_prepare() {
@@ -221,6 +222,11 @@ src_prepare() {
sed -e "s:third_party/composable_kernel:../composable_kernel-${CK_COMMIT}:g" \
-i aten/src/ATen/CMakeLists.txt || die
+ # Bug 959808: fix for gfx101x targets
+ pushd "${WORKDIR}/composable_kernel-${CK_COMMIT}" > /dev/null || die
+ eapply "${FILESDIR}"/composable-kernel-6.4.1-expand-isa.patch
+ popd > /dev/null || die
+
if tc-is-clang; then
# Systemwide gcc (for absl and at::TensorBase) + hipcc (llvm>=18) need abi-compat=17.
# But systemwide clang>=18 + hipcc (>=llvm-18) need opposite!
diff --git a/sci-ml/caffe2/files/caffe2-2.7.1-aotriton-fixes.patch b/sci-ml/caffe2/files/caffe2-2.7.1-aotriton-fixes.patch
new file mode 100644
index 000000000000..1d2c7bf8f89d
--- /dev/null
+++ b/sci-ml/caffe2/files/caffe2-2.7.1-aotriton-fixes.patch
@@ -0,0 +1,27 @@
+Fix installation with aotriton
+
+Upstream bug: https://github.com/pytorch/pytorch/issues/158109
+--- a/cmake/External/aotriton.cmake
++++ b/cmake/External/aotriton.cmake
+@@ -43,10 +43,6 @@ if(NOT __AOTRITON_INCLUDED)
+
+ # Note it is INSTALL"ED"
+ if(DEFINED ENV{AOTRITON_INSTALLED_PREFIX})
+- install(DIRECTORY
+- $ENV{AOTRITON_INSTALLED_PREFIX}/lib64
+- $ENV{AOTRITON_INSTALLED_PREFIX}/include
+- DESTINATION ${__AOTRITON_INSTALL_DIR})
+ set(__AOTRITON_INSTALL_DIR "$ENV{AOTRITON_INSTALLED_PREFIX}")
+ message(STATUS "Using Preinstalled AOTriton at ${__AOTRITON_INSTALL_DIR}")
+ elseif(DEFINED ENV{AOTRITON_INSTALL_FROM_SOURCE})
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -921,7 +921,7 @@ if(USE_ROCM)
+ set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
+ list(APPEND Caffe2_HIP_SRCS ${GENERATED_CXX_TORCH_CUDA})
+ hip_add_library(torch_hip ${Caffe2_HIP_SRCS})
+- if(USE_FLASH_ATTENTION)
++ if(USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION)
+ target_link_libraries(torch_hip PRIVATE __caffe2_aotriton)
+ endif()
+ set(CUDA_LINK_LIBRARIES_KEYWORD)
diff --git a/sci-ml/caffe2/files/composable-kernel-6.4.1-expand-isa.patch b/sci-ml/caffe2/files/composable-kernel-6.4.1-expand-isa.patch
new file mode 100644
index 000000000000..8a3fb4e1ec6d
--- /dev/null
+++ b/sci-ml/caffe2/files/composable-kernel-6.4.1-expand-isa.patch
@@ -0,0 +1,141 @@
+Fix for "undeclared identifier 'CK_BUFFER_RESOURCE_3RD_DWORD'" for AMDGPU_TARGETS="gfx1012".
+Combination of 3 patches from https://github.com/ROCm/composable_kernel/issues/775#issuecomment-2726315348
+
+Bug: https://bugs.gentoo.org/947583
+Bug: https://bugs.gentoo.org/show_bug.cgi?id=959808
+--- a/include/ck/ck.hpp
++++ b/include/ck/ck.hpp
+@@ -82,7 +82,7 @@ CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
+ #define CK_BUFFER_RESOURCE_3RD_DWORD -1
+ #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || defined(__gfx9__)
+ #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000
+-#elif defined(__gfx103__)
++#elif defined(__gfx101__) || defined(__gfx103__)
+ #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000
+ #elif defined(__gfx11__) || defined(__gfx12__)
+ #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31004000
+@@ -90,12 +90,12 @@ CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
+
+ // FMA instruction
+ #ifndef __HIP_DEVICE_COMPILE__ // for host code, define nothing
+-#elif defined(__gfx803__) || defined(__gfx900__) // for GPU code
+-#define CK_USE_AMD_V_MAC_F32
+-#elif defined(__gfx906__) || defined(__gfx9__) || defined(__gfx103__) // for GPU code
++#elif defined(__gfx906__) || defined(__gfx9__) || defined(__gfx103__) || defined(__gfx1011__) || defined(__gfx1012__) // for GPU code
+ #define CK_USE_AMD_V_FMAC_F32
+ #define CK_USE_AMD_V_DOT2_F32_F16
+ #define CK_USE_AMD_V_DOT4_I32_I8
++#elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx101__) // for GPU code
++#define CK_USE_AMD_V_MAC_F32
+ #elif defined(__gfx11__) || defined(__gfx12__)
+ #define CK_USE_AMD_V_FMAC_F32
+ #define CK_USE_AMD_V_DOT2_F32_F16
+--- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp
+@@ -71,7 +71,7 @@ __global__ void
+ const Block2CTileMap block_2_ctile_map)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) || \
+- defined(__gfx90a__) || defined(__gfx94__) || defined(__gfx103__) || defined(__gfx11__) || \
++ defined(__gfx90a__) || defined(__gfx94__) || defined(__gfx101__) || defined(__gfx103__) || defined(__gfx11__) || \
+ defined(__gfx12__))
+
+ const index_t num_blocks_per_batch =
+--- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp
+@@ -51,7 +51,7 @@ __global__ void
+ const Block2CTileMap block_2_ctile_map)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx9__) || \
+- defined(__gfx103__) || defined(__gfx11__) || defined(__gfx12__))
++ defined(__gfx101__) || defined(__gfx103__) || defined(__gfx11__) || defined(__gfx12__))
+
+ constexpr index_t shared_block_size =
+ GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(ABDataType);
+--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp
+@@ -48,7 +48,7 @@ __global__ void
+ const Block2CTileMap block_2_ctile_map,
+ const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch)
+ {
+-#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx103__) || \
++#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx101__) || defined(__gfx103__) || \
+ defined(__gfx90a__) || defined(__gfx908__) || defined(__gfx94__) || defined(__gfx11__) || \
+ defined(__gfx12__))
+ const index_t num_blocks_per_batch =
+--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp
+@@ -90,7 +90,7 @@ __global__ void
+ const Block2CTileMap block_2_ctile_map,
+ const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch)
+ {
+-#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx103__) || \
++#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx101__) || defined(__gfx103__) || \
+ defined(__gfx90a__) || defined(__gfx908__) || defined(__gfx94__) || defined(__gfx11__) || \
+ defined(__gfx12__))
+ // offset base pointer for each work-group
+--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp
+@@ -106,7 +106,7 @@ __global__ void
+ const Block2CTileMap block_2_ctile_map,
+ const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch)
+ {
+-#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx103__) || \
++#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx101__) || defined(__gfx103__) || \
+ defined(__gfx11__) || defined(__gfx12__))
+ // offset base pointer for each work-group
+ const index_t num_blocks_per_batch =
+--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
++++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
+@@ -40,7 +40,7 @@ __global__ void
+ const CDEElementwiseOperation cde_element_op)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) || \
+- defined(__gfx90a__) || defined(__gfx103__) || defined(__gfx11__) || defined(__gfx94__) || \
++ defined(__gfx90a__) || defined(__gfx101__) || defined(__gfx103__) || defined(__gfx11__) || defined(__gfx94__) || \
+ defined(__gfx12__))
+ __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
+
+--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp
++++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp
+@@ -28,7 +28,7 @@ __global__ void
+ #endif
+ kernel_gemm_dpp(const typename GridwiseGemm::Argument karg)
+ {
+-#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx103__) || defined(__gfx11__))
++#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx101__) || defined(__gfx103__) || defined(__gfx11__))
+ __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
+
+ const auto a_grid_desc_ak0_m_ak1 = amd_wave_read_first_lane(
+--- a/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp
++++ b/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp
+@@ -36,7 +36,7 @@ __global__ void
+ const ComputePtrOffsetOfStridedBatch compute_ptr_offset_of_batch)
+ {
+ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) || \
+- defined(__gfx90a__) || defined(__gfx94__) || defined(__gfx103__) || defined(__gfx11__) || \
++ defined(__gfx90a__) || defined(__gfx94__) || defined(__gfx101__) || defined(__gfx103__) || defined(__gfx11__) || \
+ defined(__gfx12__))
+ GridwiseTensorRearrangeKernel::Run(in_grid_desc,
+ p_in_global,
+--- a/include/ck_tile/core/config.hpp
++++ b/include/ck_tile/core/config.hpp
+@@ -10,6 +10,9 @@
+ #if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__)
+ #define __gfx94__
+ #endif
++#if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__)
++#define __gfx101__
++#endif
+ #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \
+ defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) || \
+ defined(__gfx10_3_generic__)
+@@ -199,7 +202,7 @@
+ #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || \
+ defined(__gfx9__) // for GPU code
+ #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x00020000
+-#elif defined(__gfx103__) // for GPU code
++#elif defined(__gfx101__) || defined(__gfx103__) // for GPU code
+ #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31014000
+ #elif defined(__gfx11__) || defined(__gfx12__) // for GPU code
+ #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31004000
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-ml/caffe2/files/, sci-ml/caffe2/
@ 2025-08-16 18:41 Alfredo Tupone
0 siblings, 0 replies; 6+ messages in thread
From: Alfredo Tupone @ 2025-08-16 18:41 UTC (permalink / raw
To: gentoo-commits
commit: 03ad26826550632dbb96033b6b0cb3a3447539af
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Sat Aug 16 18:40:56 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Sat Aug 16 18:40:56 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=03ad2682
sci-ml/caffe2: drop 2.5.1-r12, 2.7.0-r3
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
sci-ml/caffe2/Manifest | 2 -
sci-ml/caffe2/caffe2-2.5.1-r12.ebuild | 331 -----------------------
sci-ml/caffe2/caffe2-2.7.0-r3.ebuild | 366 --------------------------
sci-ml/caffe2/files/caffe2-2.5.1-gentoo.patch | 127 ---------
4 files changed, 826 deletions(-)
diff --git a/sci-ml/caffe2/Manifest b/sci-ml/caffe2/Manifest
index cb3c35b99981..2e7e0c634bf9 100644
--- a/sci-ml/caffe2/Manifest
+++ b/sci-ml/caffe2/Manifest
@@ -1,8 +1,6 @@
DIST composable_kernel-50ee4267.tar.gz 4194795 BLAKE2B b3c97d98a0c9e4620fdae3d30006edf55cc60ffa7f8518f6acb8d808647bc4de362c2e2b7e974686503fa2c7f359b6981cfbda74e40cc1bad4d351c5d2ff92e1 SHA512 9fc6f5f15556f020414b4567520329ef762209a82411a246c2bc1240a9fed2669f7fcb982cf773e3e9561bf9a2c557dba82b8b469d2e5844e679e2f5ab7c3e17
DIST composable_kernel-8086bbe3.tar.gz 4418862 BLAKE2B b710e3d4586899443ec01044dad19fd2f992c351e2f65ba526dfcc47cc65c095beaf8ac21a8f71c02a0eb524d364e817b27241a9198884f2bdae9924b51e24e4 SHA512 8410b5a1c864d71f3034ef0d9d1245078856d09cc191faec59856c229bf11d89ae291036d735cb5cec4f1d72e6e9e8f6921833147f9619d30cfab8722d3a9f63
DIST flash-attention-2.7.4.gh.tar.gz 5841323 BLAKE2B 432999d763f2b3d732580ddfea5d3e01370351db0656546259a5e500a07516dd03c98828bfb55855dabe4adc651033b5d97ea4725ca46158b9970f0fbc662710 SHA512 05a4afb09e666f7404d6a3f8b5256e7bed6eba60a6f1bde2b7dbb96d318975f0b458c2521c7a38d88e97b6e4c27f29077cf787849daf82586e33f43a3d9a84b3
-DIST pytorch-2.5.1.tar.gz 116091366 BLAKE2B 7838b17562b94ffc7d798031348689db607dd5eae2a3c35be365972e2b52a2c1b12067068d5aca5ab00cf0977d9c2c3c9ae5337d69534c864c732e6256cbeef6 SHA512 a913a466324a65fa3d79c5e9ad4d605fc7976f0134fda2f81aaa3cea29d56926604999b8a238759646d211e63b47bbb446cdffa86ca8defd8159f11e30301289
DIST pytorch-2.6.0.tar.gz 119594438 BLAKE2B 3152eb341cf42295e147e59625beb9c06608aa4b78f9618c1c0024b10c1c767715d07fe8c4be52d029ac47f808cd0d5e65c9530ec90d951a64b993083b4067ad SHA512 a70da80ff09d226085e18228132cf6bb236ad8cc47eed52375d0d2a615f09dd33849da947270b5670c184eab60cb8e2adf11d801babfbda7aa621400501d07b0
-DIST pytorch-2.7.0.tar.gz 50197290 BLAKE2B 2a317d1e9b0d8876f1593382246cd9f786eff3c1b8602353c5e0010dc8414720c5de61886361843a0c33268830c784963a89b410b361e1b67636e652f6a6a2eb SHA512 63eb0363ea68d23567f5524ee8b51756d9302bbe1cbefa367335ab5ebe652523dba75fa417ea3e7eedfc67aa4bef1434c8b7e3dfde2152061b91b6e489763a55
DIST pytorch-2.7.1.tar.gz 50203605 BLAKE2B 3f4b2643d86fe9ff30b2f335353dfe6a8e222bcc12143bc5d09268fb37bfd42f9451620e6e0db225c3c3e7930c999115fdd2ed62b7eae93b0d5e233270c7c760 SHA512 a9fc2252af9031c2cd46dde558c491aea8bc322fb80157a7760f300a44b759d4bfe866f030fbb974b80493057cfff4dd512498f99a100ed6d05bf620258ed37e
DIST pytorch-2.8.0.tar.gz 56565754 BLAKE2B a8f07513b92f9293f8322508f9fc73a462f89fe51cb1f280af371cee19cbe7e2bf900ba2b3c43fd08ea415566db441a6d6310d77f18477e957641be311a361a5 SHA512 448e9dad4aa10f1793d35e6ffe9f0f69b7719d41e6eccceb687a8d0c148e22d03e4f76170a05308ef9323a7aea41aa74605077ae1d68c6d949f13b3340ebf310
diff --git a/sci-ml/caffe2/caffe2-2.5.1-r12.ebuild b/sci-ml/caffe2/caffe2-2.5.1-r12.ebuild
deleted file mode 100644
index 6b5f88a1e12b..000000000000
--- a/sci-ml/caffe2/caffe2-2.5.1-r12.ebuild
+++ /dev/null
@@ -1,331 +0,0 @@
-# Copyright 2022-2025 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-PYTHON_COMPAT=( python3_{10..13} )
-ROCM_VERSION=6.1
-inherit python-single-r1 cmake cuda flag-o-matic prefix rocm toolchain-funcs
-
-MYPN=pytorch
-MYP=${MYPN}-${PV}
-
-DESCRIPTION="A deep learning framework"
-HOMEPAGE="https://pytorch.org/"
-SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
- -> ${MYP}.tar.gz"
-
-S="${WORKDIR}"/${MYP}
-
-LICENSE="BSD"
-SLOT="0"
-KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm flash gloo memefficient mkl mpi nnpack +numpy
- onednn openblas opencl openmp qnnpack rocm xnnpack"
-RESTRICT="test"
-REQUIRED_USE="
- ${PYTHON_REQUIRED_USE}
- mpi? ( distributed )
- gloo? ( distributed )
- ?? ( cuda rocm )
- rocm? (
- || ( ${ROCM_REQUIRED_USE} )
- !flash
- )
-"
-
-RDEPEND="
- ${PYTHON_DEPS}
- dev-cpp/abseil-cpp:=
- dev-cpp/gflags:=
- >=dev-cpp/glog-0.5.0
- dev-cpp/nlohmann_json
- dev-cpp/opentelemetry-cpp
- dev-libs/cpuinfo
- dev-libs/libfmt:=
- dev-libs/protobuf:=
- dev-libs/pthreadpool
- dev-libs/sleef
- sci-ml/onnx
- sci-ml/foxi
- virtual/lapack
- cuda? (
- dev-libs/cudnn
- >=sci-ml/cudnn-frontend-1.0.3:0/8
- dev-util/nvidia-cuda-toolkit:=[profiler]
- )
- fbgemm? ( sci-ml/FBGEMM )
- gloo? ( <=sci-ml/gloo-2023.12.03[cuda?] )
- mpi? ( virtual/mpi )
- nnpack? ( sci-ml/NNPACK )
- numpy? ( $(python_gen_cond_dep '
- dev-python/numpy[${PYTHON_USEDEP}]
- ') )
- onednn? ( =sci-ml/oneDNN-3.5* )
- opencl? ( virtual/opencl )
- qnnpack? (
- !sci-libs/QNNPACK
- sci-ml/gemmlowp
- )
- rocm? (
- =dev-util/hip-6.1*
- =dev-libs/rccl-6.1*
- =sci-libs/rocThrust-6.1*
- =sci-libs/rocPRIM-6.1*
- =sci-libs/hipBLAS-6.1*
- =sci-libs/hipFFT-6.1*
- =sci-libs/hipSPARSE-6.1*
- =sci-libs/hipRAND-6.1*
- =sci-libs/hipCUB-6.1*
- =sci-libs/hipSOLVER-6.1*
- =sci-libs/miopen-6.1*
- =dev-util/roctracer-6.1*
-
- =sci-libs/hipBLASLt-6.1*
- amdgpu_targets_gfx90a? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx90a] )
- amdgpu_targets_gfx940? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx940] )
- amdgpu_targets_gfx941? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx941] )
- amdgpu_targets_gfx942? ( =sci-libs/hipBLASLt-6.1*[amdgpu_targets_gfx942] )
- )
- distributed? (
- sci-ml/tensorpipe[cuda?]
- dev-cpp/cpp-httplib
- )
- xnnpack? ( ~sci-ml/XNNPACK-2024.02.29 )
- mkl? ( sci-libs/mkl )
- openblas? ( sci-libs/openblas )
-"
-
-DEPEND="
- ${RDEPEND}
- dev-libs/flatbuffers
- dev-libs/FXdiv
- dev-libs/pocketfft
- dev-libs/psimd
- sci-ml/FP16
- sci-ml/kineto
- $(python_gen_cond_dep '
- dev-python/pybind11[${PYTHON_USEDEP}]
- dev-python/pyyaml[${PYTHON_USEDEP}]
- dev-python/typing-extensions[${PYTHON_USEDEP}]
- ')
- cuda? ( <=dev-libs/cutlass-3.4.1 )
- onednn? ( sci-ml/ideep )
- qnnpack? ( dev-libs/clog )
-"
-
-PATCHES=(
- "${FILESDIR}"/${P}-unbundle_fmt.patch
- "${FILESDIR}"/${P}-unbundle_kineto.patch
- "${FILESDIR}"/${P}-cudnn_include_fix.patch
- "${FILESDIR}"/${P}-gentoo.patch
- "${FILESDIR}"/${PN}-2.4.0-cpp-httplib.patch
- "${FILESDIR}"/${P}-glog-0.6.0.patch
- "${FILESDIR}"/${P}-newfix-functorch-install.patch
-)
-
-src_prepare() {
- filter-lto #bug 862672
-
- # Unbundle fmt
- sed -i \
- -e 's|::fmt-header-only||' \
- c10/CMakeLists.txt \
- cmake/Dependencies.cmake \
- torch/CMakeLists.txt \
- || die
-
- # Drop third_party from CMake tree
- sed -i \
- -e '/add_subdirectory.*third_party/d' \
- CMakeLists.txt \
- cmake/Dependencies.cmake \
- cmake/ProtoBuf.cmake \
- aten/src/ATen/CMakeLists.txt \
- || die
- # Change libc10* path
- sed -i \
- -e "/EXPORT/s|DESTINATION lib)|DESTINATION $(get_libdir))|" \
- c10/cuda/CMakeLists.txt \
- c10/CMakeLists.txt \
- c10/hip/CMakeLists.txt \
- || die
- sed -i \
- -e '/Using pocketfft in directory:/d' \
- cmake/Dependencies.cmake \
- || die
-
- cmake_src_prepare
- pushd torch/csrc/jit/serialization || die
- flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die
- popd
-
- # prefixify the hardcoded paths, after all patches are applied
- hprefixify \
- aten/CMakeLists.txt \
- caffe2/CMakeLists.txt \
- cmake/Metal.cmake \
- cmake/Modules/*.cmake \
- cmake/Modules_CUDA_fix/FindCUDNN.cmake \
- cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake \
- cmake/Modules_CUDA_fix/upstream/FindPackageHandleStandardArgs.cmake \
- cmake/public/LoadHIP.cmake \
- cmake/public/cuda.cmake \
- cmake/Dependencies.cmake \
- torch/CMakeLists.txt \
- CMakeLists.txt
-
- if use rocm; then
- sed -e "s:/opt/rocm:/usr:" \
- -e "s:lib/cmake:$(get_libdir)/cmake:g" \
- -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
- -i cmake/public/LoadHIP.cmake || die
-
- ebegin "HIPifying cuda sources"
- ${EPYTHON} tools/amd_build/build_amd.py || die
- eend $?
- fi
-}
-
-src_configure() {
- if use cuda && [[ -z ${TORCH_CUDA_ARCH_LIST} ]]; then
- ewarn "WARNING: caffe2 is being built with its default CUDA compute capabilities: 3.5 and 7.0."
- ewarn "These may not be optimal for your GPU."
- ewarn ""
- ewarn "To configure caffe2 with the CUDA compute capability that is optimal for your GPU,"
- ewarn "set TORCH_CUDA_ARCH_LIST in your make.conf, and re-emerge caffe2."
- ewarn "For example, to use CUDA capability 7.5 & 3.5, add: TORCH_CUDA_ARCH_LIST=7.5 3.5"
- ewarn "For a Maxwell model GPU, an example value would be: TORCH_CUDA_ARCH_LIST=Maxwell"
- ewarn ""
- ewarn "You can look up your GPU's CUDA compute capability at https://developer.nvidia.com/cuda-gpus"
- ewarn "or by running /opt/cuda/extras/demo_suite/deviceQuery | grep 'CUDA Capability'"
- fi
-
- local mycmakeargs=(
- -DBUILD_CUSTOM_PROTOBUF=OFF
- -DLIBSHM_INSTALL_LIB_SUBDIR="${EPREFIX}"/usr/$(get_libdir)
- -DPython_EXECUTABLE="${PYTHON}"
- -DTORCH_INSTALL_LIB_DIR="${EPREFIX}"/usr/$(get_libdir)
- -DUSE_CCACHE=OFF
- -DUSE_CUDA=$(usex cuda)
- -DUSE_DISTRIBUTED=$(usex distributed)
- -DUSE_FAKELOWP=OFF
- -DUSE_FBGEMM=$(usex fbgemm)
- -DUSE_FLASH_ATTENTION=$(usex flash)
- -DUSE_GFLAGS=ON
- -DUSE_GLOG=ON
- -DUSE_GLOO=$(usex gloo)
- -DUSE_ITT=OFF
- -DUSE_KINETO=OFF # TODO
- -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
- -DUSE_MEM_EFF_ATTENTION=$(usex memefficient)
- -DUSE_MKLDNN=$(usex onednn)
- -DUSE_MPI=$(usex mpi)
- -DUSE_NCCL=OFF
- -DUSE_NNPACK=$(usex nnpack)
- -DUSE_NUMA=OFF
- -DUSE_NUMPY=$(usex numpy)
- -DUSE_OPENCL=$(usex opencl)
- -DUSE_OPENMP=$(usex openmp)
- -DUSE_PYTORCH_QNNPACK=$(usex qnnpack)
- -DUSE_PYTORCH_METAL=OFF
- -DUSE_ROCM=$(usex rocm)
- -DUSE_SYSTEM_CPUINFO=ON
- -DUSE_SYSTEM_EIGEN_INSTALL=ON
- -DUSE_SYSTEM_FP16=ON
- -DUSE_SYSTEM_FXDIV=ON
- -DUSE_SYSTEM_GLOO=ON
- -DUSE_SYSTEM_ONNX=ON
- -DUSE_SYSTEM_PSIMD=ON
- -DUSE_SYSTEM_PSIMD=ON
- -DUSE_SYSTEM_PTHREADPOOL=ON
- -DUSE_SYSTEM_PYBIND11=ON
- -DUSE_SYSTEM_SLEEF=ON
- -DUSE_SYSTEM_XNNPACK=$(usex xnnpack)
- -DUSE_TENSORPIPE=$(usex distributed)
- -DUSE_UCC=OFF
- -DUSE_VALGRIND=OFF
- -DUSE_XNNPACK=$(usex xnnpack)
- -DUSE_XPU=OFF
- -Wno-dev
- )
-
- if use mkl; then
- mycmakeargs+=(-DBLAS=MKL)
- elif use openblas; then
- mycmakeargs+=(-DBLAS=OpenBLAS)
- else
- mycmakeargs+=(-DBLAS=Generic -DBLAS_LIBRARIES=)
- fi
-
- if use cuda; then
- addpredict "/dev/nvidiactl" # bug 867706
- addpredict "/dev/char"
- addpredict "/proc/self/task" # bug 926116
-
- mycmakeargs+=(
- -DUSE_CUDNN=ON
- -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
- -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
- -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
- )
- elif use rocm; then
- export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
-
- mycmakeargs+=(
- -DUSE_NCCL=ON
- -DUSE_SYSTEM_NCCL=ON
- )
-
- # ROCm libraries produce too much warnings
- append-cxxflags -Wno-deprecated-declarations -Wno-unused-result
-
- if tc-is-clang; then
- # fix mangling in LLVM: https://github.com/llvm/llvm-project/issues/85656
- append-cxxflags -fclang-abi-compat=17
- fi
- fi
-
- if use onednn; then
- mycmakeargs+=(
- -DMKLDNN_FOUND=ON
- -DMKLDNN_LIBRARIES=dnnl
- -DMKLDNN_INCLUDE_DIR="${ESYSROOT}/usr/include/oneapi/dnnl"
- )
- fi
-
- cmake_src_configure
-}
-
-src_compile() {
- PYTORCH_BUILD_VERSION=${PV} \
- PYTORCH_BUILD_NUMBER=0 \
- cmake_src_compile
-}
-
-python_install() {
- python_domodule python/torch
- mkdir "${D}"$(python_get_sitedir)/torch/bin || die
- mkdir "${D}"$(python_get_sitedir)/torch/lib || die
- mkdir "${D}"$(python_get_sitedir)/torch/include || die
- ln -s ../../../../../include/torch \
- "${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269
- ln -s ../../../../../bin/torch_shm_manager \
- "${D}"/$(python_get_sitedir)/torch/bin/torch_shm_manager || die
- ln -s ../../../../../$(get_libdir)/libtorch_global_deps.so \
- "${D}"/$(python_get_sitedir)/torch/lib/libtorch_global_deps.so || die
-}
-
-src_install() {
- cmake_src_install
-
- # Used by pytorch ebuild
- insinto "/var/lib/${PN}"
- doins "${BUILD_DIR}"/CMakeCache.txt
- dostrip -x /var/lib/${PN}/functorch.so
-
- rm -rf python
- mkdir -p python/torch || die
- cp torch/version.py python/torch/ || die
- python_install
-}
diff --git a/sci-ml/caffe2/caffe2-2.7.0-r3.ebuild b/sci-ml/caffe2/caffe2-2.7.0-r3.ebuild
deleted file mode 100644
index cf3872ea65f9..000000000000
--- a/sci-ml/caffe2/caffe2-2.7.0-r3.ebuild
+++ /dev/null
@@ -1,366 +0,0 @@
-# Copyright 2022-2025 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-PYTHON_COMPAT=( python3_{10..13} )
-ROCM_VERSION=6.1
-inherit python-single-r1 cmake cuda flag-o-matic prefix rocm toolchain-funcs
-
-MYPN=pytorch
-MYP=${MYPN}-${PV}
-
-# caffe2-2.6.0 depends on future version of composable kernel
-# TODO: replace it with RDEPEND in the future
-CK_COMMIT=8086bbe3a78d931eb96fe12fdc014082e18d18d3
-CK_P=composable_kernel-${CK_COMMIT:0:8}
-
-FLASH_PV=2.7.4
-FLASH_PN=flash-attention
-FLASH_P=${FLASH_PN}-${FLASH_PV}
-
-DESCRIPTION="A deep learning framework"
-HOMEPAGE="https://pytorch.org/"
-SRC_URI="
- https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz -> ${MYP}.tar.gz
- rocm? (
- https://github.com/ROCm/composable_kernel/archive/${CK_COMMIT}.tar.gz
- -> ${CK_P}.tar.gz
- )
- flash? (
- https://github.com/Dao-AILab/${FLASH_PN}/archive/refs/tags/v${FLASH_PV}.tar.gz
- -> ${FLASH_P}.gh.tar.gz
- )
-"
-
-S="${WORKDIR}"/${MYP}
-
-LICENSE="BSD"
-SLOT="0"
-KEYWORDS="~amd64 ~arm64"
-IUSE="cuda cusparselt distributed fbgemm flash gloo memefficient mkl mpi nnpack +numpy
- onednn openblas opencl openmp qnnpack rocm xnnpack"
-RESTRICT="test"
-REQUIRED_USE="
- ${PYTHON_REQUIRED_USE}
- mpi? ( distributed )
- gloo? ( distributed )
- ?? ( cuda rocm )
- rocm? (
- || ( ${ROCM_REQUIRED_USE} )
- !flash
- )
-"
-
-RDEPEND="
- ${PYTHON_DEPS}
- dev-cpp/abseil-cpp:=
- dev-cpp/gflags:=
- >=dev-cpp/glog-0.5.0
- dev-cpp/nlohmann_json
- dev-cpp/opentelemetry-cpp
- dev-libs/cpuinfo
- dev-libs/libfmt:=
- dev-libs/protobuf:=
- dev-libs/pthreadpool
- dev-libs/sleef
- sci-ml/foxi
- ~sci-ml/kineto-0.4.0_p20250214
- sci-ml/onnx
- virtual/lapack
- cuda? (
- dev-libs/cudnn
- >=sci-ml/cudnn-frontend-1.0.3:0/8
- dev-util/nvidia-cuda-toolkit:=[profiler]
- cusparselt? ( dev-libs/cusparselt )
- )
- fbgemm? ( sci-ml/FBGEMM )
- gloo? ( <=sci-ml/gloo-2023.12.03[cuda?] )
- mpi? ( virtual/mpi )
- nnpack? ( sci-ml/NNPACK )
- numpy? ( $(python_gen_cond_dep '
- dev-python/numpy[${PYTHON_USEDEP}]
- ') )
- onednn? ( =sci-ml/oneDNN-3.5* )
- opencl? ( virtual/opencl )
- qnnpack? (
- !sci-libs/QNNPACK
- sci-ml/gemmlowp
- )
- rocm? (
- >=dev-libs/rccl-6.1 <dev-libs/rccl-6.5
- >=dev-util/hip-6.1 <dev-util/hip-6.5
- >=dev-util/roctracer-6.1 <dev-util/roctracer-6.5
- >=sci-libs/hipBLAS-6.1 <sci-libs/hipBLAS-6.5
- >=sci-libs/hipBLASLt-6.1 <sci-libs/hipBLASLt-6.5
- >=sci-libs/hipCUB-6.1 <sci-libs/hipCUB-6.5
- >=sci-libs/hipFFT-6.1 <sci-libs/hipFFT-6.5
- >=sci-libs/hipRAND-6.1 <sci-libs/hipRAND-6.5
- >=sci-libs/hipSOLVER-6.1 <sci-libs/hipSOLVER-6.5
- >=sci-libs/hipSPARSE-6.1 <sci-libs/hipSPARSE-6.5
- >=sci-libs/miopen-6.1 <sci-libs/miopen-6.5
- >=sci-libs/rocPRIM-6.1 <sci-libs/rocPRIM-6.5
- >=sci-libs/rocThrust-6.1 <sci-libs/rocThrust-6.5
- )
- distributed? (
- sci-ml/tensorpipe[cuda?]
- dev-cpp/cpp-httplib
- )
- xnnpack? ( >=sci-ml/XNNPACK-2024.11 )
- mkl? ( sci-libs/mkl )
- openblas? ( sci-libs/openblas )
-"
-
-DEPEND="
- ${RDEPEND}
- dev-libs/flatbuffers
- dev-libs/FXdiv
- dev-libs/pocketfft
- dev-libs/psimd
- sci-ml/FP16
- $(python_gen_cond_dep '
- dev-python/pybind11[${PYTHON_USEDEP}]
- dev-python/pyyaml[${PYTHON_USEDEP}]
- dev-python/typing-extensions[${PYTHON_USEDEP}]
- ')
- cuda? ( ~dev-libs/cutlass-3.8.0 )
- onednn? ( sci-ml/ideep )
- qnnpack? ( dev-libs/clog )
-"
-
-PATCHES=(
- "${FILESDIR}"/${PN}-2.5.1-unbundle_fmt.patch
- "${FILESDIR}"/${PN}-2.5.1-unbundle_kineto.patch
- "${FILESDIR}"/${PN}-2.5.1-cudnn_include_fix.patch
- "${FILESDIR}"/${P}-gentoo.patch
- "${FILESDIR}"/${PN}-2.4.0-cpp-httplib.patch
- "${FILESDIR}"/${PN}-2.5.1-glog-0.6.0.patch
- "${FILESDIR}"/${PN}-2.5.1-newfix-functorch-install.patch
- "${FILESDIR}"/${PN}-2.6.0-rocm-fix-std-cpp17.patch
- "${FILESDIR}"/${P}-cmake.patch
- "${FILESDIR}"/${P}-glog-0.7.1.patch
- "${FILESDIR}"/${P}-llvm.patch
-)
-
-src_prepare() {
- if use flash; then
- mv "${WORKDIR}"/${FLASH_P}/* third_party/${FLASH_PN}/ || die
- fi
- filter-lto #bug 862672
-
- # Unbundle fmt
- sed -i \
- -e 's|::fmt-header-only||' \
- c10/CMakeLists.txt \
- cmake/Dependencies.cmake \
- torch/CMakeLists.txt \
- || die
-
- # Drop third_party from CMake tree
- sed -i \
- -e '/add_subdirectory.*third_party/d' \
- CMakeLists.txt \
- cmake/Dependencies.cmake \
- cmake/ProtoBuf.cmake \
- aten/src/ATen/CMakeLists.txt \
- || die
- # Change libc10* path
- sed -i \
- -e "/EXPORT/s|DESTINATION lib)|DESTINATION $(get_libdir))|" \
- c10/cuda/CMakeLists.txt \
- c10/CMakeLists.txt \
- c10/hip/CMakeLists.txt \
- || die
- sed -i \
- -e '/Using pocketfft in directory:/d' \
- cmake/Dependencies.cmake \
- || die
-
- # Noisy warnings from Logging.h
- sed -i 's/-Wextra-semi//' cmake/public/utils.cmake || die
-
- cmake_src_prepare
- pushd torch/csrc/jit/serialization || die
- flatc --cpp --gen-mutable --scoped-enums mobile_bytecode.fbs || die
- popd
-
- # prefixify the hardcoded paths, after all patches are applied
- hprefixify \
- aten/CMakeLists.txt \
- caffe2/CMakeLists.txt \
- cmake/Metal.cmake \
- cmake/Modules/*.cmake \
- cmake/Modules_CUDA_fix/FindCUDNN.cmake \
- cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake \
- cmake/Modules_CUDA_fix/upstream/FindPackageHandleStandardArgs.cmake \
- cmake/public/LoadHIP.cmake \
- cmake/public/cuda.cmake \
- cmake/Dependencies.cmake \
- torch/CMakeLists.txt \
- CMakeLists.txt
-
- if use rocm; then
- sed -e "s:/opt/rocm:/usr:" \
- -e "s:lib/cmake:$(get_libdir)/cmake:g" \
- -i cmake/public/LoadHIP.cmake || die
-
- # TODO: delete, when caffe2 depends on systemwide composable_kernel
- sed -e "s:third_party/composable_kernel:../composable_kernel-${CK_COMMIT}:g" \
- -i aten/src/ATen/CMakeLists.txt || die
-
- if tc-is-clang; then
- # Systemwide gcc (for absl and at::TensorBase) + hipcc (llvm>=18) need abi-compat=17.
- # But systemwide clang>=18 + hipcc (>=llvm-18) need opposite!
- # See also: https://github.com/llvm/llvm-project/issues/102443#issuecomment-2329726287
- sed '/-fclang-abi-compat=17/d' -i cmake/Dependencies.cmake || die
- fi
-
- # Workaround for libc++ issue https://github.com/llvm/llvm-project/issues/100802
- sed 's/std::memcpy/memcpy/g' -i c10/util/Half.h || die
-
- ebegin "HIPifying cuda sources"
- ${EPYTHON} tools/amd_build/build_amd.py || die
- eend $?
- fi
-}
-
-src_configure() {
- if use cuda && [[ -z ${TORCH_CUDA_ARCH_LIST} ]]; then
- ewarn "WARNING: caffe2 is being built with its default CUDA compute capabilities: 3.5 and 7.0."
- ewarn "These may not be optimal for your GPU."
- ewarn ""
- ewarn "To configure caffe2 with the CUDA compute capability that is optimal for your GPU,"
- ewarn "set TORCH_CUDA_ARCH_LIST in your make.conf, and re-emerge caffe2."
- ewarn "For example, to use CUDA capability 7.5 & 3.5, add: TORCH_CUDA_ARCH_LIST=7.5 3.5"
- ewarn "For a Maxwell model GPU, an example value would be: TORCH_CUDA_ARCH_LIST=Maxwell"
- ewarn ""
- ewarn "You can look up your GPU's CUDA compute capability at https://developer.nvidia.com/cuda-gpus"
- ewarn "or by running /opt/cuda/extras/demo_suite/deviceQuery | grep 'CUDA Capability'"
- fi
-
- local mycmakeargs=(
- -DBUILD_CUSTOM_PROTOBUF=OFF
- -DLIBSHM_INSTALL_LIB_SUBDIR="${EPREFIX}"/usr/$(get_libdir)
- -DPython_EXECUTABLE="${PYTHON}"
- -DTORCH_INSTALL_LIB_DIR="${EPREFIX}"/usr/$(get_libdir)
- -DUSE_CCACHE=OFF
- -DUSE_CUDA=$(usex cuda)
- -DUSE_DISTRIBUTED=$(usex distributed)
- -DUSE_FAKELOWP=OFF
- -DUSE_FBGEMM=$(usex fbgemm)
- -DUSE_FLASH_ATTENTION=$(usex flash)
- -DUSE_GFLAGS=ON
- -DUSE_GLOG=ON
- -DUSE_GLOO=$(usex gloo)
- -DUSE_ITT=OFF
- -DUSE_KINETO=ON
- -DUSE_KLEIDIAI=OFF # TODO
- -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
- -DUSE_MEM_EFF_ATTENTION=$(usex memefficient)
- -DUSE_MKLDNN=$(usex onednn)
- -DUSE_MPI=$(usex mpi)
- -DUSE_NCCL=OFF
- -DUSE_NNPACK=$(usex nnpack)
- -DUSE_NUMA=OFF
- -DUSE_NUMPY=$(usex numpy)
- -DUSE_OPENCL=$(usex opencl)
- -DUSE_OPENMP=$(usex openmp)
- -DUSE_PYTORCH_QNNPACK=$(usex qnnpack)
- -DUSE_PYTORCH_METAL=OFF
- -DUSE_ROCM=$(usex rocm)
- -DUSE_SYSTEM_CPUINFO=ON
- -DUSE_SYSTEM_EIGEN_INSTALL=ON
- -DUSE_SYSTEM_FP16=ON
- -DUSE_SYSTEM_FXDIV=ON
- -DUSE_SYSTEM_GLOO=ON
- -DUSE_SYSTEM_NVTX=ON
- -DUSE_SYSTEM_ONNX=ON
- -DUSE_SYSTEM_PSIMD=ON
- -DUSE_SYSTEM_PTHREADPOOL=ON
- -DUSE_SYSTEM_PYBIND11=ON
- -DUSE_SYSTEM_SLEEF=ON
- -DUSE_SYSTEM_XNNPACK=$(usex xnnpack)
- -DUSE_TENSORPIPE=$(usex distributed)
- -DUSE_UCC=OFF
- -DUSE_VALGRIND=OFF
- -DUSE_XNNPACK=$(usex xnnpack)
- -DUSE_XPU=OFF
- -Wno-dev
- )
-
- if use mkl; then
- mycmakeargs+=(-DBLAS=MKL)
- elif use openblas; then
- mycmakeargs+=(-DBLAS=OpenBLAS)
- else
- mycmakeargs+=(-DBLAS=Generic -DBLAS_LIBRARIES=)
- fi
-
- if use cuda; then
- addpredict "/dev/nvidiactl" # bug 867706
- addpredict "/dev/char"
- addpredict "/proc/self/task" # bug 926116
-
- mycmakeargs+=(
- -DUSE_CUDNN=ON
- -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
- -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
- -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
- -DUSE_CUSPARSELT=$(usex cusparselt)
- )
- elif use rocm; then
- export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
-
- mycmakeargs+=(
- -DUSE_NCCL=ON
- -DUSE_SYSTEM_NCCL=ON
- -DCMAKE_REQUIRE_FIND_PACKAGE_HIP=ON
- )
-
- # ROCm libraries produce too much warnings
- append-cxxflags -Wno-deprecated-declarations -Wno-unused-result -Wno-unused-value
- fi
-
- if use onednn; then
- mycmakeargs+=(
- -DMKLDNN_FOUND=ON
- -DMKLDNN_LIBRARIES=dnnl
- -DMKLDNN_INCLUDE_DIR="${ESYSROOT}/usr/include/oneapi/dnnl"
- )
- fi
-
- cmake_src_configure
-}
-
-src_compile() {
- PYTORCH_BUILD_VERSION=${PV} \
- PYTORCH_BUILD_NUMBER=0 \
- cmake_src_compile
-}
-
-python_install() {
- python_domodule python/torch
- mkdir "${D}"$(python_get_sitedir)/torch/bin || die
- mkdir "${D}"$(python_get_sitedir)/torch/lib || die
- mkdir "${D}"$(python_get_sitedir)/torch/include || die
- ln -s ../../../../../include/torch \
- "${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269
- ln -s ../../../../../bin/torch_shm_manager \
- "${D}"/$(python_get_sitedir)/torch/bin/torch_shm_manager || die
- ln -s ../../../../../$(get_libdir)/libtorch_global_deps.so \
- "${D}"/$(python_get_sitedir)/torch/lib/libtorch_global_deps.so || die
-}
-
-src_install() {
- cmake_src_install
-
- # Used by pytorch ebuild
- insinto "/var/lib/${PN}"
- doins "${BUILD_DIR}"/CMakeCache.txt
- dostrip -x /var/lib/${PN}/functorch.so
-
- rm -rf python
- mkdir -p python/torch || die
- cp torch/version.py python/torch/ || die
- python_install
-}
diff --git a/sci-ml/caffe2/files/caffe2-2.5.1-gentoo.patch b/sci-ml/caffe2/files/caffe2-2.5.1-gentoo.patch
deleted file mode 100644
index f923b6746a4b..000000000000
--- a/sci-ml/caffe2/files/caffe2-2.5.1-gentoo.patch
+++ /dev/null
@@ -1,127 +0,0 @@
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -986,12 +986,11 @@ endif()
- # third_party/FBGEMM
- include(cmake/public/utils.cmake)
- if(NOT MSVC)
-- string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC")
-+ string(APPEND CMAKE_CXX_FLAGS " -O2")
- # Eigen fails to build with some versions, so convert this to a warning
- # Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459
- string(APPEND CMAKE_CXX_FLAGS " -Wall")
- string(APPEND CMAKE_CXX_FLAGS " -Wextra")
-- append_cxx_flag_if_supported("-Werror=return-type" CMAKE_CXX_FLAGS)
- append_cxx_flag_if_supported("-Werror=non-virtual-dtor" CMAKE_CXX_FLAGS)
- append_cxx_flag_if_supported("-Werror=braced-scalar-init" CMAKE_CXX_FLAGS)
- append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS)
-@@ -1085,7 +1084,6 @@ if(NOT MSVC)
- string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
- append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
- append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
-- append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
- else()
- # skip unwanted includes from windows.h
- add_compile_definitions(WIN32_LEAN_AND_MEAN)
---- a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
-+++ b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
-@@ -324,16 +324,8 @@ set_target_properties(pytorch_qnnpack PROPERTIES PUBLIC_HEADER include/pytorch_q
- set_target_properties(pytorch_qnnpack PROPERTIES PUBLIC_HEADER include/qnnpack_func.h)
-
- # ---[ Configure clog
--if(NOT TARGET clog)
-- set(CLOG_BUILD_TESTS OFF CACHE BOOL "")
-- set(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "")
-- add_subdirectory(
-- "${CLOG_SOURCE_DIR}"
-- "${CONFU_DEPENDENCIES_BINARY_DIR}/clog")
-- # We build static version of clog but a dynamic library may indirectly depend on it
-- set_property(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON)
--endif()
--target_link_libraries(pytorch_qnnpack PUBLIC clog)
-+find_library(CLOG_LIBRARY NAMES clog REQUIRED)
-+target_link_libraries(pytorch_qnnpack PUBLIC ${CLOG_LIBRARY})
-
- # ---[ Configure cpuinfo
- if(NOT TARGET cpuinfo AND USE_SYSTEM_CPUINFO)
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -87,7 +87,7 @@ endif()
- # Note: the folders that are being commented out have not been properly
- # addressed yet.
-
--if(NOT MSVC AND USE_XNNPACK)
-+if(FALSE)
- if(NOT TARGET fxdiv)
- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
-@@ -1081,7 +1081,6 @@ if(USE_XPU)
- endif()
-
- if(NOT MSVC AND USE_XNNPACK)
-- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
- endif()
-
- # ==========================================================
---- a/cmake/Codegen.cmake
-+++ b/cmake/Codegen.cmake
-@@ -57,7 +57,7 @@ if(INTERN_BUILD_ATEN_OPS)
- if(MSVC)
- set(OPT_FLAG "/fp:strict ")
- else(MSVC)
-- set(OPT_FLAG "-O3 ")
-+ set(OPT_FLAG " ")
- if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
- set(OPT_FLAG " ")
- endif()
---- a/cmake/Dependencies.cmake
-+++ b/cmake/Dependencies.cmake
-@@ -473,7 +473,9 @@ if(USE_PYTORCH_QNNPACK)
- set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
- # QNNPACK depends on gemmlowp headers
-- target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
-+ find_package(gemmlowp REQUIRED)
-+ get_target_property(GEMMLOWP_INCLUDE_DIRS gemmlowp::gemmlowp INTERFACE_INCLUDE_DIRECTORIES)
-+ target_include_directories(pytorch_qnnpack PRIVATE ${GEMMLOWP_INCLUDE_DIRS})
-
- if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
- target_compile_definitions(
-@@ -710,7 +712,7 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
- endif()
-
- # ---[ FBGEMM
--if(USE_FBGEMM)
-+if(FALSE)
- set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party")
- if(NOT DEFINED FBGEMM_SOURCE_DIR)
- set(FBGEMM_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/fbgemm" CACHE STRING "FBGEMM source directory")
-@@ -758,6 +760,7 @@ if(USE_FBGEMM)
- endif()
-
- if(USE_FBGEMM)
-+ list(APPEND Caffe2_DEPENDENCY_LIBS fbgemm)
- caffe2_update_option(USE_FBGEMM ON)
- else()
- caffe2_update_option(USE_FBGEMM OFF)
---- a/cmake/External/nnpack.cmake
-+++ b/cmake/External/nnpack.cmake
-@@ -56,7 +56,7 @@ if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAM
- set(PTHREADPOOL_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool" CACHE STRING "pthreadpool source directory")
- set(GOOGLETEST_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/googletest" CACHE STRING "Google Test source directory")
-
-- if(NOT TARGET nnpack)
-+ if(FALSE)
- if(NOT USE_SYSTEM_PTHREADPOOL AND USE_INTERNAL_PTHREADPOOL_IMPL)
- set(NNPACK_CUSTOM_THREADPOOL ON CACHE BOOL "")
- endif()
---- a/cmake/public/utils.cmake
-+++ b/cmake/public/utils.cmake
-@@ -422,8 +422,6 @@ function(torch_compile_options libname)
- endif()
-
- # Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression)
-- target_compile_options(${libname} PRIVATE
-- $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>)
-
- endfunction()
-
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2025-08-16 18:41 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-02 6:13 [gentoo-commits] repo/gentoo:master commit in: sci-ml/caffe2/files/, sci-ml/caffe2/ Alfredo Tupone
-- strict thread matches above, loose matches on Subject: below --
2025-08-16 18:41 Alfredo Tupone
2025-07-11 10:05 Alfredo Tupone
2025-06-29 19:32 Alfredo Tupone
2025-06-26 10:27 Alfredo Tupone
2025-04-05 12:55 Alfredo Tupone
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.