about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cebtenzzre <cebtenzzre@gmail.com> 2023-08-04 11:35:22 -0400
committer GitHub <noreply@github.com> 2023-08-04 17:35:22 +0200
commit 4329d1acb01c353803a54733b8eef9d93d0b84b2 (patch)
tree d562a446a434a6e17e7c76cdcb4982afd8a8131f
parent 02f9d96a866268700b8d8e7acbbcb4392c5ff345 (diff)
CUDA: use min compute capability of GPUs actually used (#2506)
-rw-r--r-- ggml-cuda.cu 3
1 files changed, 2 insertions, 1 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 4321e46..d64d704 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -5347,7 +5347,8 @@ void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_
} else {
int min_compute_capability = INT_MAX;
for (int id = 0; id < g_device_count; ++id) {
- if (min_compute_capability > g_compute_capabilities[id]) {
+ if (min_compute_capability > g_compute_capabilities[id]
+ && g_tensor_split[id] < (id + 1 < g_device_count ? g_tensor_split[id + 1] : 1.0f)) {
min_compute_capability = g_compute_capabilities[id];
}
}