diff options
author | Cebtenzzre <cebtenzzre@gmail.com> | 2023-08-04 11:35:22 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-04 17:35:22 +0200 |
commit | 4329d1acb01c353803a54733b8eef9d93d0b84b2 (patch) | |
tree | d562a446a434a6e17e7c76cdcb4982afd8a8131f | |
parent | 02f9d96a866268700b8d8e7acbbcb4392c5ff345 (diff) |
CUDA: use min compute capability of GPUs actually used (#2506)
-rw-r--r-- | ggml-cuda.cu | 3 |
1 file changed, 2 insertions, 1 deletion
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 4321e46..d64d704 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -5347,7 +5347,8 @@ void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_
     } else {
         int min_compute_capability = INT_MAX;
         for (int id = 0; id < g_device_count; ++id) {
-            if (min_compute_capability > g_compute_capabilities[id]) {
+            if (min_compute_capability > g_compute_capabilities[id]
+                    && g_tensor_split[id] < (id + 1 < g_device_count ? g_tensor_split[id + 1] : 1.0f)) {
                 min_compute_capability = g_compute_capabilities[id];
             }
         }