Commit 38b1143
Parent(s): b9bc04d
Add an option to build without CUDA VMM (llama/7067)
Add an option to build ggml cuda without CUDA VMM
resolves
https://github.com/ggerganov/llama.cpp/issues/6889
https://forums.developer.nvidia.com/t/potential-nvshmem-allocated-memory-performance-issue/275416/4
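
The change gates every use of the CUDA virtual memory management (VMM) pool behind a new GGML_CUDA_NO_VMM preprocessor macro, in addition to the existing GGML_USE_HIPBLAS guard, so builds that define it fall back to the legacy pool (ggml_cuda_pool_leg) instead of ggml_cuda_pool_vmm. A minimal sketch of how the guard behaves, assuming the macro is passed as an ordinary compiler define (the build-system option that sets it is not shown in this diff):

// demo.cpp -- not ggml source; shows the effect of the new define.
// Build without VMM:  g++ -DGGML_CUDA_NO_VMM demo.cpp && ./a.out
// Default build:      g++ demo.cpp && ./a.out
#include <cstdio>

int main() {
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
    std::puts("VMM pool path compiled in");
#else
    std::puts("legacy pool path compiled in");
#endif
    return 0;
}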
ggml-cuda.cu CHANGED (+3 -3)

@@ -113,7 +113,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
@@ -259,7 +259,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
@@ -356,7 +356,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 #endif // !defined(GGML_USE_HIPBLAS)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
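
Taken together, the three hunks compile out the device VMM capability probe, the ggml_cuda_pool_vmm type itself, and the runtime dispatch in new_pool_for_device whenever GGML_CUDA_NO_VMM is defined. A simplified, self-contained sketch of that selection pattern (class and function names here are illustrative stand-ins, not the real ggml API):

#include <memory>

struct pool { virtual ~pool() = default; };   // stand-in for ggml_cuda_pool
struct legacy_pool : pool {};                 // stand-in for ggml_cuda_pool_leg

#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
struct vmm_pool : pool {};                    // stand-in for ggml_cuda_pool_vmm
#endif

// Mirrors new_pool_for_device: prefer the VMM pool when it is compiled in
// and the device reports VMM support; otherwise use the legacy pool.
// device_supports_vmm is a hypothetical stand-in for
// ggml_cuda_info().devices[device].vmm.
std::unique_ptr<pool> new_pool(bool device_supports_vmm) {
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
    if (device_supports_vmm) {
        return std::make_unique<vmm_pool>();
    }
#endif
    (void) device_supports_vmm;  // unused when the VMM path is compiled out
    return std::make_unique<legacy_pool>();
}

int main() {
    auto p = new_pool(/*device_supports_vmm=*/true);  // legacy_pool under -DGGML_CUDA_NO_VMM
    return 0;
}

One detail the diff leaves as-is: the #endif comment at line 356 still reads "// !defined(GGML_USE_HIPBLAS)" even though the matching #if at line 262 now also checks GGML_CUDA_NO_VMM.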