wtambellini committed
Commit 38b1143 · Parent: b9bc04d

Add an option to build without CUDA VMM (llama/7067)


Add an option to build the ggml CUDA backend without CUDA VMM (virtual memory management).

Resolves:
https://github.com/ggerganov/llama.cpp/issues/6889
https://forums.developer.nvidia.com/t/potential-nvshmem-allocated-memory-performance-issue/275416/4
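
For context (illustration only, not part of the commit): the code this change guards reads CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED through the CUDA driver API. Below is a minimal standalone sketch of that same query; the file name and build line are assumptions.

// vmm_check.cu - report per-device VMM support via the CUDA driver API
// (assumed build line: nvcc vmm_check.cu -lcuda -o vmm_check)
#include <cuda.h>
#include <cstdio>

int main() {
    if (cuInit(0) != CUDA_SUCCESS) {
        fprintf(stderr, "cuInit failed\n");
        return 1;
    }
    int device_count = 0;
    cuDeviceGetCount(&device_count);
    for (int id = 0; id < device_count; ++id) {
        CUdevice device;
        cuDeviceGet(&device, id);
        int device_vmm = 0;
        // Same attribute ggml_cuda_init() checks in the diff below.
        cuDeviceGetAttribute(&device_vmm,
            CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device);
        printf("device %d: VMM %s\n", id, device_vmm ? "supported" : "not supported");
    }
    return 0;
}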

Files changed (1): ggml-cuda.cu (+3 -3)
ggml-cuda.cu CHANGED

@@ -113,7 +113,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
@@ -259,7 +259,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
@@ -356,7 +356,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 #endif // !defined(GGML_USE_HIPBLAS)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
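
To make the effect of the guard concrete, here is a condensed, hypothetical sketch of the selection logic in new_pool_for_device(). The type names mirror the real ones but the bodies are stubs for illustration; compiling with -DGGML_CUDA_NO_VMM removes the VMM branch entirely, so the legacy pool is always used.

// Condensed sketch of the pool selection this diff guards (stubs, not the real code).
#include <cstdio>
#include <memory>

struct ggml_cuda_pool {
    virtual ~ggml_cuda_pool() = default;
    virtual const char *name() const = 0;
};
struct ggml_cuda_pool_vmm : ggml_cuda_pool {
    explicit ggml_cuda_pool_vmm(int) {}
    const char *name() const override { return "vmm"; }
};
struct ggml_cuda_pool_leg : ggml_cuda_pool {
    explicit ggml_cuda_pool_leg(int) {}
    const char *name() const override { return "legacy"; }
};

// Stub: the real code reads CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED.
static bool device_supports_vmm(int /*device*/) { return true; }

static std::unique_ptr<ggml_cuda_pool> new_pool_for_device(int device) {
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
    if (device_supports_vmm(device)) {
        return std::make_unique<ggml_cuda_pool_vmm>(device); // VMM-backed pool
    }
#endif
    return std::make_unique<ggml_cuda_pool_leg>(device);     // legacy fallback
}

int main() {
    // Prints "legacy" when built with -DGGML_CUDA_NO_VMM, otherwise "vmm".
    printf("pool: %s\n", new_pool_for_device(0)->name());
    return 0;
}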