//===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The parts of the CUDA API that are presently in use by the OpenMP CUDA
// plugin.
//
// This header only declares types, constants and function prototypes; it
// contains no function definitions.
//
//===----------------------------------------------------------------------===//

#ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED
#define DYNAMIC_CUDA_CUDA_H_INCLUDED

#include <cstddef>
#include <cstdint>

// Redirect the unversioned entry-point names to their `_v2` spellings. Every
// declaration or call written with the left-hand name after this point refers
// to the right-hand symbol.
#define cuDeviceTotalMem cuDeviceTotalMem_v2
#define cuModuleGetGlobal cuModuleGetGlobal_v2
#define cuMemGetInfo cuMemGetInfo_v2
#define cuMemAlloc cuMemAlloc_v2
#define cuMemFree cuMemFree_v2
#define cuMemAllocHost cuMemAllocHost_v2
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2

// Basic handle types. The `*_st` structs are never defined in this header, so
// the pointer typedefs below are opaque handles.
typedef int CUdevice;
typedef uintptr_t CUdeviceptr;
typedef struct CUmod_st *CUmodule;
typedef struct CUctx_st *CUcontext;
typedef struct CUfunc_st *CUfunction;
typedef void (*CUhostFn)(void *userData);
typedef struct CUstream_st *CUstream;
typedef struct CUevent_st *CUevent;

// CUdevice value -2, named as the "invalid device" constant.
#define CU_DEVICE_INVALID ((CUdevice)(-2))

typedef unsigned long long CUmemGenericAllocationHandle_v1;
typedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle;

// Options accepted by cuMemGetAllocationGranularity (declared below).
typedef enum CUmemAllocationGranularity_flags_enum {
  CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
  CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1
} CUmemAllocationGranularity_flags;

// Access flags stored in CUmemAccessDesc::flags.
typedef enum CUmemAccess_flags_enum {
  CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
  CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1,
  CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
  CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF
} CUmemAccess_flags;

// Location kind stored in CUmemLocation::type.
typedef enum CUmemLocationType_enum {
  CU_MEM_LOCATION_TYPE_INVALID = 0x0,
  CU_MEM_LOCATION_TYPE_DEVICE = 0x1,
  CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemLocationType;

// A (type, id) pair identifying a memory location.
typedef struct CUmemLocation_st {
  CUmemLocationType type;
  int id;
} CUmemLocation_v1;
typedef CUmemLocation_v1 CUmemLocation;

// Pairs a location with access flags; consumed by cuMemSetAccess.
typedef struct CUmemAccessDesc_st {
  CUmemLocation location;
  CUmemAccess_flags flags;
} CUmemAccessDesc_v1;

typedef CUmemAccessDesc_v1 CUmemAccessDesc;

typedef enum CUmemAllocationType_enum {
  CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
  CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
  CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationType;

typedef enum CUmemAllocationHandleType_enum {
  CU_MEM_HANDLE_TYPE_NONE = 0x0,
  CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
  CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
  CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4,
  CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationHandleType;

// Allocation properties passed to cuMemCreate and
// cuMemGetAllocationGranularity.
// NOTE(review): the field order and sizes presumably have to match the layout
// the driver expects — confirm against the official cuda.h before editing.
typedef struct CUmemAllocationProp_st {
  CUmemAllocationType type;
  CUmemAllocationHandleType requestedHandleTypes;
  CUmemLocation location;

  void *win32HandleMetaData;
  struct {
    unsigned char compressionType;
    unsigned char gpuDirectRDMACapable;
    unsigned short usage;
    unsigned char reserved[4];
  } allocFlags;
} CUmemAllocationProp_v1;
typedef CUmemAllocationProp_v1 CUmemAllocationProp;

// Result codes returned by every function declared in this header. Only a
// subset of codes is listed here.
typedef enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_NO_DEVICE = 100,
  CUDA_ERROR_INVALID_HANDLE = 400,
  CUDA_ERROR_NOT_READY = 600,
  CUDA_ERROR_TOO_MANY_PEERS = 711,
} CUresult;

typedef enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0x0,
  CU_STREAM_NON_BLOCKING = 0x1,
} CUstream_flags;

// Limit selectors for cuCtxGetLimit / cuCtxSetLimit.
typedef enum CUlimit_enum {
  CU_LIMIT_STACK_SIZE = 0x0,
  CU_LIMIT_PRINTF_FIFO_SIZE = 0x1,
  CU_LIMIT_MALLOC_HEAP_SIZE = 0x2,
  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x3,
  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x4,
  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x5,
  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x6,
  CU_LIMIT_MAX // Implicit value: one past the last explicit entry (0x7).
} CUlimit;

// Attribute selectors for cuDeviceGetAttribute. Several enumerators share a
// numeric value with the one before them (8, 12, 27-29, 102); those are two
// names for the same attribute number.
typedef enum CUdevice_attribute_enum {
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,
  CU_DEVICE_ATTRIBUTE_MAX, // Implicit value: 120.
} CUdevice_attribute;

// Attribute selectors for cuFuncGetAttribute.
typedef enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
} CUfunction_attribute;

typedef enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
  CU_CTX_SCHED_MASK = 0x07,
} CUctx_flags;

typedef enum CUmemAttach_flags_enum {
  CU_MEM_ATTACH_GLOBAL = 0x1,
  CU_MEM_ATTACH_HOST = 0x2,
  CU_MEM_ATTACH_SINGLE = 0x4,
} CUmemAttach_flags;

typedef enum CUcomputeMode_enum {
  CU_COMPUTEMODE_DEFAULT = 0,
  CU_COMPUTEMODE_PROHIBITED = 2,
  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
} CUcompute_mode;

typedef enum CUevent_flags_enum {
  CU_EVENT_DEFAULT = 0x0,
  CU_EVENT_BLOCKING_SYNC = 0x1,
  CU_EVENT_DISABLE_TIMING = 0x2,
  CU_EVENT_INTERPROCESS = 0x4
} CUevent_flags;

// Pointer-valued constants with the fixed values 0x00, 0x01 and 0x02. They are
// declared as inline variables (a C++17 feature) rather than macros.
// NOTE(review): presumably these are the markers placed in the final `void **`
// argument of cuLaunchKernel — confirm against the driver API documentation.
static inline void *CU_LAUNCH_PARAM_END = (void *)0x00;
static inline void *CU_LAUNCH_PARAM_BUFFER_POINTER = (void *)0x01;
static inline void *CU_LAUNCH_PARAM_BUFFER_SIZE = (void *)0x02;

// Callback signature accepted by cuStreamAddCallback.
typedef void (*CUstreamCallback)(CUstream, CUresult, void *);

// Device and function queries.
CUresult cuCtxGetDevice(CUdevice *);
CUresult cuDeviceGet(CUdevice *, int);
CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
CUresult cuDeviceGetCount(int *);
CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);

// Device info
CUresult cuDeviceGetName(char *, int, CUdevice);
CUresult cuDeviceTotalMem(size_t *, CUdevice);
CUresult cuDriverGetVersion(int *);

// Error reporting, initialization and launch.
CUresult cuGetErrorString(CUresult, const char **);
CUresult cuInit(unsigned);
CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
                        unsigned, unsigned, unsigned, CUstream, void **,
                        void **);
CUresult cuLaunchHostFunc(CUstream, CUhostFn, void *);

// Allocation.
CUresult cuMemAlloc(CUdeviceptr *, size_t);
CUresult cuMemAllocHost(void **, size_t);
CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int);
CUresult cuMemAllocAsync(CUdeviceptr *, size_t, CUstream);

// Copies.
CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);

// Deallocation.
CUresult cuMemFree(CUdeviceptr);
CUresult cuMemFreeHost(void *);
CUresult cuMemFreeAsync(CUdeviceptr, CUstream);

// Module lookup.
CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);

// Module, stream and context management.
CUresult cuModuleUnload(CUmodule);
CUresult cuStreamCreate(CUstream *, unsigned);
CUresult cuStreamDestroy(CUstream);
CUresult cuStreamSynchronize(CUstream);
CUresult cuStreamQuery(CUstream);
CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int);
CUresult cuCtxSetCurrent(CUcontext);
CUresult cuDevicePrimaryCtxRelease(CUdevice);
CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
                            void **);

// Peer access.
CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice);
CUresult cuCtxEnablePeerAccess(CUcontext, unsigned);
CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
                           size_t, CUstream);

// Context limits.
CUresult cuCtxGetLimit(size_t *, CUlimit);
CUresult cuCtxSetLimit(CUlimit, size_t);

// Events.
CUresult cuEventCreate(CUevent *, unsigned int);
CUresult cuEventRecord(CUevent, CUstream);
CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
CUresult cuEventSynchronize(CUevent);
CUresult cuEventDestroy(CUevent);

// Address reservation, mapping and access control built on
// CUmemGenericAllocationHandle, plus the memory-info query.
CUresult cuMemUnmap(CUdeviceptr ptr, size_t size);
CUresult cuMemRelease(CUmemGenericAllocationHandle handle);
CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size);
CUresult cuMemGetInfo(size_t *free, size_t *total);
CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment,
                             CUdeviceptr addr, unsigned long long flags);
CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset,
                  CUmemGenericAllocationHandle handle,
                  unsigned long long flags);
CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
                     const CUmemAllocationProp *prop, unsigned long long flags);
CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size,
                        const CUmemAccessDesc *desc, size_t count);
CUresult cuMemGetAllocationGranularity(size_t *granularity,
                                       const CUmemAllocationProp *prop,
                                       CUmemAllocationGranularity_flags option);

#endif // DYNAMIC_CUDA_CUDA_H_INCLUDED