xref: /llvm-project/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h (revision bd8a8181288c9e16eb90fff78cbbc63b4687963a)
1 //===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The parts of the cuda api that are presently in use by the openmp cuda plugin
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED
14 #define DYNAMIC_CUDA_CUDA_H_INCLUDED
15 
16 #include <cstddef>
17 #include <cstdint>
18 
// Spell the unversioned API names as their `_v2` counterparts. After
// preprocessing, every later use of one of these names (in this header or in
// a file that includes it) refers to the `_v2` identifier instead.

// Device and primary-context entry points.
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
#define cuDeviceTotalMem cuDeviceTotalMem_v2

// Module entry points.
#define cuModuleGetGlobal cuModuleGetGlobal_v2

// Memory entry points.
#define cuMemAlloc cuMemAlloc_v2
#define cuMemAllocHost cuMemAllocHost_v2
#define cuMemFree cuMemFree_v2
#define cuMemGetInfo cuMemGetInfo_v2
27 
// Value-typed handles: a device is a plain int and a device pointer is an
// integer wide enough to hold a host pointer.
typedef int CUdevice;
typedef uintptr_t CUdeviceptr;

// Pointer-typed handles. The pointee structs are only declared here, never
// defined, so these handles cannot be dereferenced through this header.
struct CUctx_st;
struct CUevent_st;
struct CUfunc_st;
struct CUmod_st;
struct CUstream_st;
typedef struct CUctx_st *CUcontext;
typedef struct CUevent_st *CUevent;
typedef struct CUfunc_st *CUfunction;
typedef struct CUmod_st *CUmodule;
typedef struct CUstream_st *CUstream;

// Signature of a host function taking one untyped user pointer.
typedef void (*CUhostFn)(void *userData);
36 
// Sentinel device value: -2 converted to CUdevice. Defined exactly once; the
// header previously repeated this same definition a few lines further down.
#define CU_DEVICE_INVALID ((CUdevice)(-2))

// Allocation handle type used by the cuMemCreate / cuMemMap / cuMemRelease
// declarations later in this header.
typedef unsigned long long CUmemGenericAllocationHandle_v1;
typedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle;
43 
// Selector type of the `option` parameter of cuMemGetAllocationGranularity.
enum CUmemAllocationGranularity_flags_enum {
  CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0,
  CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 1
};
typedef enum CUmemAllocationGranularity_flags_enum
    CUmemAllocationGranularity_flags;
48 
// Access protection values stored in CUmemAccessDesc::flags.
enum CUmemAccess_flags_enum {
  CU_MEM_ACCESS_FLAGS_PROT_NONE = 0,
  CU_MEM_ACCESS_FLAGS_PROT_READ = 1,
  // Numerically a superset of PROT_READ: bits 0 and 1 are both set.
  CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 3,
  // NOTE(review): presumably an upper-bound marker rather than a real
  // protection mode -- confirm against the driver API docs.
  CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF
};
typedef enum CUmemAccess_flags_enum CUmemAccess_flags;
55 
// Kind tag stored in CUmemLocation::type.
enum CUmemLocationType_enum {
  CU_MEM_LOCATION_TYPE_INVALID = 0,
  CU_MEM_LOCATION_TYPE_DEVICE = 1,
  // NOTE(review): presumably an upper-bound marker, not a usable location
  // kind -- confirm against the driver API docs.
  CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF
};
typedef enum CUmemLocationType_enum CUmemLocationType;
61 
62 typedef struct CUmemLocation_st {
63   CUmemLocationType type;
64   int id;
65 } CUmemLocation_v1;
66 typedef CUmemLocation_v1 CUmemLocation;
67 
68 typedef struct CUmemAccessDesc_st {
69   CUmemLocation location;
70   CUmemAccess_flags flags;
71 } CUmemAccessDesc_v1;
72 
73 typedef CUmemAccessDesc_v1 CUmemAccessDesc;
74 
// Allocation kind stored in CUmemAllocationProp::type.
enum CUmemAllocationType_enum {
  CU_MEM_ALLOCATION_TYPE_INVALID = 0,
  CU_MEM_ALLOCATION_TYPE_PINNED = 1,
  // NOTE(review): presumably an upper-bound marker, not a usable allocation
  // kind -- confirm against the driver API docs.
  CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF
};
typedef enum CUmemAllocationType_enum CUmemAllocationType;
80 
// Handle kinds stored in CUmemAllocationProp::requestedHandleTypes. The
// non-zero, non-MAX values are distinct single bits.
enum CUmemAllocationHandleType_enum {
  CU_MEM_HANDLE_TYPE_NONE = 0,
  CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 1 << 0,
  CU_MEM_HANDLE_TYPE_WIN32 = 1 << 1,
  CU_MEM_HANDLE_TYPE_WIN32_KMT = 1 << 2,
  // NOTE(review): presumably an upper-bound marker, not a usable handle
  // kind -- confirm against the driver API docs.
  CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF
};
typedef enum CUmemAllocationHandleType_enum CUmemAllocationHandleType;
88 
89 typedef struct CUmemAllocationProp_st {
90   CUmemAllocationType type;
91   CUmemAllocationHandleType requestedHandleTypes;
92   CUmemLocation location;
93 
94   void *win32HandleMetaData;
95   struct {
96     unsigned char compressionType;
97     unsigned char gpuDirectRDMACapable;
98     unsigned short usage;
99     unsigned char reserved[4];
100   } allocFlags;
101 } CUmemAllocationProp_v1;
102 typedef CUmemAllocationProp_v1 CUmemAllocationProp;
103 
// Status code returned by every function declared in this header. Only the
// codes listed here are declared.
enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_NO_DEVICE = 100,
  CUDA_ERROR_INVALID_HANDLE = 400,
  CUDA_ERROR_NOT_READY = 600,
  CUDA_ERROR_TOO_MANY_PEERS = 711,
};
typedef enum cudaError_enum CUresult;
112 
// Stream creation flag values.
enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0,
  CU_STREAM_NON_BLOCKING = 1,
};
typedef enum CUstream_flags_enum CUstream_flags;
117 
// Limit selectors accepted by cuCtxGetLimit / cuCtxSetLimit.
enum CUlimit_enum {
  CU_LIMIT_STACK_SIZE = 0,
  CU_LIMIT_PRINTF_FIFO_SIZE = 1,
  CU_LIMIT_MALLOC_HEAP_SIZE = 2,
  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 3,
  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4,
  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5,
  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6,
  // Deliberately left without an initializer: it takes the value one past
  // the last explicit selector above (currently 7).
  CU_LIMIT_MAX
};
typedef enum CUlimit_enum CUlimit;
128 
// Attribute selectors accepted by cuDeviceGetAttribute. Several names are
// aliases for the same numeric value; each alias is written in terms of the
// enumerator it duplicates so the relationship is visible.
enum CUdevice_attribute_enum {
  // Thread block and grid dimensions.
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,

  // Values 8-20.
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK =
      CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, // alias (8)
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK =
      CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, // alias (12)
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,

  // Values 21-30.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH =
      CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, // alias (27)
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT =
      CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, // alias (28)
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES =
      CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, // alias (29)
  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,

  // Values 31-41.
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,

  // Values 42-54.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,

  // Values 55-68.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,

  // Values 69-77.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,

  // Values 78-101.
  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,

  // Values 102-119.
  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED =
      CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED, // alias (102)
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,

  // No initializer: takes the value one past the enumerator directly above
  // (currently 120). Keep it last.
  CU_DEVICE_ATTRIBUTE_MAX,
};
typedef enum CUdevice_attribute_enum CUdevice_attribute;
257 
// Attribute selectors accepted by cuFuncGetAttribute. Only one is declared.
enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
};
typedef enum CUfunction_attribute_enum CUfunction_attribute;
261 
// Context flag values. CU_CTX_SCHED_MASK covers the low three bits, which
// include the CU_CTX_SCHED_BLOCKING_SYNC bit.
enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 1 << 2,
  CU_CTX_SCHED_MASK = (1 << 3) - 1,
};
typedef enum CUctx_flags_enum CUctx_flags;
266 
// Memory attach flag values; each is a distinct single bit.
enum CUmemAttach_flags_enum {
  CU_MEM_ATTACH_GLOBAL = 1 << 0,
  CU_MEM_ATTACH_HOST = 1 << 1,
  CU_MEM_ATTACH_SINGLE = 1 << 2,
};
typedef enum CUmemAttach_flags_enum CUmemAttach_flags;
272 
// Compute mode values. The value 1 is intentionally not declared here.
enum CUcomputeMode_enum {
  CU_COMPUTEMODE_DEFAULT = 0,
  CU_COMPUTEMODE_PROHIBITED = 2,
  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
};
typedef enum CUcomputeMode_enum CUcompute_mode;
278 
// Event creation flag values; the non-zero ones are distinct single bits.
enum CUevent_flags_enum {
  CU_EVENT_DEFAULT = 0,
  CU_EVENT_BLOCKING_SYNC = 1 << 0,
  CU_EVENT_DISABLE_TIMING = 1 << 1,
  CU_EVENT_INTERPROCESS = 1 << 2
};
typedef enum CUevent_flags_enum CUevent_flags;
285 
// Marker pointer values with the fixed addresses 0, 1 and 2.
// NOTE(review): presumably the tags used in the `extra` array of
// cuLaunchKernel -- confirm against the driver API docs.
//
// The pointers themselves are `const` so that no including file can reassign
// what are meant to be constants. They cannot be `constexpr` because an
// integer-to-pointer cast is not a constant expression.
static inline void *const CU_LAUNCH_PARAM_END = (void *)0x00;
static inline void *const CU_LAUNCH_PARAM_BUFFER_POINTER = (void *)0x01;
static inline void *const CU_LAUNCH_PARAM_BUFFER_SIZE = (void *)0x02;
289 
290 typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
291 
292 CUresult cuCtxGetDevice(CUdevice *);
293 CUresult cuDeviceGet(CUdevice *, int);
294 CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
295 CUresult cuDeviceGetCount(int *);
296 CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);
297 
298 // Device info
299 CUresult cuDeviceGetName(char *, int, CUdevice);
300 CUresult cuDeviceTotalMem(size_t *, CUdevice);
301 CUresult cuDriverGetVersion(int *);
302 
303 CUresult cuGetErrorString(CUresult, const char **);
304 CUresult cuInit(unsigned);
305 CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
306                         unsigned, unsigned, unsigned, CUstream, void **,
307                         void **);
308 CUresult cuLaunchHostFunc(CUstream, CUhostFn, void *);
309 
310 CUresult cuMemAlloc(CUdeviceptr *, size_t);
311 CUresult cuMemAllocHost(void **, size_t);
312 CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int);
313 CUresult cuMemAllocAsync(CUdeviceptr *, size_t, CUstream);
314 
315 CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
316 CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
317 CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
318 CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
319 CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);
320 
321 CUresult cuMemFree(CUdeviceptr);
322 CUresult cuMemFreeHost(void *);
323 CUresult cuMemFreeAsync(CUdeviceptr, CUstream);
324 
325 CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
326 CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);
327 
328 CUresult cuModuleUnload(CUmodule);
329 CUresult cuStreamCreate(CUstream *, unsigned);
330 CUresult cuStreamDestroy(CUstream);
331 CUresult cuStreamSynchronize(CUstream);
332 CUresult cuStreamQuery(CUstream);
333 CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int);
334 CUresult cuCtxSetCurrent(CUcontext);
335 CUresult cuDevicePrimaryCtxRelease(CUdevice);
336 CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
337 CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
338 CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
339 CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
340                             void **);
341 
342 CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice);
343 CUresult cuCtxEnablePeerAccess(CUcontext, unsigned);
344 CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
345                            size_t, CUstream);
346 
347 CUresult cuCtxGetLimit(size_t *, CUlimit);
348 CUresult cuCtxSetLimit(CUlimit, size_t);
349 
350 CUresult cuEventCreate(CUevent *, unsigned int);
351 CUresult cuEventRecord(CUevent, CUstream);
352 CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
353 CUresult cuEventSynchronize(CUevent);
354 CUresult cuEventDestroy(CUevent);
355 
356 CUresult cuMemUnmap(CUdeviceptr ptr, size_t size);
357 CUresult cuMemRelease(CUmemGenericAllocationHandle handle);
358 CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size);
359 CUresult cuMemGetInfo(size_t *free, size_t *total);
360 CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment,
361                              CUdeviceptr addr, unsigned long long flags);
362 CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset,
363                   CUmemGenericAllocationHandle handle,
364                   unsigned long long flags);
365 CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
366                      const CUmemAllocationProp *prop, unsigned long long flags);
367 CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size,
368                         const CUmemAccessDesc *desc, size_t count);
369 CUresult cuMemGetAllocationGranularity(size_t *granularity,
370                                        const CUmemAllocationProp *prop,
371                                        CUmemAllocationGranularity_flags option);
372 
373 #endif
374