1 // RUN: %libomptarget-compile-generic 2 // RUN: env HSA_XNACK=1 \ 3 // RUN: %libomptarget-run-generic | %fcheck-generic 4 // XFAIL: nvptx64-nvidia-cuda 5 // XFAIL: nvptx64-nvidia-cuda-LTO 6 7 // REQUIRES: unified_shared_memory 8 9 #include <omp.h> 10 #include <stdio.h> 11 12 // --------------------------------------------------------------------------- 13 // Various definitions copied from OpenMP RTL 14 15 extern void __tgt_register_requires(int64_t); 16 17 // End of definitions copied from OpenMP RTL. 18 // --------------------------------------------------------------------------- 19 20 #pragma omp requires unified_shared_memory 21 22 #define N 1024 23 24 void init(int A[], int B[], int C[]) { 25 for (int i = 0; i < N; ++i) { 26 A[i] = 0; 27 B[i] = 1; 28 C[i] = i; 29 } 30 } 31 32 int main(int argc, char *argv[]) { 33 const int device = omp_get_default_device(); 34 35 // Manual registration of requires flags for Clang versions 36 // that do not support requires. 37 __tgt_register_requires(8); 38 39 // CHECK: Initial device: [[INITIAL_DEVICE:[0-9]+]] 40 printf("Initial device: %d\n", omp_get_initial_device()); 41 // CHECK: Num devices: [[INITIAL_DEVICE]] 42 printf("Num devices: %d\n", omp_get_num_devices()); 43 44 // 45 // Target alloc & target memcpy 46 // 47 int A[N], B[N], C[N]; 48 49 // Init 50 init(A, B, C); 51 52 int *pA, *pB, *pC; 53 54 // map ptrs 55 pA = &A[0]; 56 pB = &B[0]; 57 pC = &C[0]; 58 59 int *d_A = (int *)omp_target_alloc(N * sizeof(int), device); 60 int *d_B = (int *)omp_target_alloc(N * sizeof(int), device); 61 int *d_C = (int *)omp_target_alloc(N * sizeof(int), device); 62 63 // CHECK: omp_target_alloc succeeded 64 printf("omp_target_alloc %s\n", d_A && d_B && d_C ? "succeeded" : "failed"); 65 66 omp_target_memcpy(d_B, pB, N * sizeof(int), 0, 0, device, 67 omp_get_initial_device()); 68 omp_target_memcpy(d_C, pC, N * sizeof(int), 0, 0, device, 69 omp_get_initial_device()); 70 71 #pragma omp target is_device_ptr(d_A, d_B, d_C) device(device) 72 { 73 #pragma omp parallel for schedule(static, 1) 74 for (int i = 0; i < N; i++) { 75 d_A[i] = d_B[i] + d_C[i] + 1; 76 } 77 } 78 79 omp_target_memcpy(pA, d_A, N * sizeof(int), 0, 0, omp_get_initial_device(), 80 device); 81 82 // CHECK: Test omp_target_memcpy: Succeeded 83 int fail = 0; 84 for (int i = 0; i < N; ++i) { 85 if (A[i] != i + 2) 86 fail++; 87 } 88 if (fail) { 89 printf("Test omp_target_memcpy: Failed\n"); 90 } else { 91 printf("Test omp_target_memcpy: Succeeded\n"); 92 } 93 94 // 95 // target_is_present and target_associate/disassociate_ptr 96 // 97 init(A, B, C); 98 99 // CHECK: B is not present, associating it... 100 // CHECK: omp_target_associate_ptr B succeeded 101 if (!omp_target_is_present(B, device)) { 102 printf("B is not present, associating it...\n"); 103 int rc = omp_target_associate_ptr(B, d_B, N * sizeof(int), 0, device); 104 printf("omp_target_associate_ptr B %s\n", !rc ? "succeeded" : "failed"); 105 } 106 107 // CHECK: C is not present, associating it... 108 // CHECK: omp_target_associate_ptr C succeeded 109 if (!omp_target_is_present(C, device)) { 110 printf("C is not present, associating it...\n"); 111 int rc = omp_target_associate_ptr(C, d_C, N * sizeof(int), 0, device); 112 printf("omp_target_associate_ptr C %s\n", !rc ? "succeeded" : "failed"); 113 } 114 115 // CHECK: Inside target data: A is not present 116 // CHECK: Inside target data: B is present 117 // CHECK: Inside target data: C is present 118 #pragma omp target data map(from : B, C) device(device) 119 { 120 printf("Inside target data: A is%s present\n", 121 omp_target_is_present(A, device) ? "" : " not"); 122 printf("Inside target data: B is%s present\n", 123 omp_target_is_present(B, device) ? "" : " not"); 124 printf("Inside target data: C is%s present\n", 125 omp_target_is_present(C, device) ? "" : " not"); 126 127 #pragma omp target map(from : A) device(device) 128 { 129 #pragma omp parallel for schedule(static, 1) 130 for (int i = 0; i < N; i++) 131 A[i] = B[i] + C[i] + 1; 132 } 133 } 134 135 // CHECK: B is present, disassociating it... 136 // CHECK: omp_target_disassociate_ptr B succeeded 137 // CHECK: C is present, disassociating it... 138 // CHECK: omp_target_disassociate_ptr C succeeded 139 if (omp_target_is_present(B, device)) { 140 printf("B is present, disassociating it...\n"); 141 int rc = omp_target_disassociate_ptr(B, device); 142 printf("omp_target_disassociate_ptr B %s\n", !rc ? "succeeded" : "failed"); 143 } 144 if (omp_target_is_present(C, device)) { 145 printf("C is present, disassociating it...\n"); 146 int rc = omp_target_disassociate_ptr(C, device); 147 printf("omp_target_disassociate_ptr C %s\n", !rc ? "succeeded" : "failed"); 148 } 149 150 // CHECK: Test omp_target_associate_ptr: Succeeded 151 fail = 0; 152 for (int i = 0; i < N; ++i) { 153 if (A[i] != i + 2) 154 fail++; 155 } 156 if (fail) { 157 printf("Test omp_target_associate_ptr: Failed\n"); 158 } else { 159 printf("Test omp_target_associate_ptr: Succeeded\n"); 160 } 161 162 omp_target_free(d_A, device); 163 omp_target_free(d_B, device); 164 omp_target_free(d_C, device); 165 166 printf("Done!\n"); 167 168 return 0; 169 } 170