xref: /llvm-project/offload/test/mapping/auto_zero_copy_globals.cpp (revision 8823448807f3b1a1362d1417e062d763734e02f5)
1 // clang-format off
2 // RUN: %libomptarget-compilexx-generic
3 // RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
4 // RUN: | %fcheck-generic -check-prefix=CHECK
5 
6 // REQUIRES: amdgpu
7 // REQUIRES: unified_shared_memory
8 
9 // clang-format on
10 
11 #include <cstdint>
12 #include <cstdio>
13 
14 /// Test for globals under automatic zero-copy.
15 /// Because we are building without unified_shared_memory
16 /// requirement pragma, all globals are allocated in the device
17 /// memory of all used GPUs. To ensure those globals contain the intended
18 /// values, we need to execute H2D and D2H memory copies even if we are running
19 /// in automatic zero-copy. This only applies to globals. Local variables (their
20 /// host pointers) are passed to the kernels by-value, according to the
21 /// automatic zero-copy behavior.
22 
23 #pragma omp begin declare target
24 int32_t x;     // 4 bytes
25 int32_t z[10]; // 40 bytes
26 int32_t *k;    // 20 bytes
27 #pragma omp end declare target
28 
main()29 int main() {
30   int32_t *dev_k = nullptr;
31   x = 3;
32   int32_t y = -1;
33   for (size_t t = 0; t < 10; t++)
34     z[t] = t;
35   k = new int32_t[5];
36 
37   printf("Host pointer for k = %p\n", k);
38   for (size_t t = 0; t < 5; t++)
39     k[t] = -t;
40 
41 /// target update to forces a copy between host and device global, which we must
42 /// execute to keep the two global copies consistent. CHECK: Copying data from
43 /// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z
44 #pragma omp target update to(z[ : 10])
45 
46 /// target map with always modifier (for x) forces a copy between host and
47 /// device global, which we must execute to keep the two global copies
48 /// consistent. k's content (host address) is passed by-value to the kernel
49 /// (Size=20 case). y, being a local variable, is also passed by-value to the
50 /// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified
51 /// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared
52 /// memory CHECK: Copying data from host to device, HstPtr={{.*}},
53 /// TgtPtr={{.*}}, Size=4, Name=x
54 #pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y)   \
55     map(from : dev_k)
56   {
57     x++;
58     y++;
59     for (size_t t = 0; t < 10; t++)
60       z[t]++;
61     dev_k = k;
62   }
63 /// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
64 /// Size=20, Name=k
65 
66 /// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
67 /// Size=4, Name=x
68 
69 /// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
70 /// Size=40, Name=z
71 #pragma omp target update from(z[ : 10])
72 
73   /// CHECK-NOT: k pointer not correctly passed to kernel
74   if (dev_k != k)
75     printf("k pointer not correctly passed to kernel\n");
76 
77   delete[] k;
78   return 0;
79 }
80