// RUN: %libomptarget-compile-and-run-generic

/*
 * Exercises omp_target_memcpy_rect_async() with a list of two depend objects
 * and checks that the rectangular copy is ordered correctly with respect to
 * sibling tasks that copy into the same device buffer.
 *
 * Expected final contents of the 128-int device buffer:
 *   [0, 16)            77  (last task, copy of e[])
 *   20..22, 25..27     13 14 15 / 17 18 19  (rectangular copy of c[])
 *   every other index in [16, 64)   24  (copy of b[])
 *   [64, 128)          42  (copy of a[])
 */

#include <omp.h>
#include <stdlib.h>

#define NUM_DIMS 3

int main(void) {
  int d = omp_get_default_device();
  int id = omp_get_initial_device();
  int a[128], b[64], c[128], e[16], q[128], i;
  void *p;

  /* Fall back to the initial (host) device when the default device number
     is outside the range reported by omp_get_num_devices(). */
  if (d < 0 || d >= omp_get_num_devices())
    d = id;

  p = omp_target_alloc(130 * sizeof(int), d);
  if (p == NULL)
    return 0; /* Allocation failed: nothing to test, exit successfully. */

  /* Zero the first 128 ints of the target buffer. */
  for (i = 0; i < 128; i++)
    q[i] = 0;
  if (omp_target_memcpy(p, q, 128 * sizeof(int), 0, 0, d, id) != 0)
    abort();

  /* Copy a 2x2x3 block from a 2x3x4 source array into a 3x4x5 destination
     array, both starting at the origin. */
  size_t volume[NUM_DIMS] = {2, 2, 3};
  size_t dst_offsets[NUM_DIMS] = {0, 0, 0};
  size_t src_offsets[NUM_DIMS] = {0, 0, 0};
  size_t dst_dimensions[NUM_DIMS] = {3, 4, 5};
  size_t src_dimensions[NUM_DIMS] = {2, 3, 4};

  /* Distinct fill values so that each copy can be recognised afterwards. */
  for (i = 0; i < 128; i++)
    a[i] = 42;
  for (i = 0; i < 64; i++)
    b[i] = 24;
  for (i = 0; i < 128; i++)
    c[i] = 0;
  for (i = 0; i < 16; i++)
    e[i] = 77;

  omp_depend_t obj[2];

#pragma omp parallel num_threads(5)
#pragma omp single
  {
    /* Step 1: fill the whole buffer with 42. */
#pragma omp task depend(out : p)
    if (omp_target_memcpy(p, a, 128 * sizeof(int), 0, 0, d, id) != 0)
      abort();

    /* Step 2: overwrite the first 64 ints with 24. */
#pragma omp task depend(inout : p)
    if (omp_target_memcpy(p, b, 64 * sizeof(int), 0, 0, d, id) != 0)
      abort();

    /* Independent of p: produce the source data for the rectangular copy. */
#pragma omp task depend(out : c)
    for (i = 0; i < 128; i++)
      c[i] = i + 1;

    /* The async copy must wait for the writes to p above and for c. */
#pragma omp depobj(obj[0]) depend(inout : p)
#pragma omp depobj(obj[1]) depend(in : c)

    /* Step 3: this produces 1 2 3 - - 5 6 7 - - at positions 0..9 and
       13 14 15 - - 17 18 19 - - at positions 20..29, where '-' marks an
       element the copy does not write.  The return value is not checked
       here; the outcome is validated through the data read back below. */
    omp_target_memcpy_rect_async(p, c, sizeof(int), NUM_DIMS, volume,
                                 dst_offsets, src_offsets, dst_dimensions,
                                 src_dimensions, d, id, 2, obj);

    /* Step 4: overwrite the first 16 ints with 77. */
#pragma omp task depend(in : p)
    if (omp_target_memcpy(p, e, 16 * sizeof(int), 0, 0, d, id) != 0)
      abort();
  }

  /* NOTE(review): every task above was created inside the parallel region
     that has just ended, so this taskwait looks redundant; it is kept to
     preserve the original test sequence. */
#pragma omp taskwait

  /* Release the depend objects now that no task can still refer to them. */
#pragma omp depobj(obj[0]) destroy
#pragma omp depobj(obj[1]) destroy

  /* Read the buffer back to the host and verify every region. */
  if (omp_target_memcpy(q, p, 128 * sizeof(int), 0, 0, id, d) != 0)
    abort();

  for (i = 0; i < 16; ++i)
    if (q[i] != 77)
      abort();

  /* Positions 16..19, 23 and 24 are not written by the rectangular copy or
     by the copy of e[], so they must still hold the value copied from b[]. */
  for (i = 16; i < 20; ++i)
    if (q[i] != 24)
      abort();
  if (q[23] != 24 || q[24] != 24)
    abort();

  if (q[20] != 13 || q[21] != 14 || q[22] != 15 || q[25] != 17 ||
      q[26] != 18 || q[27] != 19)
    abort();

  for (i = 28; i < 64; ++i)
    if (q[i] != 24)
      abort();

  for (i = 64; i < 128; ++i)
    if (q[i] != 42)
      abort();

  omp_target_free(p, d);
  return 0;
}