; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s
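;
; This file tests the -openmp-hide-memory-transfer-latency optimization in
; OpenMPOpt. The pass tries to hide the latency of host/device memory
; transfers by splitting a synchronous __tgt_target_data_begin_mapper call
; into an asynchronous __tgt_target_data_begin_mapper_issue call, which is
; handed a %struct.__tgt_async_info handle, plus a matching
; __tgt_target_data_begin_mapper_wait on that handle placed as late as
; possible, so that independent host code can run while the transfer is in
; flight.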
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; CHECK: %struct.__tgt_async_info = type { ptr }

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 }

@.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 35]
@.__omp_offloading_heavyComputation1.region_id = weak constant i8 0
@.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 8]
@.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 800]

@.__omp_offloading_heavyComputation2.region_id = weak constant i8 0
@.offload_maptypes.3 = private unnamed_addr constant [2 x i64] [i64 35, i64 35]

@.__omp_offloading_heavyComputation3.region_id = weak constant i8 0
@.offload_sizes.2 = private unnamed_addr constant [2 x i64] [i64 4, i64 0]
@.offload_maptypes.4 = private unnamed_addr constant [2 x i64] [i64 800, i64 544]

@.offload_maptypes.5 = private unnamed_addr constant [1 x i64] [i64 33]

@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8
@.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1

;double heavyComputation1() {
;  double a = rand() % 777;
;  double random = rand();
;
;  //#pragma omp target data map(a)
;  void* args[1];
;  args[0] = &a;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < 1000; ++i) {
;    a *= i*i / 2;
;  }
;
;  return random + a;
;}
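; The transfer of `a` is independent of the second rand() call, so the pass
; could issue the transfer early and only wait for it right before the
; target region; the FIXME markers in the body below record where the
; issue/wait pair should eventually be placed. As the assertions show, this
; case is not transformed yet: the begin_mapper call remains synchronous.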
define dso_local double @heavyComputation1() {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation1() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca double, align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 777
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to double
; CHECK-NEXT:    store double [[CONV]], ptr [[A]], align 8
; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper(ptr @[[GLOB0:[0-9]+]], i64 -1, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null)
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    store i64 [[TMP0]], ptr [[DOTOFFLOAD_BASEPTRS4]], align 8
; CHECK-NEXT:    store i64 [[TMP0]], ptr [[DOTOFFLOAD_PTRS5]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(ptr @[[GLOB0]], i64 -1, ptr nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS4]], ptr nonnull [[DOTOFFLOAD_PTRS5]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null, i32 0, i32 0)
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK:       omp_offload.failed:
; CHECK-NEXT:    call void @heavyComputation1FallBack(i64 [[TMP0]])
; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
; CHECK:       omp_offload.cont:
; CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[CALL1]] to double
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null)
; CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[A]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP2]], [[CONV2]]
; CHECK-NEXT:    ret double [[ADD]]
;

entry:
  %a = alloca double, align 8
  %.offload_baseptrs = alloca [1 x ptr], align 8
  %.offload_ptrs = alloca [1 x ptr], align 8
  %.offload_baseptrs4 = alloca [1 x ptr], align 8
  %.offload_ptrs5 = alloca [1 x ptr], align 8

  %call = tail call i32 (...) @rand()
  %rem = srem i32 %call, 777
  %conv = sitofp i32 %rem to double
  store double %conv, ptr %a, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(ptr @0, ...) should be moved here.
  %call1 = tail call i32 (...) @rand()

  store ptr %a, ptr %.offload_baseptrs, align 8
  store ptr %a, ptr %.offload_ptrs, align 8
  call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null)

  %0 = load i64, ptr %a, align 8
  store i64 %0, ptr %.offload_baseptrs4, align 8
  store i64 %0, ptr %.offload_ptrs5, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %1 = call i32 @__tgt_target_teams_mapper(ptr @0, i64 -1, ptr nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, ptr nonnull %.offload_baseptrs4, ptr nonnull %.offload_ptrs5, ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null, i32 0, i32 0)
  %.not = icmp eq i32 %1, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation1FallBack(i64 %0)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %conv2 = sitofp i32 %call1 to double
  call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null)
  %2 = load double, ptr %a, align 8
  %add = fadd double %2, %conv2
  ret double %add
}

define internal void @heavyComputation1FallBack(i64 %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation1FallBack
; CHECK-SAME: (i64 [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ; Fallback for offloading function heavyComputation1.
  ret void
}

;int heavyComputation2(double* a, unsigned size) {
;  int random = rand() % 7;
;
;  //#pragma omp target data map(a[0:size], size)
;  void* args[2];
;  args[0] = &a;
;  args[1] = &size;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < size; ++i) {
;    a[i] = ++a[i] * 3.141624;
;  }
;
;  return random;
;}
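; Same scenario as heavyComputation3 below, except that %a is not marked
; noalias here, presumably to exercise how the pass's alias analysis treats
; the mapped pointer. The assertions show this case is not split either.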
define dso_local i32 @heavyComputation2(ptr %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2
; CHECK-SAME: (ptr [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8
; CHECK-NEXT:    store i64 [[TMP0]], ptr [[DOTOFFLOAD_SIZES]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[SIZE_ADDR]], ptr [[TMP1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[SIZE_ADDR]], ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i64 0, i64 1
; CHECK-NEXT:    store i64 4, ptr [[TMP3]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null)
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[SIZE_CASTED:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_BASEPTRS2]], align 8
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_PTRS3]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[A]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[A]], ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(ptr @[[GLOB0]], i64 -1, ptr nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS2]], ptr nonnull [[DOTOFFLOAD_PTRS3]], ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0)
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP7]], 0
; CHECK-NEXT:    br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK:       omp_offload.failed:
; CHECK-NEXT:    call void @heavyComputation2FallBack(i64 [[SIZE_CASTED]], ptr [[A]])
; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
; CHECK:       omp_offload.cont:
; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 7
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null)
; CHECK-NEXT:    ret i32 [[REM]]
;

entry:
  %size.addr = alloca i32, align 4
  %.offload_baseptrs = alloca [2 x ptr], align 8
  %.offload_ptrs = alloca [2 x ptr], align 8
  %.offload_sizes = alloca [2 x i64], align 8
  %.offload_baseptrs2 = alloca [2 x ptr], align 8
  %.offload_ptrs3 = alloca [2 x ptr], align 8

  store i32 %size, ptr %size.addr, align 4
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  store ptr %a, ptr %.offload_baseptrs, align 8
  store ptr %a, ptr %.offload_ptrs, align 8
  store i64 %0, ptr %.offload_sizes, align 8
  %1 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i64 0, i64 1
  store ptr %size.addr, ptr %1, align 8
  %2 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i64 0, i64 1
  store ptr %size.addr, ptr %2, align 8
  %3 = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i64 0, i64 1
  store i64 4, ptr %3, align 8
  call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null)

  %4 = load i32, ptr %size.addr, align 4
  %size.casted = zext i32 %4 to i64
  store i64 %size.casted, ptr %.offload_baseptrs2, align 8
  store i64 %size.casted, ptr %.offload_ptrs3, align 8
  %5 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs2, i64 0, i64 1
  store ptr %a, ptr %5, align 8
  %6 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs3, i64 0, i64 1
  store ptr %a, ptr %6, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %7 = call i32 @__tgt_target_teams_mapper(ptr @0, i64 -1, ptr nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, ptr nonnull %.offload_baseptrs2, ptr nonnull %.offload_ptrs3, ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0)
  %.not = icmp eq i32 %7, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation2FallBack(i64 %size.casted, ptr %a)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %rem = srem i32 %call, 7
  call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null)
  ret i32 %rem
}

define internal void @heavyComputation2FallBack(i64 %size, ptr %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2FallBack
; CHECK-SAME: (i64 [[SIZE:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ; Fallback for offloading function heavyComputation2.
  ret void
}

;int heavyComputation3(double* restrict a, unsigned size) {
;  int random = rand() % 7;
;
;  //#pragma omp target data map(a[0:size], size)
;  void* args[2];
;  args[0] = &a;
;  args[1] = &size;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < size; ++i) {
;    a[i] = ++a[i] * 3.141624;
;  }
;
;  return random;
;}
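; Identical to heavyComputation2 above except that %a is noalias (restrict),
; so rand() cannot touch the mapped buffer and hoisting the issue above it
; should be safe; the FIXME markers below record the intended placement,
; which the pass does not yet perform for this case.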
define dso_local i32 @heavyComputation3(ptr noalias %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation3
; CHECK-SAME: (ptr noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT:    store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8
; CHECK-NEXT:    store i64 [[TMP0]], ptr [[DOTOFFLOAD_SIZES]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[SIZE_ADDR]], ptr [[TMP1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[SIZE_ADDR]], ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i64 0, i64 1
; CHECK-NEXT:    store i64 4, ptr [[TMP3]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null)
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[SIZE_CASTED:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_BASEPTRS2]], align 8
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_PTRS3]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[A]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i64 0, i64 1
; CHECK-NEXT:    store ptr [[A]], ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(ptr @[[GLOB0]], i64 -1, ptr nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS2]], ptr nonnull [[DOTOFFLOAD_PTRS3]], ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0)
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP7]], 0
; CHECK-NEXT:    br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK:       omp_offload.failed:
; CHECK-NEXT:    call void @heavyComputation3FallBack(i64 [[SIZE_CASTED]], ptr [[A]])
; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
; CHECK:       omp_offload.cont:
; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 7
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null)
; CHECK-NEXT:    ret i32 [[REM]]
;

entry:
  %size.addr = alloca i32, align 4
  %.offload_baseptrs = alloca [2 x ptr], align 8
  %.offload_ptrs = alloca [2 x ptr], align 8
  %.offload_sizes = alloca [2 x i64], align 8
  %.offload_baseptrs2 = alloca [2 x ptr], align 8
  %.offload_ptrs3 = alloca [2 x ptr], align 8
  store i32 %size, ptr %size.addr, align 4

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(ptr @0, ...) should be moved here.
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  store ptr %a, ptr %.offload_baseptrs, align 8
  store ptr %a, ptr %.offload_ptrs, align 8
  store i64 %0, ptr %.offload_sizes, align 8
  %1 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i64 0, i64 1
  store ptr %size.addr, ptr %1, align 8
  %2 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i64 0, i64 1
  store ptr %size.addr, ptr %2, align 8
  %3 = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i64 0, i64 1
  store i64 4, ptr %3, align 8
  call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null)

  %4 = load i32, ptr %size.addr, align 4
  %size.casted = zext i32 %4 to i64
  store i64 %size.casted, ptr %.offload_baseptrs2, align 8
  store i64 %size.casted, ptr %.offload_ptrs3, align 8
  %5 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs2, i64 0, i64 1
  store ptr %a, ptr %5, align 8
  %6 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs3, i64 0, i64 1
  store ptr %a, ptr %6, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %7 = call i32 @__tgt_target_teams_mapper(ptr @0, i64 -1, ptr nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, ptr nonnull %.offload_baseptrs2, ptr nonnull %.offload_ptrs3, ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0)
  %.not = icmp eq i32 %7, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation3FallBack(i64 %size.casted, ptr %a)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %rem = srem i32 %call, 7
  call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null)
  ret i32 %rem
}

define internal void @heavyComputation3FallBack(i64 %size, ptr %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation3FallBack
; CHECK-SAME: (i64 [[SIZE:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ; Fallback for offloading function heavyComputation3.
  ret void
}

;int dataTransferOnly1(double* restrict a, unsigned size) {
;  // Random computation.
;  int random = rand();
;
;  //#pragma omp target data map(to:a[0:size])
;  void* args[1];
;  args[0] = &a;
;  __tgt_target_data_begin(..., args, ...)
;
;  // Random computation.
;  random %= size;
;  return random;
;}
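; This is the case the pass currently handles: the urem depends only on
; %call and %size, not on the mapped memory, so the synchronous begin_mapper
; call is split into an issue/wait pair around it. Schematically, the pass
; rewrites
;
;   call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 1, ...)
;   %rem = urem i32 %call, %size
;
; into
;
;   %handle = alloca %struct.__tgt_async_info, align 8
;   call void @__tgt_target_data_begin_mapper_issue(ptr @0, i64 -1, i32 1, ..., ptr %handle)
;   %rem = urem i32 %call, %size
;   call void @__tgt_target_data_begin_mapper_wait(i64 -1, ptr %handle)
;
; as the assertions below verify.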
define dso_local i32 @dataTransferOnly1(ptr noalias %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1
; CHECK-SAME: (ptr noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [1 x i64], align 8
; CHECK-NEXT:    [[HANDLE:%.*]] = alloca [[STRUCT___TGT_ASYNC_INFO:%.*]], align 8
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8
; CHECK-NEXT:    store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8
; CHECK-NEXT:    store i64 [[TMP0]], ptr [[DOTOFFLOAD_SIZES]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_issue(ptr @[[GLOB0]], i64 -1, i32 1, ptr [[DOTOFFLOAD_BASEPTRS]], ptr [[DOTOFFLOAD_PTRS]], ptr [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.5, ptr null, ptr null, ptr [[HANDLE]])
; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[CALL]], [[SIZE]]
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_wait(i64 -1, ptr [[HANDLE]])
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.5, ptr null, ptr null)
; CHECK-NEXT:    ret i32 [[REM]]
;

entry:
  %.offload_baseptrs = alloca [1 x ptr], align 8
  %.offload_ptrs = alloca [1 x ptr], align 8
  %.offload_sizes = alloca [1 x i64], align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(...) should be moved here.
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  store ptr %a, ptr %.offload_baseptrs, align 8
  store ptr %a, ptr %.offload_ptrs, align 8
  store i64 %0, ptr %.offload_sizes, align 8
  call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.5, ptr null, ptr null)

  %rem = urem i32 %call, %size

  call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.5, ptr null, ptr null)
  ret i32 %rem
}

declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32)
declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
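
; The issue/wait entry points are not declared here; the pass presumably
; materializes their declarations itself. From the assertions above, their
; signatures are:
;   declare void @__tgt_target_data_begin_mapper_issue(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr)
;   declare void @__tgt_target_data_begin_mapper_wait(i64, ptr)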

declare dso_local i32 @rand(...)

!llvm.module.flags = !{!0}

!0 = !{i32 7, !"openmp", i32 50}