xref: /llvm-project/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (revision 7954a0514ba7de40dba6c598af830fd1b7a8bf0c)
1// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR
2// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU
3
4// // Check no-optnone and target-cpu behavior
5// RUN: %clang_cc1 -cl-std=CL2.0 -O1 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 -target-feature -sram-ecc -fdenormal-fp-math-f32=preserve-sign %s | FileCheck %s --check-prefixes=CHECK,GFX900
6
7
8typedef struct {int a;} ndrange_t;
9
10void callee(long id, global long *out) {
11  out[id] = id;
12}
13
14kernel void test(global char *a, char b, global long *c, long d) {
15  queue_t default_queue;
16  unsigned flags = 0;
17  ndrange_t ndrange;
18
19  enqueue_kernel(default_queue, flags, ndrange,
20                 ^(void) {
21                 a[0] = b;
22                 });
23
24  enqueue_kernel(default_queue, flags, ndrange,
25                 ^(void) {
26                 a[0] = b;
27                 c[0] = d;
28                 });
29  enqueue_kernel(default_queue, flags, ndrange,
30                 ^(local void *lp) {
31                 a[0] = b;
32                 c[0] = d;
33                 ((local int*)lp)[0] = 1;
34                 }, 100);
35
36  void (^block)(void) = ^{
37    callee(d, c);
38  };
39
40  enqueue_kernel(default_queue, flags, ndrange, block);
41}
42
43// The target attribute from the caller is not propagated to the block.
44
45// FIXME: Something is broken and inconsistent. Either the builtin use in the
46// block should be rejected, or the target attribute needs to apply to the block
47// as well.
48// https://github.com/llvm/llvm-project/issues/60005
49__attribute__((target("s-memtime-inst")))
50kernel void test_target_features_kernel(global int *i) {
51  queue_t default_queue;
52  unsigned flags = 0;
53  ndrange_t ndrange;
54
55  __builtin_amdgcn_s_memtime();
56
57  enqueue_kernel(default_queue, flags, ndrange,
58                 ^(void) {
59                   __builtin_amdgcn_s_memtime();
60                 });
61}
62
63//.
64// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
65// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
66//.
67// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
68// NOCPU-LABEL: define {{[^@]+}}@callee
69// NOCPU-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
70// NOCPU-NEXT:  entry:
71// NOCPU-NEXT:    [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
72// NOCPU-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
73// NOCPU-NEXT:    [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
74// NOCPU-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
75// NOCPU-NEXT:    store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8
76// NOCPU-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
77// NOCPU-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8
78// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
79// NOCPU-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8
80// NOCPU-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
81// NOCPU-NEXT:    store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8
82// NOCPU-NEXT:    ret void
83//
84//
85// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
86// NOCPU-LABEL: define {{[^@]+}}@test
87// NOCPU-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
88// NOCPU-NEXT:  entry:
89// NOCPU-NEXT:    [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
90// NOCPU-NEXT:    [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
91// NOCPU-NEXT:    [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
92// NOCPU-NEXT:    [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
93// NOCPU-NEXT:    [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
94// NOCPU-NEXT:    [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
95// NOCPU-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
96// NOCPU-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
97// NOCPU-NEXT:    [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
98// NOCPU-NEXT:    [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
99// NOCPU-NEXT:    [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
100// NOCPU-NEXT:    [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
101// NOCPU-NEXT:    [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
102// NOCPU-NEXT:    [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
103// NOCPU-NEXT:    [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
104// NOCPU-NEXT:    [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
105// NOCPU-NEXT:    [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
106// NOCPU-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
107// NOCPU-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
108// NOCPU-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
109// NOCPU-NEXT:    [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
110// NOCPU-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
111// NOCPU-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
112// NOCPU-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
113// NOCPU-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
114// NOCPU-NEXT:    [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
115// NOCPU-NEXT:    [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
116// NOCPU-NEXT:    [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
117// NOCPU-NEXT:    [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
118// NOCPU-NEXT:    [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
119// NOCPU-NEXT:    [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
120// NOCPU-NEXT:    [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
121// NOCPU-NEXT:    [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
122// NOCPU-NEXT:    [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
123// NOCPU-NEXT:    store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
124// NOCPU-NEXT:    store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1
125// NOCPU-NEXT:    store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
126// NOCPU-NEXT:    store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8
127// NOCPU-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4
128// NOCPU-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
129// NOCPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
130// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
131// NOCPU-NEXT:    [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
132// NOCPU-NEXT:    store i32 25, ptr [[BLOCK_SIZE]], align 8
133// NOCPU-NEXT:    [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
134// NOCPU-NEXT:    store i32 8, ptr [[BLOCK_ALIGN]], align 4
135// NOCPU-NEXT:    [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2
136// NOCPU-NEXT:    store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8
137// NOCPU-NEXT:    [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3
138// NOCPU-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
139// NOCPU-NEXT:    store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8
140// NOCPU-NEXT:    [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4
141// NOCPU-NEXT:    [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
142// NOCPU-NEXT:    store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8
143// NOCPU-NEXT:    [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]])
144// NOCPU-NEXT:    [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
145// NOCPU-NEXT:    [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
146// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
147// NOCPU-NEXT:    [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
148// NOCPU-NEXT:    store i32 41, ptr [[BLOCK_SIZE4]], align 8
149// NOCPU-NEXT:    [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
150// NOCPU-NEXT:    store i32 8, ptr [[BLOCK_ALIGN5]], align 4
151// NOCPU-NEXT:    [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2
152// NOCPU-NEXT:    store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8
153// NOCPU-NEXT:    [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3
154// NOCPU-NEXT:    [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
155// NOCPU-NEXT:    store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8
156// NOCPU-NEXT:    [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6
157// NOCPU-NEXT:    [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
158// NOCPU-NEXT:    store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8
159// NOCPU-NEXT:    [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4
160// NOCPU-NEXT:    [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
161// NOCPU-NEXT:    store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8
162// NOCPU-NEXT:    [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5
163// NOCPU-NEXT:    [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
164// NOCPU-NEXT:    store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8
165// NOCPU-NEXT:    [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]])
166// NOCPU-NEXT:    [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
167// NOCPU-NEXT:    [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
168// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
169// NOCPU-NEXT:    [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
170// NOCPU-NEXT:    store i32 41, ptr [[BLOCK_SIZE13]], align 8
171// NOCPU-NEXT:    [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
172// NOCPU-NEXT:    store i32 8, ptr [[BLOCK_ALIGN14]], align 4
173// NOCPU-NEXT:    [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2
174// NOCPU-NEXT:    store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8
175// NOCPU-NEXT:    [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3
176// NOCPU-NEXT:    [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
177// NOCPU-NEXT:    store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8
178// NOCPU-NEXT:    [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6
179// NOCPU-NEXT:    [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
180// NOCPU-NEXT:    store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8
181// NOCPU-NEXT:    [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4
182// NOCPU-NEXT:    [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
183// NOCPU-NEXT:    store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8
184// NOCPU-NEXT:    [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
185// NOCPU-NEXT:    [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
186// NOCPU-NEXT:    store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8
187// NOCPU-NEXT:    [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
188// NOCPU-NEXT:    store i64 100, ptr [[TMP18]], align 8
189// NOCPU-NEXT:    [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
190// NOCPU-NEXT:    [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
191// NOCPU-NEXT:    store i32 32, ptr [[BLOCK_SIZE22]], align 8
192// NOCPU-NEXT:    [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
193// NOCPU-NEXT:    store i32 8, ptr [[BLOCK_ALIGN23]], align 4
194// NOCPU-NEXT:    [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2
195// NOCPU-NEXT:    store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8
196// NOCPU-NEXT:    [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3
197// NOCPU-NEXT:    [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
198// NOCPU-NEXT:    store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8
199// NOCPU-NEXT:    [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
200// NOCPU-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
201// NOCPU-NEXT:    store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8
202// NOCPU-NEXT:    store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8
203// NOCPU-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
204// NOCPU-NEXT:    [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
205// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
206// NOCPU-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8
207// NOCPU-NEXT:    [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]])
208// NOCPU-NEXT:    ret void
209//
210//
211// NOCPU: Function Attrs: convergent noinline nounwind optnone
212// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke
213// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] {
214// NOCPU-NEXT:  entry:
215// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
216// NOCPU-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
217// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
218// NOCPU-NEXT:    [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
219// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
220// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
221// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
222// NOCPU-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
223// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
224// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
225// NOCPU-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
226// NOCPU-NEXT:    store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
227// NOCPU-NEXT:    ret void
228//
229//
230// NOCPU: Function Attrs: convergent nounwind
231// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
232// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
233// NOCPU-NEXT:  entry:
234// NOCPU-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
235// NOCPU-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
236// NOCPU-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
237// NOCPU-NEXT:    call void @__test_block_invoke(ptr [[TMP2]])
238// NOCPU-NEXT:    ret void
239//
240//
241// NOCPU: Function Attrs: convergent noinline nounwind optnone
242// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2
243// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
244// NOCPU-NEXT:  entry:
245// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
246// NOCPU-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
247// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
248// NOCPU-NEXT:    [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
249// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
250// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
251// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
252// NOCPU-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
253// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
254// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
255// NOCPU-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
256// NOCPU-NEXT:    store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
257// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
258// NOCPU-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
259// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
260// NOCPU-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
261// NOCPU-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
262// NOCPU-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
263// NOCPU-NEXT:    ret void
264//
265//
266// NOCPU: Function Attrs: convergent nounwind
267// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
268// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
269// NOCPU-NEXT:  entry:
270// NOCPU-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
271// NOCPU-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
272// NOCPU-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
273// NOCPU-NEXT:    call void @__test_block_invoke_2(ptr [[TMP2]])
274// NOCPU-NEXT:    ret void
275//
276//
277// NOCPU: Function Attrs: convergent noinline nounwind optnone
278// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3
279// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] {
280// NOCPU-NEXT:  entry:
281// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
282// NOCPU-NEXT:    [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
283// NOCPU-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
284// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
285// NOCPU-NEXT:    [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
286// NOCPU-NEXT:    [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
287// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
288// NOCPU-NEXT:    store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4
289// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
290// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
291// NOCPU-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8
292// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
293// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
294// NOCPU-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
295// NOCPU-NEXT:    store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1
296// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
297// NOCPU-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8
298// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
299// NOCPU-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8
300// NOCPU-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
301// NOCPU-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8
302// NOCPU-NEXT:    [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4
303// NOCPU-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
304// NOCPU-NEXT:    store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4
305// NOCPU-NEXT:    ret void
306//
307//
308// NOCPU: Function Attrs: convergent nounwind
309// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
310// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META11:![0-9]+]] !kernel_arg_access_qual [[META12:![0-9]+]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META14:![0-9]+]] {
311// NOCPU-NEXT:  entry:
312// NOCPU-NEXT:    [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
313// NOCPU-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
314// NOCPU-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
315// NOCPU-NEXT:    call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
316// NOCPU-NEXT:    ret void
317//
318//
319// NOCPU: Function Attrs: convergent noinline nounwind optnone
320// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4
321// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
322// NOCPU-NEXT:  entry:
323// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
324// NOCPU-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
325// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
326// NOCPU-NEXT:    [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
327// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
328// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
329// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
330// NOCPU-NEXT:    [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8
331// NOCPU-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
332// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
333// NOCPU-NEXT:    call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]]
334// NOCPU-NEXT:    ret void
335//
336//
337// NOCPU: Function Attrs: convergent nounwind
338// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
339// NOCPU-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
340// NOCPU-NEXT:  entry:
341// NOCPU-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
342// NOCPU-NEXT:    store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
343// NOCPU-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
344// NOCPU-NEXT:    call void @__test_block_invoke_4(ptr [[TMP2]])
345// NOCPU-NEXT:    ret void
346//
347//
348// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
349// NOCPU-LABEL: define {{[^@]+}}@test_target_features_kernel
350// NOCPU-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META15:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META10]] {
351// NOCPU-NEXT:  entry:
352// NOCPU-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
353// NOCPU-NEXT:    [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
354// NOCPU-NEXT:    [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
355// NOCPU-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
356// NOCPU-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
357// NOCPU-NEXT:    [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
358// NOCPU-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
359// NOCPU-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
360// NOCPU-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
361// NOCPU-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
362// NOCPU-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
363// NOCPU-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4
364// NOCPU-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
365// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
366// NOCPU-NEXT:    [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
367// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
368// NOCPU-NEXT:    [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
369// NOCPU-NEXT:    ret void
370//
371//
372// NOCPU: Function Attrs: convergent noinline nounwind optnone
373// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
374// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
375// NOCPU-NEXT:  entry:
376// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
377// NOCPU-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
378// NOCPU-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
379// NOCPU-NEXT:    [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr
380// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
381// NOCPU-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8
382// NOCPU-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
383// NOCPU-NEXT:    ret void
384//
385//
386// NOCPU: Function Attrs: convergent nounwind
387// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
388// NOCPU-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
389// NOCPU-NEXT:  entry:
390// NOCPU-NEXT:    [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
391// NOCPU-NEXT:    store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
392// NOCPU-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
393// NOCPU-NEXT:    call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
394// NOCPU-NEXT:    ret void
395//
396//
397//
398//
399//
400//
401//
402//
403//
404//
405//
406//
407//
408//
409//
410// GFX900: Function Attrs: convergent norecurse nounwind
411// GFX900-LABEL: define {{[^@]+}}@callee
412// GFX900-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
413// GFX900-NEXT:  entry:
414// GFX900-NEXT:    [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
415// GFX900-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
416// GFX900-NEXT:    [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
417// GFX900-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
418// GFX900-NEXT:    store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3:![0-9]+]]
419// GFX900-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]]
420// GFX900-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
421// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
422// GFX900-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
423// GFX900-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
424// GFX900-NEXT:    store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
425// GFX900-NEXT:    ret void
426//
427//
428// GFX900: Function Attrs: convergent norecurse nounwind
429// GFX900-LABEL: define {{[^@]+}}@test
430// GFX900-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
431// GFX900-NEXT:  entry:
432// GFX900-NEXT:    [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
433// GFX900-NEXT:    [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
434// GFX900-NEXT:    [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
435// GFX900-NEXT:    [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
436// GFX900-NEXT:    [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
437// GFX900-NEXT:    [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
438// GFX900-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
439// GFX900-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
440// GFX900-NEXT:    [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
441// GFX900-NEXT:    [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
442// GFX900-NEXT:    [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
443// GFX900-NEXT:    [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
444// GFX900-NEXT:    [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
445// GFX900-NEXT:    [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5)
446// GFX900-NEXT:    [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5)
447// GFX900-NEXT:    [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
448// GFX900-NEXT:    [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
449// GFX900-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
450// GFX900-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
451// GFX900-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
452// GFX900-NEXT:    [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
453// GFX900-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
454// GFX900-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
455// GFX900-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
456// GFX900-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
457// GFX900-NEXT:    [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
458// GFX900-NEXT:    [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
459// GFX900-NEXT:    [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr
460// GFX900-NEXT:    [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
461// GFX900-NEXT:    [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
462// GFX900-NEXT:    [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
463// GFX900-NEXT:    [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
464// GFX900-NEXT:    [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
465// GFX900-NEXT:    [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
466// GFX900-NEXT:    store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]]
467// GFX900-NEXT:    store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]]
468// GFX900-NEXT:    store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
469// GFX900-NEXT:    store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
470// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8:[0-9]+]]
471// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
472// GFX900-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17:![0-9]+]]
473// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
474// GFX900-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]]
475// GFX900-NEXT:    [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
476// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
477// GFX900-NEXT:    [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
478// GFX900-NEXT:    store i32 25, ptr [[BLOCK_SIZE]], align 8
479// GFX900-NEXT:    [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
480// GFX900-NEXT:    store i32 8, ptr [[BLOCK_ALIGN]], align 4
481// GFX900-NEXT:    [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2
482// GFX900-NEXT:    store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8
483// GFX900-NEXT:    [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3
484// GFX900-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
485// GFX900-NEXT:    store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA14]]
486// GFX900-NEXT:    [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4
487// GFX900-NEXT:    [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
488// GFX900-NEXT:    store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA16]]
489// GFX900-NEXT:    [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]])
490// GFX900-NEXT:    [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
491// GFX900-NEXT:    [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
492// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
493// GFX900-NEXT:    [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
494// GFX900-NEXT:    store i32 41, ptr [[BLOCK_SIZE4]], align 8
495// GFX900-NEXT:    [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
496// GFX900-NEXT:    store i32 8, ptr [[BLOCK_ALIGN5]], align 4
497// GFX900-NEXT:    [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2
498// GFX900-NEXT:    store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8
499// GFX900-NEXT:    [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3
500// GFX900-NEXT:    [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
501// GFX900-NEXT:    store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA14]]
502// GFX900-NEXT:    [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6
503// GFX900-NEXT:    [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
504// GFX900-NEXT:    store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA16]]
505// GFX900-NEXT:    [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4
506// GFX900-NEXT:    [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
507// GFX900-NEXT:    store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]]
508// GFX900-NEXT:    [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5
509// GFX900-NEXT:    [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
510// GFX900-NEXT:    store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]]
511// GFX900-NEXT:    [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]])
512// GFX900-NEXT:    [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
513// GFX900-NEXT:    [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
514// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
515// GFX900-NEXT:    [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
516// GFX900-NEXT:    store i32 41, ptr [[BLOCK_SIZE13]], align 8
517// GFX900-NEXT:    [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
518// GFX900-NEXT:    store i32 8, ptr [[BLOCK_ALIGN14]], align 4
519// GFX900-NEXT:    [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2
520// GFX900-NEXT:    store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8
521// GFX900-NEXT:    [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3
522// GFX900-NEXT:    [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
523// GFX900-NEXT:    store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA14]]
524// GFX900-NEXT:    [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6
525// GFX900-NEXT:    [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
526// GFX900-NEXT:    store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA16]]
527// GFX900-NEXT:    [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4
528// GFX900-NEXT:    [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
529// GFX900-NEXT:    store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]]
530// GFX900-NEXT:    [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
531// GFX900-NEXT:    [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
532// GFX900-NEXT:    store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]]
533// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
534// GFX900-NEXT:    [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
535// GFX900-NEXT:    store i64 100, ptr [[TMP18]], align 8
536// GFX900-NEXT:    [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
537// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]]
538// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
539// GFX900-NEXT:    [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
540// GFX900-NEXT:    store i32 32, ptr [[BLOCK_SIZE22]], align 8
541// GFX900-NEXT:    [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
542// GFX900-NEXT:    store i32 8, ptr [[BLOCK_ALIGN23]], align 4
543// GFX900-NEXT:    [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2
544// GFX900-NEXT:    store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8
545// GFX900-NEXT:    [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3
546// GFX900-NEXT:    [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
547// GFX900-NEXT:    store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]]
548// GFX900-NEXT:    [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
549// GFX900-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
550// GFX900-NEXT:    store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]]
551// GFX900-NEXT:    store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]]
552// GFX900-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
553// GFX900-NEXT:    [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
554// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
555// GFX900-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]]
556// GFX900-NEXT:    [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]])
557// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]]
558// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
559// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
560// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
561// GFX900-NEXT:    ret void
562//
563//
564// GFX900: Function Attrs: convergent nounwind
565// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke
566// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] {
567// GFX900-NEXT:  entry:
568// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
569// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
570// GFX900-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
571// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
572// GFX900-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
573// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
574// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
575// GFX900-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
576// GFX900-NEXT:    store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
577// GFX900-NEXT:    ret void
578//
579//
580// GFX900: Function Attrs: convergent nounwind
581// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
582// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] {
583// GFX900-NEXT:  entry:
584// GFX900-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
585// GFX900-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
586// GFX900-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
587// GFX900-NEXT:    call void @__test_block_invoke(ptr [[TMP2]])
588// GFX900-NEXT:    ret void
589//
590//
591// GFX900: Function Attrs: convergent nounwind
592// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2
593// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
594// GFX900-NEXT:  entry:
595// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
596// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
597// GFX900-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
598// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
599// GFX900-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
600// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
601// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
602// GFX900-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
603// GFX900-NEXT:    store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
604// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
605// GFX900-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
// GFX900-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
// GFX900-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// GFX900-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
// GFX900-NEXT:    call void @__test_block_invoke_2(ptr [[TMP2]])
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT:    [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT:    [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
// GFX900-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT:    store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA26:![0-9]+]]
// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
// GFX900-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]]
// GFX900-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0
// GFX900-NEXT:    store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]]
// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
// GFX900-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
// GFX900-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
// GFX900-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT:    [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA26]]
// GFX900-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
// GFX900-NEXT:    store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META28:![0-9]+]] !kernel_arg_access_qual [[META29:![0-9]+]] !kernel_arg_type [[META30:![0-9]+]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META31:![0-9]+]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// GFX900-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
// GFX900-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
// GFX900-NEXT:    call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]])
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
// GFX900-NEXT:    [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT:    call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR9:[0-9]+]]
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
// GFX900-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
// GFX900-NEXT:    store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
// GFX900-NEXT:    call void @__test_block_invoke_4(ptr [[TMP2]])
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define {{[^@]+}}@test_target_features_kernel
// GFX900-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META32:![0-9]+]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META33:![0-9]+]] !kernel_arg_base_type [[META33]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT:    [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT:    [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
// GFX900-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
// GFX900-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// GFX900-NEXT:    [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// GFX900-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
// GFX900-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
// GFX900-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// GFX900-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// GFX900-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA34:![0-9]+]]
// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
// GFX900-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
// GFX900-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
// GFX900-NEXT:    [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT:    [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]]
// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]]
// GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]]
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
// GFX900-NEXT:    ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
// GFX900-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT:  entry:
// GFX900-NEXT:    [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
// GFX900-NEXT:    store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
// GFX900-NEXT:    call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
// GFX900-NEXT:    ret void
//
//.
// NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
// NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
// NOCPU: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// NOCPU: attributes #[[ATTR4]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR6]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
// NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// NOCPU: attributes #[[ATTR8]] = { convergent nounwind }
//.
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
// GFX900: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// GFX900: attributes #[[ATTR8]] = { nounwind }
// GFX900: attributes #[[ATTR9]] = { convergent nounwind }
//.
// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0}
// NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0}
// NOCPU: [[META4]] = !{!"none", !"none", !"none", !"none"}
// NOCPU: [[META5]] = !{!"char*", !"char", !"long*", !"long"}
// NOCPU: [[META6]] = !{!"", !"", !"", !""}
// NOCPU: [[META7]] = !{i32 0}
// NOCPU: [[META8]] = !{!"none"}
// NOCPU: [[META9]] = !{!"__block_literal"}
// NOCPU: [[META10]] = !{!""}
// NOCPU: [[META11]] = !{i32 0, i32 3}
// NOCPU: [[META12]] = !{!"none", !"none"}
// NOCPU: [[META13]] = !{!"__block_literal", !"void*"}
// NOCPU: [[META14]] = !{!"", !""}
// NOCPU: [[META15]] = !{i32 1}
// NOCPU: [[META16]] = !{!"int*"}
//.
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
// GFX900: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0}
// GFX900: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
// GFX900: [[META6]] = !{!"Simple C/C++ TBAA"}
// GFX900: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
// GFX900: [[META8]] = !{!"p1 long", [[META9:![0-9]+]], i64 0}
// GFX900: [[META9]] = !{!"any pointer", [[META5]], i64 0}
// GFX900: [[META10]] = !{i32 1, i32 0, i32 1, i32 0}
// GFX900: [[META11]] = !{!"none", !"none", !"none", !"none"}
// GFX900: [[META12]] = !{!"char*", !"char", !"long*", !"long"}
// GFX900: [[META13]] = !{!"", !"", !"", !""}
// GFX900: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0}
// GFX900: [[META15]] = !{!"p1 omnipotent char", [[META9]], i64 0}
// GFX900: [[TBAA16]] = !{[[META5]], [[META5]], i64 0}
// GFX900: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
// GFX900: [[META18]] = !{!"int", [[META5]], i64 0}
// GFX900: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
// GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0}
// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[TBAA17]]}
// GFX900: [[META22]] = !{i32 0}
// GFX900: [[META23]] = !{!"none"}
// GFX900: [[META24]] = !{!"__block_literal"}
// GFX900: [[META25]] = !{!""}
// GFX900: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
// GFX900: [[META27]] = !{!"p1 void", [[META9]], i64 0}
// GFX900: [[META28]] = !{i32 0, i32 3}
// GFX900: [[META29]] = !{!"none", !"none"}
// GFX900: [[META30]] = !{!"__block_literal", !"void*"}
// GFX900: [[META31]] = !{!"", !""}
// GFX900: [[META32]] = !{i32 1}
// GFX900: [[META33]] = !{!"int*"}
// GFX900: [[TBAA34]] = !{[[META35:![0-9]+]], [[META35]], i64 0}
// GFX900: [[META35]] = !{!"p1 int", [[META9]], i64 0}
//.
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// CHECK: {{.*}}