1// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR 2// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU 3 4// // Check no-optnone and target-cpu behavior 5// RUN: %clang_cc1 -cl-std=CL2.0 -O1 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 -target-feature -sram-ecc -fdenormal-fp-math-f32=preserve-sign %s | FileCheck %s --check-prefixes=CHECK,GFX900 6 7 8typedef struct {int a;} ndrange_t; 9 10void callee(long id, global long *out) { 11 out[id] = id; 12} 13 14kernel void test(global char *a, char b, global long *c, long d) { 15 queue_t default_queue; 16 unsigned flags = 0; 17 ndrange_t ndrange; 18 19 enqueue_kernel(default_queue, flags, ndrange, 20 ^(void) { 21 a[0] = b; 22 }); 23 24 enqueue_kernel(default_queue, flags, ndrange, 25 ^(void) { 26 a[0] = b; 27 c[0] = d; 28 }); 29 enqueue_kernel(default_queue, flags, ndrange, 30 ^(local void *lp) { 31 a[0] = b; 32 c[0] = d; 33 ((local int*)lp)[0] = 1; 34 }, 100); 35 36 void (^block)(void) = ^{ 37 callee(d, c); 38 }; 39 40 enqueue_kernel(default_queue, flags, ndrange, block); 41} 42 43// The target attribute from the caller is not propagated to the block. 44 45// FIXME: Something is broken and inconsistent. Either the builtin use in the 46// block should be rejected, or the target attribute needs to apply to the block 47// as well. 48// https://github.com/llvm/llvm-project/issues/60005 49__attribute__((target("s-memtime-inst"))) 50kernel void test_target_features_kernel(global int *i) { 51 queue_t default_queue; 52 unsigned flags = 0; 53 ndrange_t ndrange; 54 55 __builtin_amdgcn_s_memtime(); 56 57 enqueue_kernel(default_queue, flags, ndrange, 58 ^(void) { 59 __builtin_amdgcn_s_memtime(); 60 }); 61} 62 63//. 64// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0 65// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 66//. 67// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone 68// NOCPU-LABEL: define {{[^@]+}}@callee 69// NOCPU-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { 70// NOCPU-NEXT: entry: 71// NOCPU-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 72// NOCPU-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 73// NOCPU-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr 74// NOCPU-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr 75// NOCPU-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8 76// NOCPU-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 77// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8 78// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 79// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8 80// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]] 81// NOCPU-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8 82// NOCPU-NEXT: ret void 83// 84// 85// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone 86// NOCPU-LABEL: define {{[^@]+}}@test 87// NOCPU-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { 88// NOCPU-NEXT: entry: 89// NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 90// NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) 91// NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 92// NOCPU-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 93// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 94// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) 95// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) 96// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 97// NOCPU-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) 98// NOCPU-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 99// NOCPU-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 100// NOCPU-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 101// NOCPU-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 102// NOCPU-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5) 103// NOCPU-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5) 104// NOCPU-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) 105// NOCPU-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 106// NOCPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr 107// NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr 108// NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr 109// NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr 110// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr 111// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr 112// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr 113// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 114// NOCPU-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr 115// NOCPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr 116// NOCPU-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr 117// NOCPU-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr 118// NOCPU-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr 119// NOCPU-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr 120// NOCPU-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr 121// NOCPU-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr 122// NOCPU-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr 123// NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8 124// NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1 125// NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8 126// NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8 127// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 128// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 129// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 130// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) 131// NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 132// NOCPU-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 133// NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1 134// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4 135// NOCPU-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2 136// NOCPU-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8 137// NOCPU-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3 138// NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 139// NOCPU-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8 140// NOCPU-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4 141// NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 142// NOCPU-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8 143// NOCPU-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]]) 144// NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 145// NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 146// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) 147// NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0 148// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 149// NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1 150// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4 151// NOCPU-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2 152// NOCPU-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8 153// NOCPU-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3 154// NOCPU-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 155// NOCPU-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8 156// NOCPU-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6 157// NOCPU-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 158// NOCPU-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8 159// NOCPU-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4 160// NOCPU-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 161// NOCPU-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8 162// NOCPU-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5 163// NOCPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 164// NOCPU-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8 165// NOCPU-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]]) 166// NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 167// NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 168// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) 169// NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 170// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 171// NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1 172// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4 173// NOCPU-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2 174// NOCPU-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8 175// NOCPU-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3 176// NOCPU-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 177// NOCPU-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8 178// NOCPU-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6 179// NOCPU-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 180// NOCPU-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8 181// NOCPU-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4 182// NOCPU-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 183// NOCPU-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8 184// NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 185// NOCPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 186// NOCPU-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8 187// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 188// NOCPU-NEXT: store i64 100, ptr [[TMP18]], align 8 189// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) 190// NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 191// NOCPU-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8 192// NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1 193// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4 194// NOCPU-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2 195// NOCPU-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8 196// NOCPU-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3 197// NOCPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 198// NOCPU-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8 199// NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 200// NOCPU-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 201// NOCPU-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8 202// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8 203// NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 204// NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 205// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) 206// NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8 207// NOCPU-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]]) 208// NOCPU-NEXT: ret void 209// 210// 211// NOCPU: Function Attrs: convergent noinline nounwind optnone 212// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke 213// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] { 214// NOCPU-NEXT: entry: 215// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 216// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 217// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 218// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr 219// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 220// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 221// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 222// NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8 223// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 224// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 225// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 226// NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1 227// NOCPU-NEXT: ret void 228// 229// 230// NOCPU: Function Attrs: convergent nounwind 231// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_kernel 232// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] { 233// NOCPU-NEXT: entry: 234// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) 235// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 236// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 237// NOCPU-NEXT: call void @__test_block_invoke(ptr [[TMP2]]) 238// NOCPU-NEXT: ret void 239// 240// 241// NOCPU: Function Attrs: convergent noinline nounwind optnone 242// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2 243// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { 244// NOCPU-NEXT: entry: 245// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 246// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 247// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 248// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr 249// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 250// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 251// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 252// NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8 253// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 254// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 255// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 256// NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1 257// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 258// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8 259// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 260// NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8 261// NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 262// NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8 263// NOCPU-NEXT: ret void 264// 265// 266// NOCPU: Function Attrs: convergent nounwind 267// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel 268// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { 269// NOCPU-NEXT: entry: 270// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 271// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 272// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 273// NOCPU-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]]) 274// NOCPU-NEXT: ret void 275// 276// 277// NOCPU: Function Attrs: convergent noinline nounwind optnone 278// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3 279// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] { 280// NOCPU-NEXT: entry: 281// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 282// NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) 283// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 284// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 285// NOCPU-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr 286// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr 287// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 288// NOCPU-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4 289// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 290// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 291// NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8 292// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 293// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 294// NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 295// NOCPU-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1 296// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 297// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8 298// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 299// NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8 300// NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 301// NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8 302// NOCPU-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4 303// NOCPU-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0 304// NOCPU-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4 305// NOCPU-NEXT: ret void 306// 307// 308// NOCPU: Function Attrs: convergent nounwind 309// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel 310// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META11:![0-9]+]] !kernel_arg_access_qual [[META12:![0-9]+]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META14:![0-9]+]] { 311// NOCPU-NEXT: entry: 312// NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 313// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 314// NOCPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr 315// NOCPU-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]]) 316// NOCPU-NEXT: ret void 317// 318// 319// NOCPU: Function Attrs: convergent noinline nounwind optnone 320// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4 321// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { 322// NOCPU-NEXT: entry: 323// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 324// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 325// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 326// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr 327// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 328// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 329// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 330// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8 331// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 332// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 333// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]] 334// NOCPU-NEXT: ret void 335// 336// 337// NOCPU: Function Attrs: convergent nounwind 338// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel 339// NOCPU-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { 340// NOCPU-NEXT: entry: 341// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) 342// NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 343// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 344// NOCPU-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]]) 345// NOCPU-NEXT: ret void 346// 347// 348// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone 349// NOCPU-LABEL: define {{[^@]+}}@test_target_features_kernel 350// NOCPU-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META15:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META10]] { 351// NOCPU-NEXT: entry: 352// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 353// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 354// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) 355// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) 356// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 357// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr 358// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr 359// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr 360// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr 361// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 362// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 363// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 364// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() 365// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 366// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 367// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) 368// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) 369// NOCPU-NEXT: ret void 370// 371// 372// NOCPU: Function Attrs: convergent noinline nounwind optnone 373// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke 374// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { 375// NOCPU-NEXT: entry: 376// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 377// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 378// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 379// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr 380// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 381// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 382// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() 383// NOCPU-NEXT: ret void 384// 385// 386// NOCPU: Function Attrs: convergent nounwind 387// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel 388// NOCPU-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { 389// NOCPU-NEXT: entry: 390// NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) 391// NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 392// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 393// NOCPU-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]]) 394// NOCPU-NEXT: ret void 395// 396// 397// 398// 399// 400// 401// 402// 403// 404// 405// 406// 407// 408// 409// 410// GFX900: Function Attrs: convergent norecurse nounwind 411// GFX900-LABEL: define {{[^@]+}}@callee 412// GFX900-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { 413// GFX900-NEXT: entry: 414// GFX900-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 415// GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 416// GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr 417// GFX900-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr 418// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3:![0-9]+]] 419// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]] 420// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] 421// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] 422// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] 423// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]] 424// GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] 425// GFX900-NEXT: ret void 426// 427// 428// GFX900: Function Attrs: convergent norecurse nounwind 429// GFX900-LABEL: define {{[^@]+}}@test 430// GFX900-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { 431// GFX900-NEXT: entry: 432// GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 433// GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) 434// GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 435// GFX900-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 436// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 437// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) 438// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) 439// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 440// GFX900-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) 441// GFX900-NEXT: [[VARTMP2:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 442// GFX900-NEXT: [[BLOCK3:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 443// GFX900-NEXT: [[VARTMP11:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 444// GFX900-NEXT: [[BLOCK12:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 445// GFX900-NEXT: [[BLOCK_SIZES:%.*]] = alloca [1 x i64], align 8, addrspace(5) 446// GFX900-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5) 447// GFX900-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) 448// GFX900-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 449// GFX900-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr 450// GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr 451// GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr 452// GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr 453// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr 454// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr 455// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr 456// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 457// GFX900-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr 458// GFX900-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr 459// GFX900-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr 460// GFX900-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr 461// GFX900-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr 462// GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr 463// GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr 464// GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr 465// GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr 466// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]] 467// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]] 468// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] 469// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] 470// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8:[0-9]+]] 471// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] 472// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17:![0-9]+]] 473// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] 474// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]] 475// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] 476// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] 477// GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 478// GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 479// GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1 480// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4 481// GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2 482// GFX900-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8 483// GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3 484// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] 485// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA14]] 486// GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4 487// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] 488// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA16]] 489// GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]]) 490// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] 491// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] 492// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] 493// GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0 494// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 495// GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1 496// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4 497// GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2 498// GFX900-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8 499// GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3 500// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] 501// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA14]] 502// GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6 503// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] 504// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA16]] 505// GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4 506// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] 507// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]] 508// GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5 509// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] 510// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]] 511// GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]]) 512// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] 513// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] 514// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] 515// GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 516// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 517// GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1 518// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4 519// GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2 520// GFX900-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8 521// GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3 522// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] 523// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA14]] 524// GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6 525// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] 526// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA16]] 527// GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4 528// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] 529// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]] 530// GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 531// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] 532// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]] 533// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]] 534// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 535// GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8 536// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) 537// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]] 538// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]] 539// GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 540// GFX900-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8 541// GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1 542// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4 543// GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2 544// GFX900-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8 545// GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3 546// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] 547// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]] 548// GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 549// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] 550// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]] 551// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]] 552// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] 553// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] 554// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] 555// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]] 556// GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]]) 557// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]] 558// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] 559// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] 560// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]] 561// GFX900-NEXT: ret void 562// 563// 564// GFX900: Function Attrs: convergent nounwind 565// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke 566// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] { 567// GFX900-NEXT: entry: 568// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 569// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 570// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 571// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 572// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]] 573// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 574// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]] 575// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 576// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]] 577// GFX900-NEXT: ret void 578// 579// 580// GFX900: Function Attrs: convergent nounwind 581// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_kernel 582// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { 583// GFX900-NEXT: entry: 584// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) 585// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 586// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 587// GFX900-NEXT: call void @__test_block_invoke(ptr [[TMP2]]) 588// GFX900-NEXT: ret void 589// 590// 591// GFX900: Function Attrs: convergent nounwind 592// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2 593// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { 594// GFX900-NEXT: entry: 595// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 596// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 597// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 598// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 599// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]] 600// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 601// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]] 602// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 603// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]] 604// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 605// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]] 606// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 607// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]] 608// GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 609// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]] 610// GFX900-NEXT: ret void 611// 612// 613// GFX900: Function Attrs: convergent nounwind 614// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel 615// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { 616// GFX900-NEXT: entry: 617// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 618// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 619// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 620// GFX900-NEXT: call void @__test_block_invoke_2(ptr [[TMP2]]) 621// GFX900-NEXT: ret void 622// 623// 624// GFX900: Function Attrs: convergent nounwind 625// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3 626// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] { 627// GFX900-NEXT: entry: 628// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 629// GFX900-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) 630// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 631// GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr 632// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 633// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA26:![0-9]+]] 634// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 635// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]] 636// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 637// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA14]] 638// GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1]], i64 0 639// GFX900-NEXT: store i8 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA16]] 640// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 641// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR2]], align 8, !tbaa [[TBAA3]] 642// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 643// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]] 644// GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 645// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]] 646// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA26]] 647// GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0 648// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA17]] 649// GFX900-NEXT: ret void 650// 651// 652// GFX900: Function Attrs: convergent nounwind 653// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel 654// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META28:![0-9]+]] !kernel_arg_access_qual [[META29:![0-9]+]] !kernel_arg_type [[META30:![0-9]+]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META31:![0-9]+]] { 655// GFX900-NEXT: entry: 656// GFX900-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) 657// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 658// GFX900-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr 659// GFX900-NEXT: call void @__test_block_invoke_3(ptr [[TMP3]], ptr addrspace(3) [[TMP1]]) 660// GFX900-NEXT: ret void 661// 662// 663// GFX900: Function Attrs: convergent nounwind 664// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4 665// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { 666// GFX900-NEXT: entry: 667// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 668// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 669// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 670// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 671// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]] 672// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 673// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]] 674// GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR9:[0-9]+]] 675// GFX900-NEXT: ret void 676// 677// 678// GFX900: Function Attrs: convergent nounwind 679// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel 680// GFX900-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { 681// GFX900-NEXT: entry: 682// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) 683// GFX900-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 684// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 685// GFX900-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]]) 686// GFX900-NEXT: ret void 687// 688// 689// GFX900: Function Attrs: convergent norecurse nounwind 690// GFX900-LABEL: define {{[^@]+}}@test_target_features_kernel 691// GFX900-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META32:![0-9]+]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META33:![0-9]+]] !kernel_arg_base_type [[META33]] !kernel_arg_type_qual [[META25]] { 692// GFX900-NEXT: entry: 693// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 694// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) 695// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) 696// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) 697// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) 698// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr 699// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr 700// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr 701// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr 702// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 703// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA34:![0-9]+]] 704// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]] 705// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] 706// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] 707// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] 708// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() 709// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] 710// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] 711// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] 712// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) 713// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] 714// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] 715// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]] 716// GFX900-NEXT: ret void 717// 718// 719// GFX900: Function Attrs: convergent nounwind 720// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke 721// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { 722// GFX900-NEXT: entry: 723// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 724// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr 725// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 726// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() 727// GFX900-NEXT: ret void 728// 729// 730// GFX900: Function Attrs: convergent nounwind 731// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel 732// GFX900-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { 733// GFX900-NEXT: entry: 734// GFX900-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) 735// GFX900-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 736// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr 737// GFX900-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]]) 738// GFX900-NEXT: ret void 739// 740//. 741// NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } 742// NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } 743// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } 744// NOCPU: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } 745// NOCPU: attributes #[[ATTR4]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } 746// NOCPU: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } 747// NOCPU: attributes #[[ATTR6]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } 748// NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } 749// NOCPU: attributes #[[ATTR8]] = { convergent nounwind } 750//. 751// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } 752// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } 753// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" } 754// GFX900: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } 755// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } 756// GFX900: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } 757// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } 758// GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } 759// GFX900: attributes #[[ATTR8]] = { nounwind } 760// GFX900: attributes #[[ATTR9]] = { convergent nounwind } 761//. 762// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} 763// NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 764// NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0} 765// NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0} 766// NOCPU: [[META4]] = !{!"none", !"none", !"none", !"none"} 767// NOCPU: [[META5]] = !{!"char*", !"char", !"long*", !"long"} 768// NOCPU: [[META6]] = !{!"", !"", !"", !""} 769// NOCPU: [[META7]] = !{i32 0} 770// NOCPU: [[META8]] = !{!"none"} 771// NOCPU: [[META9]] = !{!"__block_literal"} 772// NOCPU: [[META10]] = !{!""} 773// NOCPU: [[META11]] = !{i32 0, i32 3} 774// NOCPU: [[META12]] = !{!"none", !"none"} 775// NOCPU: [[META13]] = !{!"__block_literal", !"void*"} 776// NOCPU: [[META14]] = !{!"", !""} 777// NOCPU: [[META15]] = !{i32 1} 778// NOCPU: [[META16]] = !{!"int*"} 779//. 780// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} 781// GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 782// GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0} 783// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} 784// GFX900: [[META4]] = !{!"long", [[META5:![0-9]+]], i64 0} 785// GFX900: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} 786// GFX900: [[META6]] = !{!"Simple C/C++ TBAA"} 787// GFX900: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} 788// GFX900: [[META8]] = !{!"p1 long", [[META9:![0-9]+]], i64 0} 789// GFX900: [[META9]] = !{!"any pointer", [[META5]], i64 0} 790// GFX900: [[META10]] = !{i32 1, i32 0, i32 1, i32 0} 791// GFX900: [[META11]] = !{!"none", !"none", !"none", !"none"} 792// GFX900: [[META12]] = !{!"char*", !"char", !"long*", !"long"} 793// GFX900: [[META13]] = !{!"", !"", !"", !""} 794// GFX900: [[TBAA14]] = !{[[META15:![0-9]+]], [[META15]], i64 0} 795// GFX900: [[META15]] = !{!"p1 omnipotent char", [[META9]], i64 0} 796// GFX900: [[TBAA16]] = !{[[META5]], [[META5]], i64 0} 797// GFX900: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0} 798// GFX900: [[META18]] = !{!"int", [[META5]], i64 0} 799// GFX900: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} 800// GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0} 801// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[TBAA17]]} 802// GFX900: [[META22]] = !{i32 0} 803// GFX900: [[META23]] = !{!"none"} 804// GFX900: [[META24]] = !{!"__block_literal"} 805// GFX900: [[META25]] = !{!""} 806// GFX900: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} 807// GFX900: [[META27]] = !{!"p1 void", [[META9]], i64 0} 808// GFX900: [[META28]] = !{i32 0, i32 3} 809// GFX900: [[META29]] = !{!"none", !"none"} 810// GFX900: [[META30]] = !{!"__block_literal", !"void*"} 811// GFX900: [[META31]] = !{!"", !""} 812// GFX900: [[META32]] = !{i32 1} 813// GFX900: [[META33]] = !{!"int*"} 814// GFX900: [[TBAA34]] = !{[[META35:![0-9]+]], [[META35]], i64 0} 815// GFX900: [[META35]] = !{!"p1 int", [[META9]], i64 0} 816//. 817//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 818// CHECK: {{.*}} 819