; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll

@scalar = internal addrspace(3) global float 0.0, align 4
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4

define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
; CHECK-LABEL: define amdgpu_kernel void @load_store_lds_f32(
; CHECK-SAME: i32 [[I:%.*]], float [[V:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP:%.*]] = load float, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @use(float [[TMP]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @use(float [[TMP2]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4
; CHECK-NEXT:    call void @use(float [[TMP3]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr addrspace(3) [[TMP4]], align 4
; CHECK-NEXT:    call void @use(float [[TMP5]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) [[TMP4]], align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 [[I]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr addrspace(3) [[TMP7]], align 4
; CHECK-NEXT:    call void @use(float [[TMP8]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) [[TMP7]], align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load float, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  call void @use(float %tmp)
  store float %v, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp1 = addrspacecast ptr addrspace(3) @scalar to ptr
  %tmp2 = load float, ptr %tmp1, align 4
  call void @use(float %tmp2)
  store float %v, ptr %tmp1, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp3 = load float, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4
  call void @use(float %tmp3)
  store float %v, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp4 = getelementptr inbounds [10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5
  %tmp5 = load float, ptr %tmp4, align 4
  call void @use(float %tmp5)
  store float %v, ptr %tmp4, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp6 = addrspacecast ptr addrspace(3) @array to ptr
  %tmp7 = getelementptr inbounds [10 x float], ptr %tmp6, i32 0, i32 %i
  %tmp8 = load float, ptr %tmp7, align 4
  call void @use(float %tmp8)
  store float %v, ptr %tmp7, align 4
  call void @llvm.amdgcn.s.barrier()
  ret void
}

define i32 @constexpr_load_int_from_float_lds() #0 {
; CHECK-LABEL: define i32 @constexpr_load_int_from_float_lds(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    ret i32 [[TMP]]
;
bb:
  %tmp = load i32, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  ret i32 %tmp
}

define i32 @load_int_from_global_float(ptr addrspace(1) %input, i32 %i, i32 %j) #0 {
; CHECK-LABEL: define i32 @load_int_from_global_float(
; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i32 [[I]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i32 [[J]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4
; CHECK-NEXT:    ret i32 [[TMP4]]
;
bb:
  %tmp = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = getelementptr float, ptr %tmp, i32 %i
  %tmp2 = getelementptr float, ptr %tmp1, i32 %j
  %tmp4 = load i32, ptr %tmp2
  ret i32 %tmp4
}

define amdgpu_kernel void @nested_const_expr() #0 {
; CHECK-LABEL: define amdgpu_kernel void @nested_const_expr(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    store i32 1, ptr addrspace(3) getelementptr ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4
; CHECK-NEXT:    ret void
;
  store i32 1, ptr bitcast (ptr getelementptr ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 1) to ptr), align 4

  ret void
}

define amdgpu_kernel void @rauw(ptr addrspace(1) %input) #0 {
; CHECK-LABEL: define amdgpu_kernel void @rauw(
; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i64 10
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(1) [[ADDR]], align 4
; CHECK-NEXT:    store float [[V]], ptr addrspace(1) [[ADDR]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %generic_input = addrspacecast ptr addrspace(1) %input to ptr
  %addr = getelementptr float, ptr %generic_input, i64 10
  %v = load float, ptr %addr
  store float %v, ptr %addr
  ret void
}

; FIXME: Should be able to eliminate the cast inside the loop
define amdgpu_kernel void @loop() #0 {
; CHECK-LABEL: define amdgpu_kernel void @loop(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[END:%.*]] = getelementptr float, ptr addrspace(3) @array, i64 10
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4
; CHECK-NEXT:    call void @use(float [[V]])
; CHECK-NEXT:    [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1
; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq ptr addrspace(3) [[I2]], [[END]]
; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %p = addrspacecast ptr addrspace(3) @array to ptr
  %end = getelementptr float, ptr %p, i64 10
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi ptr [ %p, %entry ], [ %i2, %loop ]
  %v = load float, ptr %i
  call void @use(float %v)
  %i2 = getelementptr float, ptr %i, i64 1
  %exit_cond = icmp eq ptr %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

@generic_end = external addrspace(1) global ptr

define amdgpu_kernel void @loop_with_generic_bound() #0 {
; CHECK-LABEL: define amdgpu_kernel void @loop_with_generic_bound(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[END:%.*]] = load ptr, ptr addrspace(1) @generic_end, align 8
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4
; CHECK-NEXT:    call void @use(float [[V]])
; CHECK-NEXT:    [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq ptr [[TMP0]], [[END]]
; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %p = addrspacecast ptr addrspace(3) @array to ptr
  %end = load ptr, ptr addrspace(1) @generic_end
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi ptr [ %p, %entry ], [ %i2, %loop ]
  %v = load float, ptr %i
  call void @use(float %v)
  %i2 = getelementptr float, ptr %i, i64 1
  %exit_cond = icmp eq ptr %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

define void @select_bug() #0 {
; CHECK-LABEL: define void @select_bug(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr inttoptr (i64 4873 to ptr), null
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 73, i64 93
; CHECK-NEXT:    [[ADD_PTR157:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[SEL]]
; CHECK-NEXT:    [[CMP169:%.*]] = icmp uge ptr undef, [[ADD_PTR157]]
; CHECK-NEXT:    unreachable
;
  %cmp = icmp ne ptr inttoptr (i64 4873 to ptr), null
  %sel = select i1 %cmp, i64 73, i64 93
  %add.ptr157 = getelementptr inbounds i64, ptr undef, i64 %sel
  %cmp169 = icmp uge ptr undef, %add.ptr157
  unreachable
}

declare void @llvm.amdgcn.s.barrier() #1
declare void @use(float) #0

attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }