; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s

; Instcombine pulls the addrspacecast out of the select, make sure
; this doesn't do something insane on non-canonical IR.
;
; Address-space numbering as used by the function names in this file:
; addrspace(3) = group, addrspace(5) = private, addrspace(1) = global,
; and the default address space (plain `ptr`) = flat.

; Both select operands are flat casts of group pointers and the result is
; returned as a flat pointer: the select is expected to be done on the group
; pointers, followed by a single cast back to flat for the return value.
; CHECK-LABEL: define ptr @return_select_group_flat(
; CHECK-SAME: i1 [[C:%.*]], ptr addrspace(3) [[GROUP_PTR_0:%.*]], ptr addrspace(3) [[GROUP_PTR_1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[C]], ptr addrspace(3) [[GROUP_PTR_0]], ptr addrspace(3) [[GROUP_PTR_1]]
; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[SELECT]] to ptr
; CHECK-NEXT:    ret ptr [[TMP1]]
define ptr @return_select_group_flat(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %cast1 = addrspacecast ptr addrspace(3) %group.ptr.1 to ptr
  %select = select i1 %c, ptr %cast0, ptr %cast1
  ret ptr %select
}

; Same select shape, but the result feeds a store: both the select and the
; store are expected to be rewritten to use addrspace(3) directly.
; CHECK-LABEL: @store_select_group_flat(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1
; CHECK: store i32 -1, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %cast1 = addrspacecast ptr addrspace(3) %group.ptr.1 to ptr
  %select = select i1 %c, ptr %cast0, ptr %cast1
  store i32 -1, ptr %select
  ret void
}

; Make sure metadata is preserved
; (the !prof attachment must still be present on the rewritten select).
; CHECK-LABEL: @load_select_group_flat_md(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1, !prof !0
; CHECK: %load = load i32, ptr addrspace(3) %select
define i32 @load_select_group_flat_md(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %cast1 = addrspacecast ptr addrspace(3) %group.ptr.1 to ptr
  %select = select i1 %c, ptr %cast0, ptr %cast1, !prof !0
  %load = load i32, ptr %select
  ret i32 %load
}

; Negative test: the operands come from different address spaces (group and
; private), so the IR is expected to be left unchanged on flat pointers.
; CHECK-LABEL: @store_select_mismatch_group_private_flat(
; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
; CHECK: %cast1 = addrspacecast ptr addrspace(5) %private.ptr.1 to ptr
; CHECK: %select = select i1 %c, ptr %cast0, ptr %cast1
; CHECK: store i32 -1, ptr %select
define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(5) %private.ptr.1) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %cast1 = addrspacecast ptr addrspace(5) %private.ptr.1 to ptr
  %select = select i1 %c, ptr %cast0, ptr %cast1
  store i32 -1, ptr %select
  ret void
}

; Globals in addrspace(3); @lds1 is used as a constant select operand below.
@lds0 = internal addrspace(3) global i32 123, align 4
@lds1 = internal addrspace(3) global i32 456, align 4

; One operand is a flat null constant: the select is still expected to move to
; addrspace(3), with the null operand expressed as a constant addrspacecast of
; the flat null.
; CHECK-LABEL: @store_select_group_flat_null(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3))
; CHECK: store i32 -1, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat_null(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr null
  store i32 -1, ptr %select
  ret void
}

; Same as above with the select operands swapped.
; CHECK-LABEL: @store_select_group_flat_null_swap(
; CHECK: %select = select i1 %c, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(3) %group.ptr.0
; CHECK: store i32 -1, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr null, ptr %cast0
  store i32 -1, ptr %select
  ret void
}

; One operand is a flat undef: it is expected to become an addrspace(3) undef.
; CHECK-LABEL: @store_select_group_flat_undef(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) undef
; CHECK: store i32 -1, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr undef
  store i32 -1, ptr %select
  ret void
}

; Same as above with the select operands swapped.
; CHECK-LABEL: @store_select_group_flat_undef_swap(
; CHECK: %select = select i1 %c, ptr addrspace(3) undef, ptr addrspace(3) %group.ptr.0
; CHECK: store i32 -1, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr undef, ptr %cast0
  store i32 -1, ptr %select
  ret void
}

; A getelementptr sits between the select and the store: the rewrite is
; expected to carry through the GEP so that the store uses addrspace(3).
; CHECK-LABEL: @store_select_gep_group_flat_null(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3))
; CHECK: %gep = getelementptr i32, ptr addrspace(3) %select, i64 16
; CHECK: store i32 -1, ptr addrspace(3) %gep
define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr null
  %gep = getelementptr i32, ptr %select, i64 16
  store i32 -1, ptr %gep
  ret void
}

; Global in addrspace(1), used by the address-space mismatch tests below.
@global0 = internal addrspace(1) global i32 123, align 4

; One operand is a constant-expression cast of a group global to flat: the
; constant cast is expected to be dropped, leaving @lds1 as the operand.
; CHECK-LABEL: @store_select_group_flat_constexpr(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) @lds1
; CHECK: store i32 7, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(3) @lds1 to ptr)
  store i32 7, ptr %select
  ret void
}

; One operand is a flat inttoptr constant: it is expected to be wrapped in a
; constant addrspacecast to addrspace(3).
; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) addrspacecast (ptr inttoptr (i64 12345 to ptr) to ptr addrspace(3))
; CHECK: store i32 7, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr inttoptr (i64 12345 to ptr)
  store i32 7, ptr %select
  ret void
}

; One operand is a group inttoptr constant cast to flat: the outer constant
; cast is expected to be dropped, leaving the addrspace(3) inttoptr.
; CHECK-LABEL: @store_select_group_flat_inttoptr_group(
; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) inttoptr (i32 400 to ptr addrspace(3))
; CHECK-NEXT: store i32 7, ptr addrspace(3) %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(3) inttoptr (i32 400 to ptr addrspace(3)) to ptr)
  store i32 7, ptr %select
  ret void
}

; Negative test: a group pointer versus a constant cast of a global
; (addrspace(1)) variable; the IR is expected to be left unchanged.
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
; CHECK: %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) @global0 to ptr)
; CHECK: store i32 7, ptr %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) @global0 to ptr)
  store i32 7, ptr %select
  ret void
}

; Same as above with the select operands swapped.
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
; CHECK: %select = select i1 %c, ptr addrspacecast (ptr addrspace(1) @global0 to ptr), ptr %cast0
; CHECK: store i32 7, ptr %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr addrspacecast (ptr addrspace(1) @global0 to ptr), ptr %cast0
  store i32 7, ptr %select
  ret void
}

; Negative test: both operands are constant casts of null, but from different
; address spaces (3 and 1); the select is expected to be left unchanged.
; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
; CHECK: %select = select i1 %c, ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr)
; CHECK: store i32 7, ptr %select
define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
  %select = select i1 %c, ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr)
  store i32 7, ptr %select
  ret void
}

; External addrspace(3) array whose address is round-tripped through an
; integer in the test below.
@lds2 = external addrspace(3) global [1024 x i32], align 4

; Negative test: the constant operand is an addrspace(1) pointer built via
; ptrtoint/add/inttoptr from a group global; the IR is expected to be left
; unchanged.
; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
; CHECK: %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) inttoptr (i32 add (i32 ptrtoint (ptr addrspace(3) @lds2 to i32), i32 124) to ptr addrspace(1)) to ptr)
; CHECK: store i32 7, ptr %select
define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) inttoptr (i32 add (i32 ptrtoint (ptr addrspace(3) @lds2 to i32), i32 124) to ptr addrspace(1)) to ptr)
  store i32 7, ptr %select
  ret void
}

; Select over vectors of pointers whose elements are extracted and stored
; through: the expected output keeps the casts, the select, the extracts and
; the stores on flat pointers, i.e. this pattern is currently not rewritten.
; CHECK-LABEL: @store_select_group_flat_vector(
; CHECK: %cast0 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.0 to <2 x ptr>
; CHECK: %cast1 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.1 to <2 x ptr>
; CHECK: %select = select i1 %c, <2 x ptr> %cast0, <2 x ptr> %cast1
; CHECK: %extract0 = extractelement <2 x ptr> %select, i32 0
; CHECK: %extract1 = extractelement <2 x ptr> %select, i32 1
; CHECK: store i32 -1, ptr %extract0
; CHECK: store i32 -2, ptr %extract1
define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x ptr addrspace(3)> %group.ptr.0, <2 x ptr addrspace(3)> %group.ptr.1) #0 {
  %cast0 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.0 to <2 x ptr>
  %cast1 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.1 to <2 x ptr>
  %select = select i1 %c, <2 x ptr> %cast0, <2 x ptr> %cast1
  %extract0 = extractelement <2 x ptr> %select, i32 0
  %extract1 = extractelement <2 x ptr> %select, i32 1
  store i32 -1, ptr %extract0
  store i32 -2, ptr %extract1
  ret void
}

attributes #0 = { nounwind }

!0 = !{!"branch_weights", i32 2, i32 10}