xref: /llvm-project/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll (revision d199ff17659fc65a9b1b4ebe2d304cdbfdfe89d7)
1; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
2
3; InstCombine pulls the addrspacecast out of the select; make sure that
4; infer-address-spaces doesn't do something insane on non-canonical IR.
5
; Both select operands are addrspacecasts of addrspace(3) pointers, and the
; selected `ptr` value is returned. The checks expect the select to be done on
; the addrspace(3) arguments directly, followed by a single addrspacecast of
; the select result back to `ptr` for the return.
6; CHECK-LABEL: define ptr @return_select_group_flat(
7; CHECK-SAME: i1 [[C:%.*]], ptr addrspace(3) [[GROUP_PTR_0:%.*]], ptr addrspace(3) [[GROUP_PTR_1:%.*]]) #[[ATTR0:[0-9]+]] {
8; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[C]], ptr addrspace(3) [[GROUP_PTR_0]], ptr addrspace(3) [[GROUP_PTR_1]]
9; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[SELECT]] to ptr
10; CHECK-NEXT:    ret ptr [[TMP1]]
11define ptr @return_select_group_flat(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1) #0 {
12  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
13  %cast1 = addrspacecast ptr addrspace(3) %group.ptr.1 to ptr
14  %select = select i1 %c, ptr %cast0, ptr %cast1
15  ret ptr %select
16}
17
; Select between two addrspace(3) pointers that were each cast to `ptr`, then
; store through the result. The checks expect the select to take the
; addrspace(3) arguments directly and the store to use an addrspace(3) pointer.
18; CHECK-LABEL: @store_select_group_flat(
19; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1
20; CHECK: store i32 -1, ptr addrspace(3) %select
21define amdgpu_kernel void @store_select_group_flat(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1) #0 {
22  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
23  %cast1 = addrspacecast ptr addrspace(3) %group.ptr.1 to ptr
24  %select = select i1 %c, ptr %cast0, ptr %cast1
25  store i32 -1, ptr %select
26  ret void
27}
28
29; Make sure metadata is preserved
; The input select carries a `!prof !0` attachment. The checks expect the
; select rewritten onto addrspace(3) operands to still carry `!prof !0`, and
; the load to read through the addrspace(3) select result.
30; CHECK-LABEL: @load_select_group_flat_md(
31; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1, !prof !0
32; CHECK: %load = load i32, ptr addrspace(3) %select
33define i32 @load_select_group_flat_md(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) %group.ptr.1) #0 {
34  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
35  %cast1 = addrspacecast ptr addrspace(3) %group.ptr.1 to ptr
36  %select = select i1 %c, ptr %cast0, ptr %cast1, !prof !0
37  %load = load i32, ptr %select
38  ret i32 %load
39}
40
; Negative case: the two select operands are cast from different address
; spaces (addrspace(3) and addrspace(5)). The checks expect both casts, the
; select, and the store to remain exactly as written, all on `ptr`.
41; CHECK-LABEL: @store_select_mismatch_group_private_flat(
42; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
43; CHECK: %cast1 = addrspacecast ptr addrspace(5) %private.ptr.1 to ptr
44; CHECK: %select = select i1 %c, ptr %cast0, ptr %cast1
45; CHECK: store i32 -1, ptr %select
46define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(5) %private.ptr.1) #0 {
47  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
48  %cast1 = addrspacecast ptr addrspace(5) %private.ptr.1 to ptr
49  %select = select i1 %c, ptr %cast0, ptr %cast1
50  store i32 -1, ptr %select
51  ret void
52}
53
; Internal i32 globals in addrspace(3). @lds1 is used as a constant select
; operand in @store_select_group_flat_constexpr; @lds0 is not referenced by
; any function visible in this file.
54@lds0 = internal addrspace(3) global i32 123, align 4
55@lds1 = internal addrspace(3) global i32 456, align 4
56
; One operand is a cast from addrspace(3); the other is the constant
; `ptr null`. The checks expect the select and store to move to addrspace(3),
; with the null operand written as the constant expression
; `addrspacecast (ptr null to ptr addrspace(3))`.
57; CHECK-LABEL: @store_select_group_flat_null(
58; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3))
59; CHECK: store i32 -1, ptr addrspace(3) %select
60define amdgpu_kernel void @store_select_group_flat_null(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
61  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
62  %select = select i1 %c, ptr %cast0, ptr null
63  store i32 -1, ptr %select
64  ret void
65}
66
; Same as @store_select_group_flat_null but with the select operands swapped:
; `ptr null` is the true value and the cast pointer is the false value. The
; checks expect the same addrspace(3) rewrite with the operand order kept.
67; CHECK-LABEL: @store_select_group_flat_null_swap(
68; CHECK: %select = select i1 %c, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(3) %group.ptr.0
69; CHECK: store i32 -1, ptr addrspace(3) %select
70define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
71  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
72  %select = select i1 %c, ptr null, ptr %cast0
73  store i32 -1, ptr %select
74  ret void
75}
76
; One operand is a cast from addrspace(3); the other is `ptr undef`. The
; checks expect the select and store to move to addrspace(3), with the undef
; operand becoming `ptr addrspace(3) undef`.
77; CHECK-LABEL: @store_select_group_flat_undef(
78; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) undef
79; CHECK: store i32 -1, ptr addrspace(3) %select
80define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
81  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
82  %select = select i1 %c, ptr %cast0, ptr undef
83  store i32 -1, ptr %select
84  ret void
85}
86
; Same as @store_select_group_flat_undef but with the select operands swapped:
; `ptr undef` is the true value and the cast pointer is the false value. The
; checks expect the same addrspace(3) rewrite with the operand order kept.
87; CHECK-LABEL: @store_select_group_flat_undef_swap(
88; CHECK: %select = select i1 %c, ptr addrspace(3) undef, ptr addrspace(3) %group.ptr.0
89; CHECK: store i32 -1, ptr addrspace(3) %select
90define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
91  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
92  %select = select i1 %c, ptr undef, ptr %cast0
93  store i32 -1, ptr %select
94  ret void
95}
96
; Select of a cast addrspace(3) pointer and `ptr null`, with a getelementptr
; between the select and the store. The checks expect the select, the
; getelementptr, and the store to all operate on addrspace(3) pointers.
97; CHECK-LABEL: @store_select_gep_group_flat_null(
98; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3))
99; CHECK: %gep = getelementptr i32, ptr addrspace(3) %select, i64 16
100; CHECK: store i32 -1, ptr addrspace(3) %gep
101define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
102  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
103  %select = select i1 %c, ptr %cast0, ptr null
104  %gep = getelementptr i32, ptr %select, i64 16
105  store i32 -1, ptr %gep
106  ret void
107}
108
; Internal i32 global in addrspace(1); used as the mismatching constant
; operand in the @store_select_group_global_mismatch_flat_constexpr tests.
109@global0 = internal addrspace(1) global i32 123, align 4
110
; The second select operand is a constant expression casting the addrspace(3)
; global @lds1 to `ptr`. The checks expect the select to use @lds1 directly as
; an addrspace(3) operand and the store to use an addrspace(3) pointer.
111; CHECK-LABEL: @store_select_group_flat_constexpr(
112; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) @lds1
113; CHECK: store i32 7, ptr addrspace(3) %select
114define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
115  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
116  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(3) @lds1 to ptr)
117  store i32 7, ptr %select
118  ret void
119}
120
; The second select operand is a constant `ptr` built with inttoptr from the
; integer 12345. The checks expect the select and store to move to
; addrspace(3), with that constant wrapped in an addrspacecast constant
; expression to addrspace(3).
121; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
122; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) addrspacecast (ptr inttoptr (i64 12345 to ptr) to ptr addrspace(3))
123; CHECK: store i32 7, ptr addrspace(3) %select
124define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
125  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
126  %select = select i1 %c, ptr %cast0, ptr inttoptr (i64 12345 to ptr)
127  store i32 7, ptr %select
128  ret void
129}
130
; The second select operand is a constant addrspace(3) pointer (inttoptr of
; i32 400) cast to `ptr`. The checks expect the outer cast to be dropped so
; the select uses the addrspace(3) inttoptr constant directly, with the
; addrspace(3) store on the line immediately after the select.
131; CHECK-LABEL: @store_select_group_flat_inttoptr_group(
132; CHECK: %select = select i1 %c, ptr addrspace(3) %group.ptr.0, ptr addrspace(3) inttoptr (i32 400 to ptr addrspace(3))
133; CHECK-NEXT: store i32 7, ptr addrspace(3) %select
134define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
135  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
136  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(3) inttoptr (i32 400 to ptr addrspace(3)) to ptr)
137  store i32 7, ptr %select
138  ret void
139}
140
; Negative case: one operand is cast from an addrspace(3) argument, the other
; is a constant cast of the addrspace(1) global @global0. The checks expect
; the cast, the select, and the store to remain as written, all on `ptr`.
141; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
142; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
143; CHECK: %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) @global0 to ptr)
144; CHECK: store i32 7, ptr %select
145define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
146  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
147  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) @global0 to ptr)
148  store i32 7, ptr %select
149  ret void
150}
151
; Same as @store_select_group_global_mismatch_flat_constexpr but with the
; select operands swapped: the addrspace(1) constant cast is the true value.
; The checks again expect the IR to remain as written, all on `ptr`.
152; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
153; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
154; CHECK: %select = select i1 %c, ptr addrspacecast (ptr addrspace(1) @global0 to ptr), ptr %cast0
155; CHECK: store i32 7, ptr %select
156define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
157  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
158  %select = select i1 %c, ptr addrspacecast (ptr addrspace(1) @global0 to ptr), ptr %cast0
159  store i32 7, ptr %select
160  ret void
161}
162
; Negative case with two constant operands: null pointers in addrspace(3) and
; addrspace(1), each cast to `ptr`. The checks expect the select and the store
; to remain as written, on `ptr`.
163; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
164; CHECK: %select = select i1 %c, ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr)
165; CHECK: store i32 7, ptr %select
166define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
167  %select = select i1 %c, ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr)
168  store i32 7, ptr %select
169  ret void
170}
171
; External addrspace(3) array of 1024 i32; its address is taken with ptrtoint
; in @store_select_group_constexpr_ptrtoint.
172@lds2 = external addrspace(3) global [1024 x i32], align 4
173
; Negative case: the constant operand is an addrspace(1) pointer formed by
; inttoptr of (ptrtoint of the addrspace(3) global @lds2, plus 124), then cast
; to `ptr`. The checks expect the cast of the argument, the select, and the
; store to remain as written, all on `ptr`.
174; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
175; CHECK: %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
176; CHECK: %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) inttoptr (i32 add (i32 ptrtoint (ptr addrspace(3) @lds2 to i32), i32 124) to ptr addrspace(1)) to ptr)
177; CHECK: store i32 7, ptr %select
178define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, ptr addrspace(3) %group.ptr.0) #0 {
179  %cast0 = addrspacecast ptr addrspace(3) %group.ptr.0 to ptr
180  %select = select i1 %c, ptr %cast0, ptr addrspacecast (ptr addrspace(1) inttoptr (i32 add (i32 ptrtoint (ptr addrspace(3) @lds2 to i32), i32 124) to ptr addrspace(1)) to ptr)
181  store i32 7, ptr %select
182  ret void
183}
184
; Vector-of-pointers variant: both select operands are <2 x ptr> values cast
; from <2 x ptr addrspace(3)>, and each extracted element is stored through.
; The checks expect the casts, the select, both extractelements, and both
; stores to remain as written, on `ptr` / <2 x ptr>.
185; CHECK-LABEL: @store_select_group_flat_vector(
186; CHECK: %cast0 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.0 to <2 x ptr>
187; CHECK: %cast1 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.1 to <2 x ptr>
188; CHECK: %select = select i1 %c, <2 x ptr> %cast0, <2 x ptr> %cast1
189; CHECK: %extract0 = extractelement <2 x ptr> %select, i32 0
190; CHECK: %extract1 = extractelement <2 x ptr> %select, i32 1
191; CHECK: store i32 -1, ptr %extract0
192; CHECK: store i32 -2, ptr %extract1
193define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x ptr addrspace(3)> %group.ptr.0, <2 x ptr addrspace(3)> %group.ptr.1) #0 {
194  %cast0 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.0 to <2 x ptr>
195  %cast1 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.1 to <2 x ptr>
196  %select = select i1 %c, <2 x ptr> %cast0, <2 x ptr> %cast1
197  %extract0 = extractelement <2 x ptr> %select, i32 0
198  %extract1 = extractelement <2 x ptr> %select, i32 1
199  store i32 -1, ptr %extract0
200  store i32 -2, ptr %extract1
201  ret void
202}
203
; Attribute group #0 (nounwind), attached to every function defined in this file.
204attributes #0 = { nounwind }
205
; Branch-weight metadata node referenced by the `!prof` attachment on the
; select in @load_select_group_flat_md.
206!0 = !{!"branch_weights", i32 2, i32 10}
207