; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-codegenprepare %s | FileCheck -check-prefix=IR %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s

; sdiv with a constant dividend (1000000) and a select of two constant
; divisors (5 or 8). The division is folded into the select arms, so the
; expected result is a select between 200000 and 125000 with no division.
define i32 @select_sdiv_lhs_const_i32(i1 %cond) {
; IR-LABEL: @select_sdiv_lhs_const_i32(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 200000, i32 125000
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_sdiv_lhs_const_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x1e848
; GCN-NEXT:    v_mov_b32_e32 v2, 0x30d40
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 5, i32 8
  %op = sdiv i32 1000000, %select
  ret i32 %op
}

; sdiv of a select of two constants (42000 or 420000) by the constant 42.
; The division is folded into the select arms: select between 1000 and 10000.
define i32 @select_sdiv_rhs_const_i32(i1 %cond) {
; IR-LABEL: @select_sdiv_rhs_const_i32(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 1000, i32 10000
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_sdiv_rhs_const_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x2710
; GCN-NEXT:    v_mov_b32_e32 v2, 0x3e8
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 42000, i32 420000
  %op = sdiv i32 %select, 42
  ret i32 %op
}

; Vector form of the constant-dividend case: <3333, 9999> divided by a select
; of constant vectors. One divisor lane is undef; the folded select arm carries
; poison in that lane (9999 / undef), while the other lanes are folded to
; 666, 555 and 1428.
define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) {
; IR-LABEL: @select_sdiv_lhs_const_v2i32(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> <i32 666, i32 poison>, <2 x i32> <i32 555, i32 1428>
; IR-NEXT:    ret <2 x i32> [[OP]]
;
; GCN-LABEL: select_sdiv_lhs_const_v2i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x22b
; GCN-NEXT:    v_mov_b32_e32 v2, 0x29a
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, 0x594
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, <2 x i32> <i32 5, i32 undef>, <2 x i32> <i32 6, i32 7>
  %op = sdiv <2 x i32> <i32 3333, i32 9999>, %select
  ret <2 x i32> %op
}

; Vector form of the constant-divisor case: a select of constant vectors
; divided lane-wise by <42, 40>. Both select arms are folded to the
; precomputed quotients.
define <2 x i32> @select_sdiv_rhs_const_v2i32(i1 %cond) {
; IR-LABEL: @select_sdiv_rhs_const_v2i32(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> <i32 198621, i32 20855308>, <2 x i32> <i32 222748, i32 2338858>
; IR-NEXT:    ret <2 x i32> [[OP]]
;
; GCN-LABEL: select_sdiv_rhs_const_v2i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x3661c
; GCN-NEXT:    v_mov_b32_e32 v2, 0x307dd
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, 0x23b02a
; GCN-NEXT:    v_mov_b32_e32 v2, 0x13e3a0c
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, <2 x i32> <i32 8342123, i32 834212353>, <2 x i32> <i32 9355456, i32 93554321>
  %op = sdiv <2 x i32> %select, <i32 42, i32 40>
  ret <2 x i32> %op
}

; External global in addrspace(1). The *_opaque_const* tests below use
; ptrtoint of its address as a select operand that is a constant expression
; but not a plain integer constant.
@gv = external addrspace(1) global i32

; sdiv of the constant 1000000 by a select whose true arm is ptrtoint of @gv.
; The division is not folded into the select: the checks expect the select to
; be kept and the sdiv to be expanded inline into a reciprocal-based sequence.
define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
; IR-LABEL: @select_sdiv_lhs_opaque_const0_i32(
; IR-NEXT:    [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 ptrtoint (ptr addrspace(1) @gv to i32), i32 5
; IR-NEXT:    [[TMP1:%.*]] = ashr i32 [[SELECT]], 31
; IR-NEXT:    [[TMP2:%.*]] = xor i32 0, [[TMP1]]
; IR-NEXT:    [[TMP3:%.*]] = add i32 [[SELECT]], [[TMP1]]
; IR-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP3]], [[TMP1]]
; IR-NEXT:    [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float
; IR-NEXT:    [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]])
; IR-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000
; IR-NEXT:    [[TMP8:%.*]] = fptoui float [[TMP7]] to i32
; IR-NEXT:    [[TMP9:%.*]] = sub i32 0, [[TMP4]]
; IR-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]]
; IR-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
; IR-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
; IR-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP11]], [[TMP12]]
; IR-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32
; IR-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP13]], 32
; IR-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
; IR-NEXT:    [[TMP17:%.*]] = add i32 [[TMP8]], [[TMP16]]
; IR-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
; IR-NEXT:    [[TMP19:%.*]] = mul i64 1000000, [[TMP18]]
; IR-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-NEXT:    [[TMP21:%.*]] = lshr i64 [[TMP19]], 32
; IR-NEXT:    [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32
; IR-NEXT:    [[TMP23:%.*]] = mul i32 [[TMP22]], [[TMP4]]
; IR-NEXT:    [[TMP24:%.*]] = sub i32 1000000, [[TMP23]]
; IR-NEXT:    [[TMP25:%.*]] = icmp uge i32 [[TMP24]], [[TMP4]]
; IR-NEXT:    [[TMP26:%.*]] = add i32 [[TMP22]], 1
; IR-NEXT:    [[TMP27:%.*]] = select i1 [[TMP25]], i32 [[TMP26]], i32 [[TMP22]]
; IR-NEXT:    [[TMP28:%.*]] = sub i32 [[TMP24]], [[TMP4]]
; IR-NEXT:    [[TMP29:%.*]] = select i1 [[TMP25]], i32 [[TMP28]], i32 [[TMP24]]
; IR-NEXT:    [[TMP30:%.*]] = icmp uge i32 [[TMP29]], [[TMP4]]
; IR-NEXT:    [[TMP31:%.*]] = add i32 [[TMP27]], 1
; IR-NEXT:    [[TMP32:%.*]] = select i1 [[TMP30]], i32 [[TMP31]], i32 [[TMP27]]
; IR-NEXT:    [[TMP33:%.*]] = xor i32 [[TMP32]], [[TMP2]]
; IR-NEXT:    [[TMP34:%.*]] = sub i32 [[TMP33]], [[TMP2]]
; IR-NEXT:    ret i32 [[TMP34]]
;
; GCN-LABEL: select_sdiv_lhs_opaque_const0_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_getpc_b64 s[4:5]
; GCN-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GCN-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GCN-NEXT:    s_load_dword s4, s[4:5], 0x0
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s4
; GCN-NEXT:    v_cndmask_b32_e32 v0, 5, v1, vcc
; GCN-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
; GCN-NEXT:    v_max_i32_e32 v1, v0, v1
; GCN-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GCN-NEXT:    v_sub_u32_e32 v3, vcc, 0, v1
; GCN-NEXT:    s_mov_b32 s4, 0xf4240
; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT:    v_mul_lo_u32 v3, v3, v2
; GCN-NEXT:    v_mul_hi_u32 v3, v2, v3
; GCN-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
; GCN-NEXT:    v_mul_hi_u32 v2, v2, s4
; GCN-NEXT:    v_mul_lo_u32 v3, v2, v1
; GCN-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GCN-NEXT:    v_sub_u32_e32 v3, vcc, 0xf4240, v3
; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT:    v_sub_u32_e64 v4, s[4:5], v3, v1
; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
; GCN-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc
; GCN-NEXT:    v_xor_b32_e32 v1, v1, v0
; GCN-NEXT:    v_sub_u32_e32 v0, vcc, v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 ptrtoint (ptr addrspace(1) @gv to i32), i32 5
  %op = sdiv i32 1000000, %select
  ret i32 %op
}

; Same as select_sdiv_lhs_opaque_const0_i32 but with ptrtoint of @gv as the
; false arm of the select. The division is again not folded; the checks expect
; the select to be kept and the sdiv to be expanded inline.
define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
; IR-LABEL: @select_sdiv_lhs_opaque_const1_i32(
; IR-NEXT:    [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 5, i32 ptrtoint (ptr addrspace(1) @gv to i32)
; IR-NEXT:    [[TMP1:%.*]] = ashr i32 [[SELECT]], 31
; IR-NEXT:    [[TMP2:%.*]] = xor i32 0, [[TMP1]]
; IR-NEXT:    [[TMP3:%.*]] = add i32 [[SELECT]], [[TMP1]]
; IR-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP3]], [[TMP1]]
; IR-NEXT:    [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float
; IR-NEXT:    [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]])
; IR-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000
; IR-NEXT:    [[TMP8:%.*]] = fptoui float [[TMP7]] to i32
; IR-NEXT:    [[TMP9:%.*]] = sub i32 0, [[TMP4]]
; IR-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]]
; IR-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
; IR-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
; IR-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP11]], [[TMP12]]
; IR-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32
; IR-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP13]], 32
; IR-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
; IR-NEXT:    [[TMP17:%.*]] = add i32 [[TMP8]], [[TMP16]]
; IR-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
; IR-NEXT:    [[TMP19:%.*]] = mul i64 1000000, [[TMP18]]
; IR-NEXT:    [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-NEXT:    [[TMP21:%.*]] = lshr i64 [[TMP19]], 32
; IR-NEXT:    [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32
; IR-NEXT:    [[TMP23:%.*]] = mul i32 [[TMP22]], [[TMP4]]
; IR-NEXT:    [[TMP24:%.*]] = sub i32 1000000, [[TMP23]]
; IR-NEXT:    [[TMP25:%.*]] = icmp uge i32 [[TMP24]], [[TMP4]]
; IR-NEXT:    [[TMP26:%.*]] = add i32 [[TMP22]], 1
; IR-NEXT:    [[TMP27:%.*]] = select i1 [[TMP25]], i32 [[TMP26]], i32 [[TMP22]]
; IR-NEXT:    [[TMP28:%.*]] = sub i32 [[TMP24]], [[TMP4]]
; IR-NEXT:    [[TMP29:%.*]] = select i1 [[TMP25]], i32 [[TMP28]], i32 [[TMP24]]
; IR-NEXT:    [[TMP30:%.*]] = icmp uge i32 [[TMP29]], [[TMP4]]
; IR-NEXT:    [[TMP31:%.*]] = add i32 [[TMP27]], 1
; IR-NEXT:    [[TMP32:%.*]] = select i1 [[TMP30]], i32 [[TMP31]], i32 [[TMP27]]
; IR-NEXT:    [[TMP33:%.*]] = xor i32 [[TMP32]], [[TMP2]]
; IR-NEXT:    [[TMP34:%.*]] = sub i32 [[TMP33]], [[TMP2]]
; IR-NEXT:    ret i32 [[TMP34]]
;
; GCN-LABEL: select_sdiv_lhs_opaque_const1_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_getpc_b64 s[4:5]
; GCN-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GCN-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GCN-NEXT:    s_load_dword s4, s[4:5], 0x0
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s4
; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 5, vcc
; GCN-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
; GCN-NEXT:    v_max_i32_e32 v1, v0, v1
; GCN-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GCN-NEXT:    v_sub_u32_e32 v3, vcc, 0, v1
; GCN-NEXT:    s_mov_b32 s4, 0xf4240
; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT:    v_mul_lo_u32 v3, v3, v2
; GCN-NEXT:    v_mul_hi_u32 v3, v2, v3
; GCN-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
; GCN-NEXT:    v_mul_hi_u32 v2, v2, s4
; GCN-NEXT:    v_mul_lo_u32 v3, v2, v1
; GCN-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GCN-NEXT:    v_sub_u32_e32 v3, vcc, 0xf4240, v3
; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT:    v_sub_u32_e64 v4, s[4:5], v3, v1
; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
; GCN-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc
; GCN-NEXT:    v_xor_b32_e32 v1, v1, v0
; GCN-NEXT:    v_sub_u32_e32 v0, vcc, v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 5, i32 ptrtoint (ptr addrspace(1) @gv to i32)
  %op = sdiv i32 1000000, %select
  ret i32 %op
}

; sdiv by the constant 42 of a select whose true arm is ptrtoint of @gv.
; The division is not folded into the select: the checks expect both the
; select and the sdiv to remain in the IR, and the final code to divide by 42
; with a multiply-high and shifts.
define i32 @select_sdiv_rhs_opaque_const0_i32(i1 %cond) {
; IR-LABEL: @select_sdiv_rhs_opaque_const0_i32(
; IR-NEXT:    [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 ptrtoint (ptr addrspace(1) @gv to i32), i32 234234
; IR-NEXT:    [[OP:%.*]] = sdiv i32 [[SELECT]], 42
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_sdiv_rhs_opaque_const0_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_getpc_b64 s[4:5]
; GCN-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GCN-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GCN-NEXT:    s_load_dword s4, s[4:5], 0x0
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x392fa
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s4
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_mov_b32 s4, 0x30c30c31
; GCN-NEXT:    v_mul_hi_i32 v0, v0, s4
; GCN-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 3, v0
; GCN-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 ptrtoint (ptr addrspace(1) @gv to i32), i32 234234
  %op = sdiv i32 %select, 42
  ret i32 %op
}

; Same as select_sdiv_rhs_opaque_const0_i32 but with ptrtoint of @gv as the
; false arm of the select (true arm 42000). The checks expect the select and
; the sdiv by 42 to remain in the IR.
define i32 @select_sdiv_rhs_opaque_const1_i32(i1 %cond) {
; IR-LABEL: @select_sdiv_rhs_opaque_const1_i32(
; IR-NEXT:    [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 42000, i32 ptrtoint (ptr addrspace(1) @gv to i32)
; IR-NEXT:    [[OP:%.*]] = sdiv i32 [[SELECT]], 42
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_sdiv_rhs_opaque_const1_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_getpc_b64 s[4:5]
; GCN-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GCN-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GCN-NEXT:    s_load_dword s4, s[4:5], 0x0
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0xa410
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s4
; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; GCN-NEXT:    s_mov_b32 s4, 0x30c30c31
; GCN-NEXT:    v_mul_hi_i32 v0, v0, s4
; GCN-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 3, v0
; GCN-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 42000, i32 ptrtoint (ptr addrspace(1) @gv to i32)
  %op = sdiv i32 %select, 42
  ret i32 %op
}

; add of the constant 1000000 and a select of 5 or 8. The add is folded into
; the select arms: select between 1000005 and 1000008.
define i32 @select_add_lhs_const_i32(i1 %cond) {
; IR-LABEL: @select_add_lhs_const_i32(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 1000005, i32 1000008
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_add_lhs_const_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0xf4248
; GCN-NEXT:    v_mov_b32_e32 v2, 0xf4245
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 5, i32 8
  %op = add i32 1000000, %select
  ret i32 %op
}

; Floating-point variant (the type is float despite "i32" in the name):
; fadd nnan nsz of 1.0 and a select of 2.0 or 4.0. The fadd is folded into the
; select arms (3.0 or 5.0) and the nnan/nsz flags are expected on the
; resulting select.
define float @select_fadd_lhs_const_i32_fmf(i1 %cond) {
; IR-LABEL: @select_fadd_lhs_const_i32_fmf(
; IR-NEXT:    [[OP:%.*]] = select nnan nsz i1 [[COND:%.*]], float 3.000000e+00, float 5.000000e+00
; IR-NEXT:    ret float [[OP]]
;
; GCN-LABEL: select_fadd_lhs_const_i32_fmf:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x40a00000
; GCN-NEXT:    v_mov_b32_e32 v2, 0x40400000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, float 2.0, float 4.0
  %op = fadd nnan nsz float 1.0, %select
  ret float %op
}

; Make sure we don't try to use mul24 instead
; (mul of the constant 1000 and a select of 5 or 8 is expected to fold into
; a select between 5000 and 8000, leaving no multiply at all.)
define i32 @select_mul_lhs_const_i32(i1 %cond) {
; IR-LABEL: @select_mul_lhs_const_i32(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 5000, i32 8000
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_mul_lhs_const_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x1f40
; GCN-NEXT:    v_mov_b32_e32 v2, 0x1388
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 5, i32 8
  %op = mul i32 1000, %select
  ret i32 %op
}

; Make sure we don't try to use mul24 instead
; (same as select_mul_lhs_const_i32 with the constant 1000 as the right-hand
; operand; the expected result is again a select between 5000 and 8000.)
define i32 @select_mul_rhs_const_i32(i1 %cond) {
; IR-LABEL: @select_mul_rhs_const_i32(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 5000, i32 8000
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_mul_rhs_const_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x1f40
; GCN-NEXT:    v_mov_b32_e32 v2, 0x1388
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 5, i32 8
  %op = mul i32 %select, 1000
  ret i32 %op
}

; i16 add in a kernel: a select of 5 or 8 plus the constant 123 folds to a
; select between 128 and 131, which is then stored. The constant is the
; right-hand operand of the add here, despite "lhs" in the function name.
define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
; IR-LABEL: @select_add_lhs_const_i16(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i16 128, i16 131
; IR-NEXT:    store i16 [[OP]], ptr addrspace(1) undef, align 2
; IR-NEXT:    ret void
;
; GCN-LABEL: select_add_lhs_const_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[8:9], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bitcmp1_b32 s0, 0
; GCN-NEXT:    s_movk_i32 s0, 0x80
; GCN-NEXT:    s_cselect_b32 s0, s0, 0x83
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    flat_store_short v[0:1], v0
; GCN-NEXT:    s_endpgm
  %select = select i1 %cond, i16 5, i16 8
  %op = add i16 %select, 123
  store i16 %op, ptr addrspace(1) undef
  ret void
}

; Fold through a cast: an i32 select of 5 or 8 is truncated to i16 and 42 is
; added. The expected result is a single i16 select between 47 and 50.
define i16 @select_add_trunc_select(i1 %cond) {
; IR-LABEL: @select_add_trunc_select(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i16 47, i16 50
; IR-NEXT:    ret i16 [[OP]]
;
; GCN-LABEL: select_add_trunc_select:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e64 v0, 50, 47, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i32 5, i32 8
  %trunc = trunc i32 %select to i16
  %op = add i16 %trunc, 42
  ret i16 %op
}

; Fold through a sign extension: an i16 select of -13 or 8 is sign-extended to
; i32 and 42 is added. The expected result is a single i32 select between 29
; and 50.
define i32 @select_add_sext_select(i1 %cond) {
; IR-LABEL: @select_add_sext_select(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 29, i32 50
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_add_sext_select:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e64 v0, 50, 29, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i16 -13, i16 8
  %sext = sext i16 %select to i32
  %op = add i32 %sext, 42
  ret i32 %op
}

; Fold through a zero extension: an i16 select of 5 or 8 is zero-extended to
; i32 and 42 is added. The expected result is a single i32 select between 47
; and 50.
define i32 @select_add_zext_select(i1 %cond) {
; IR-LABEL: @select_add_zext_select(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 47, i32 50
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_add_zext_select:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e64 v0, 50, 47, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, i16 5, i16 8
  %zext = zext i16 %select to i32
  %op = add i32 %zext, 42
  ret i32 %op
}

; Fold through a bitcast: a float select of 1.0 or 2.0 is bitcast to i32 and
; 42 is added. The expected result is a single i32 select between the two bit
; patterns plus 42 (0x3f80002a and 0x4000002a in the final code).
define i32 @select_add_bitcast_select(i1 %cond) {
; IR-LABEL: @select_add_bitcast_select(
; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 1065353258, i32 1073741866
; IR-NEXT:    ret i32 [[OP]]
;
; GCN-LABEL: select_add_bitcast_select:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x4000002a
; GCN-NEXT:    v_mov_b32_e32 v2, 0x3f80002a
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, float 1.0, float 2.0
  %bitcast = bitcast float %select to i32
  %op = add i32 %bitcast, 42
  ret i32 %op
}

; If we fold through a cast, we need to ensure it doesn't have
; multiple uses.
; Here %fpext feeds both the fsub and the intrinsic call, so the checks expect
; the select, fpext and fsub to be left unchanged.
define <2 x half> @multi_use_cast_regression(i1 %cond) {
; IR-LABEL: @multi_use_cast_regression(
; IR-NEXT:    [[SELECT:%.*]] = select i1 [[COND:%.*]], half 0xH3C00, half 0xH0000
; IR-NEXT:    [[FPEXT:%.*]] = fpext half [[SELECT]] to float
; IR-NEXT:    [[FSUB:%.*]] = fsub nsz float 1.000000e+00, [[FPEXT]]
; IR-NEXT:    [[CALL:%.*]] = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[FPEXT]], float [[FSUB]])
; IR-NEXT:    ret <2 x half> [[CALL]]
;
; GCN-LABEL: multi_use_cast_regression:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x3c00
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GCN-NEXT:    v_sub_f32_e32 v1, 1.0, v0
; GCN-NEXT:    v_cvt_pkrtz_f16_f32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %select = select i1 %cond, half 1.000000e+00, half 0.000000e+00
  %fpext = fpext half %select to float
  %fsub = fsub nsz float 1.0, %fpext
  %call = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %fpext, float %fsub)
  ret <2 x half> %call
}

; Intrinsic called by multi_use_cast_regression; it takes two floats and
; returns <2 x half>.
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0

; Attribute group referenced by the intrinsic declaration above.
attributes #0 = { nounwind readnone speculatable }