; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=R600 %s

; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
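; BFI_INT src0, src1, src2 computes (src0 & src1) | (~src0 & src2): it selects
; src1 bits where src0 is set and src2 bits where it is clear. The pattern
; above therefore maps onto BFI_INT with src0 = x, src1 = y, src2 = z, which is
; what the expected check line below matches.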
define amdgpu_kernel void @bfi_def(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_def:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  store i32 %3, ptr addrspace(1) %out
  ret void
}

; SHA-256 Ch function
; z ^ (x & (y ^ z))
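; Ch(x, y, z) = (x & y) | (~x & z); the xor form above is equivalent because
; z ^ (x & (y ^ z)) yields y wherever a bit of x is 1 and z wherever it is 0,
; i.e. the same bitwise select as BFI_INT with src0 = x, src1 = y, src2 = z.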
define amdgpu_kernel void @bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_sha256_ch:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  store i32 %2, ptr addrspace(1) %out
  ret void
}

; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
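; Ma(x, y, z) = (x & y) | (x & z) | (y & z); the form above is equivalent, and
; the expected lowering below matches it as XOR_INT feeding BFI_INT:
; BFI_INT(a ^ b, c, b) returns c where a and b disagree and their common value
; where they agree, which is the majority of a, b, c.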
define amdgpu_kernel void @bfi_sha256_ma(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_sha256_ma:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     XOR_INT * T0.W, KC0[2].Z, KC0[2].W,
; R600-NEXT:     BFI_INT * T0.X, PV.W, KC0[3].X, KC0[2].W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  store i32 %3, ptr addrspace(1) %out
  ret void
}

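; The functions below use the default calling convention rather than
; amdgpu_kernel. R600 has no real support for calling non-entry functions, so
; the autogenerated checks show only an empty body (CF_END/PAD); these cases
; mainly verify that instruction selection does not crash on the patterns.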
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
; R600-LABEL: v_bitselect_v2i32_pat1:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %xor.0 = xor <2 x i32> %a, %mask
  %and = and <2 x i32> %xor.0, %b
  %bitselect = xor <2 x i32> %and, %mask
  ret <2 x i32> %bitselect
}

define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: v_bitselect_i64_pat_0:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  ret i64 %bitselect
}

define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: v_bitselect_i64_pat_1:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask
  ret i64 %bitselect
}

define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: v_bitselect_i64_pat_2:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask
  ret i64 %bitselect
}

define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; R600-LABEL: v_bfi_sha256_ma_i64:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
entry:
  %and0 = and i64 %x, %z
  %or0 = or i64 %x, %z
  %and1 = and i64 %y, %or0
  %or1 = or i64 %and0, %and1
  ret i64 %or1
}

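; In the kernel cases below, the i64 bitselects are legalized into two 32-bit
; halves, so each one should produce a pair of BFI_INT instructions; the
; ADD_INT/ADDC_UINT pair comes from the "add 10" that keeps the result live
; across the store.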
define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: s_bitselect_i64_pat_0:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[3].Y,
; R600-NEXT:     BFI_INT * T0.W, KC0[2].Y, KC0[2].W, PV.W,
; R600-NEXT:     MOV * T1.W, KC0[3].Z,
; R600-NEXT:     BFI_INT T1.W, KC0[2].Z, KC0[3].X, PV.W,
; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT:     MOV * T1.X, literal.y,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: s_bitselect_i64_pat_1:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[3].Y,
; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
; R600-NEXT:     MOV * T1.W, KC0[3].Z,
; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT:     MOV * T1.X, literal.y,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask

  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; R600-LABEL: s_bitselect_i64_pat_2:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[3].Y,
; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
; R600-NEXT:     MOV * T1.W, KC0[3].Z,
; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT:     MOV * T1.X, literal.y,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask

  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; R600-LABEL: s_bfi_sha256_ma_i64:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     XOR_INT * T0.W, KC0[2].Y, KC0[2].W,
; R600-NEXT:     BFI_INT T0.W, PV.W, KC0[3].Y, KC0[2].W,
; R600-NEXT:     XOR_INT * T1.W, KC0[2].Z, KC0[3].X,
; R600-NEXT:     BFI_INT T1.W, PS, KC0[3].Z, KC0[3].X,
; R600-NEXT:     ADDC_UINT * T2.W, PV.W, literal.x,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
; R600-NEXT:     MOV * T1.X, literal.y,
; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
entry:
  %and0 = and i64 %x, %z
  %or0 = or i64 %x, %z
  %and1 = and i64 %y, %or0
  %or1 = or i64 %and0, %and1

  %scalar.use = add i64 %or1, 10
  store i64 %scalar.use, ptr addrspace(1) undef
  ret void
}