xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll (revision e7e90dd1c1014b4a7ef77f74af3682168d23ddbf)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
5
; Declarations of the llvm.abs overloads called by the test functions in this
; file: scalar i16/i32/i64 plus the v2i8, v3i8, v2i16, v3i16 and v4i32 vectors.
; Each takes the value and an i1 flag operand.
6declare i16 @llvm.abs.i16(i16, i1)
7declare i32 @llvm.abs.i32(i32, i1)
8declare i64 @llvm.abs.i64(i64, i1)
9declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
10declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
11declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
12declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
13declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
14
; i16 abs of an `inreg` argument (arrives in s0 per the checks below).
; All three targets share one set of checks: sign-extend the low 16 bits to
; 32 bits, then a single s_abs_i32.
15define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
16; GFX-LABEL: abs_sgpr_i16:
17; GFX:       ; %bb.0:
18; GFX-NEXT:    s_sext_i32_i16 s0, s0
19; GFX-NEXT:    s_abs_i32 s0, s0
20; GFX-NEXT:    ; return to shader part epilog
21  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
22  ret i16 %res
23}
24
; i32 abs of an `inreg` argument. The shared checks expect exactly one
; s_abs_i32 on s0 for every target.
25define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
26; GFX-LABEL: abs_sgpr_i32:
27; GFX:       ; %bb.0:
28; GFX-NEXT:    s_abs_i32 s0, s0
29; GFX-NEXT:    ; return to shader part epilog
30  %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
31  ret i32 %res
32}
33
; i64 abs of an `inreg` argument held in s[0:1]. The shared checks expect the
; expansion (x + sign) ^ sign: the sign mask is the high half shifted
; arithmetically right by 31 and copied to both halves of s[2:3], the add is
; done as s_add_u32 / s_addc_u32, and the xor is a single 64-bit s_xor_b64.
34define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
35; GFX-LABEL: abs_sgpr_i64:
36; GFX:       ; %bb.0:
37; GFX-NEXT:    s_ashr_i32 s2, s1, 31
38; GFX-NEXT:    s_add_u32 s0, s0, s2
39; GFX-NEXT:    s_mov_b32 s3, s2
40; GFX-NEXT:    s_addc_u32 s1, s1, s2
41; GFX-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
42; GFX-NEXT:    ; return to shader part epilog
43  %res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
44  ret i64 %res
45}
46
; <4 x i32> abs of an `inreg` argument held in s0..s3. The shared checks expect
; one s_abs_i32 per element, in element order.
47define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
48; GFX-LABEL: abs_sgpr_v4i32:
49; GFX:       ; %bb.0:
50; GFX-NEXT:    s_abs_i32 s0, s0
51; GFX-NEXT:    s_abs_i32 s1, s1
52; GFX-NEXT:    s_abs_i32 s2, s2
53; GFX-NEXT:    s_abs_i32 s3, s3
54; GFX-NEXT:    ; return to shader part epilog
55  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
56  ret <4 x i32> %res
57}
58
; i16 abs of a non-`inreg` argument (arrives in v0 per the checks below).
; Expected shape on every target is max(x, 0 - x):
;   - tahiti (GFX6 checks) first sign-extends with v_bfe_i32 and then uses the
;     32-bit subtract and max;
;   - fiji (GFX8 checks) and gfx1010 (GFX10 checks) use 16-bit subtract and max.
; The result is moved to s0 with v_readfirstlane_b32 before returning.
59define amdgpu_cs i16 @abs_vgpr_i16(i16 %arg) {
60; GFX6-LABEL: abs_vgpr_i16:
61; GFX6:       ; %bb.0:
62; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
63; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
64; GFX6-NEXT:    v_max_i32_e32 v0, v0, v1
65; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
66; GFX6-NEXT:    ; return to shader part epilog
67;
68; GFX8-LABEL: abs_vgpr_i16:
69; GFX8:       ; %bb.0:
70; GFX8-NEXT:    v_sub_u16_e32 v1, 0, v0
71; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
72; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
73; GFX8-NEXT:    ; return to shader part epilog
74;
75; GFX10-LABEL: abs_vgpr_i16:
76; GFX10:       ; %bb.0:
77; GFX10-NEXT:    v_sub_nc_u16 v1, 0, v0
78; GFX10-NEXT:    v_max_i16 v0, v0, v1
79; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
80; GFX10-NEXT:    ; return to shader part epilog
81  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
82  ret i16 %res
83}
84
; i32 abs of a non-`inreg` argument in v0. Every target is expected to compute
; max(x, 0 - x) with a 32-bit subtract followed by v_max_i32; only the subtract
; mnemonic differs between targets. The result is copied to s0 with
; v_readfirstlane_b32.
85define amdgpu_cs i32 @abs_vgpr_i32(i32 %arg) {
86; GFX6-LABEL: abs_vgpr_i32:
87; GFX6:       ; %bb.0:
88; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
89; GFX6-NEXT:    v_max_i32_e32 v0, v0, v1
90; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
91; GFX6-NEXT:    ; return to shader part epilog
92;
93; GFX8-LABEL: abs_vgpr_i32:
94; GFX8:       ; %bb.0:
95; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
96; GFX8-NEXT:    v_max_i32_e32 v0, v0, v1
97; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
98; GFX8-NEXT:    ; return to shader part epilog
99;
100; GFX10-LABEL: abs_vgpr_i32:
101; GFX10:       ; %bb.0:
102; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
103; GFX10-NEXT:    v_max_i32_e32 v0, v0, v1
104; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
105; GFX10-NEXT:    ; return to shader part epilog
106  %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
107  ret i32 %res
108}
109
; i64 abs of a non-`inreg` argument held in v0 (low) and v1 (high). Every
; target is expected to expand to (x + sign) ^ sign: v2 is the high half
; shifted arithmetically right by 31, the add is an add / add-with-carry pair,
; and each half is xor'ed with v2 separately. Both halves are then copied to
; s0 and s1 with v_readfirstlane_b32.
110define amdgpu_cs i64 @abs_vgpr_i64(i64 %arg) {
111; GFX6-LABEL: abs_vgpr_i64:
112; GFX6:       ; %bb.0:
113; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
114; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
115; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
116; GFX6-NEXT:    v_xor_b32_e32 v0, v0, v2
117; GFX6-NEXT:    v_xor_b32_e32 v1, v1, v2
118; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
119; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
120; GFX6-NEXT:    ; return to shader part epilog
121;
122; GFX8-LABEL: abs_vgpr_i64:
123; GFX8:       ; %bb.0:
124; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
125; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
126; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
127; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v2
128; GFX8-NEXT:    v_xor_b32_e32 v1, v1, v2
129; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
130; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
131; GFX8-NEXT:    ; return to shader part epilog
132;
133; GFX10-LABEL: abs_vgpr_i64:
134; GFX10:       ; %bb.0:
135; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
136; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
137; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
138; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
139; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v2
140; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
141; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
142; GFX10-NEXT:    ; return to shader part epilog
143  %res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
144  ret i64 %res
145}
146
; <4 x i32> abs of a non-`inreg` argument held in v0..v3. Each element is
; expected to become max(x, 0 - x). The tahiti and fiji checks interleave
; subtract and max per element and reuse v4 as the temporary. The gfx1010
; checks expect all four subtracts (into v4..v7) first, then the four max
; instructions. Each lane result is copied to s0..s3 with v_readfirstlane_b32.
147define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
148; GFX6-LABEL: abs_vgpr_v4i32:
149; GFX6:       ; %bb.0:
150; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
151; GFX6-NEXT:    v_max_i32_e32 v0, v0, v4
152; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
153; GFX6-NEXT:    v_max_i32_e32 v1, v1, v4
154; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
155; GFX6-NEXT:    v_max_i32_e32 v2, v2, v4
156; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
157; GFX6-NEXT:    v_max_i32_e32 v3, v3, v4
158; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
159; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
160; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
161; GFX6-NEXT:    v_readfirstlane_b32 s3, v3
162; GFX6-NEXT:    ; return to shader part epilog
163;
164; GFX8-LABEL: abs_vgpr_v4i32:
165; GFX8:       ; %bb.0:
166; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v0
167; GFX8-NEXT:    v_max_i32_e32 v0, v0, v4
168; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v1
169; GFX8-NEXT:    v_max_i32_e32 v1, v1, v4
170; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v2
171; GFX8-NEXT:    v_max_i32_e32 v2, v2, v4
172; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v3
173; GFX8-NEXT:    v_max_i32_e32 v3, v3, v4
174; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
175; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
176; GFX8-NEXT:    v_readfirstlane_b32 s2, v2
177; GFX8-NEXT:    v_readfirstlane_b32 s3, v3
178; GFX8-NEXT:    ; return to shader part epilog
179;
180; GFX10-LABEL: abs_vgpr_v4i32:
181; GFX10:       ; %bb.0:
182; GFX10-NEXT:    v_sub_nc_u32_e32 v4, 0, v0
183; GFX10-NEXT:    v_sub_nc_u32_e32 v5, 0, v1
184; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 0, v2
185; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 0, v3
186; GFX10-NEXT:    v_max_i32_e32 v0, v0, v4
187; GFX10-NEXT:    v_max_i32_e32 v1, v1, v5
188; GFX10-NEXT:    v_max_i32_e32 v2, v2, v6
189; GFX10-NEXT:    v_max_i32_e32 v3, v3, v7
190; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
191; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
192; GFX10-NEXT:    v_readfirstlane_b32 s2, v2
193; GFX10-NEXT:    v_readfirstlane_b32 s3, v3
194; GFX10-NEXT:    ; return to shader part epilog
195  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
196  ret <4 x i32> %res
197}
198
; <2 x i8> abs of an `inreg` argument; the checks show one element per SGPR
; (s0, s1). Every target ends with one s_abs_i32 per element; what differs is
; the sign-extension sequence recorded before it:
;   - tahiti: a single s_sext_i32_i8 per element;
;   - fiji: shift left by 8, s_sext_i32_i16, arithmetic shift right by 8, then
;     a second s_sext_i32_i16;
;   - gfx1010: s_sext_i32_i8 followed by s_sext_i32_i16.
199define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
200; GFX6-LABEL: abs_sgpr_v2i8:
201; GFX6:       ; %bb.0:
202; GFX6-NEXT:    s_sext_i32_i8 s0, s0
203; GFX6-NEXT:    s_sext_i32_i8 s1, s1
204; GFX6-NEXT:    s_abs_i32 s0, s0
205; GFX6-NEXT:    s_abs_i32 s1, s1
206; GFX6-NEXT:    ; return to shader part epilog
207;
208; GFX8-LABEL: abs_sgpr_v2i8:
209; GFX8:       ; %bb.0:
210; GFX8-NEXT:    s_lshl_b32 s0, s0, 8
211; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
212; GFX8-NEXT:    s_sext_i32_i16 s0, s0
213; GFX8-NEXT:    s_sext_i32_i16 s1, s1
214; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
215; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
216; GFX8-NEXT:    s_sext_i32_i16 s0, s0
217; GFX8-NEXT:    s_sext_i32_i16 s1, s1
218; GFX8-NEXT:    s_abs_i32 s0, s0
219; GFX8-NEXT:    s_abs_i32 s1, s1
220; GFX8-NEXT:    ; return to shader part epilog
221;
222; GFX10-LABEL: abs_sgpr_v2i8:
223; GFX10:       ; %bb.0:
224; GFX10-NEXT:    s_sext_i32_i8 s0, s0
225; GFX10-NEXT:    s_sext_i32_i8 s1, s1
226; GFX10-NEXT:    s_sext_i32_i16 s0, s0
227; GFX10-NEXT:    s_sext_i32_i16 s1, s1
228; GFX10-NEXT:    s_abs_i32 s0, s0
229; GFX10-NEXT:    s_abs_i32 s1, s1
230; GFX10-NEXT:    ; return to shader part epilog
231  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
232  ret <2 x i8> %res
233}
234
; <2 x i8> abs of a non-`inreg` argument; the checks show one element per VGPR
; (v0, v1). Every target computes max(x, 0 - x) per element:
;   - tahiti sign-extends 8 bits with v_bfe_i32, then uses the 32-bit sub/max;
;   - fiji shifts each element left by 8 and uses SDWA forms of the 16-bit
;     sub/max that read BYTE_1 with the sext modifier, subtracting from a zero
;     materialised in v2;
;   - gfx1010 sign-extends with v_bfe_i32, then uses the 16-bit sub/max.
; Results are copied to s0 and s1 with v_readfirstlane_b32.
235define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
236; GFX6-LABEL: abs_vgpr_v2i8:
237; GFX6:       ; %bb.0:
238; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
239; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
240; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
241; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
242; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
243; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
244; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
245; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
246; GFX6-NEXT:    ; return to shader part epilog
247;
248; GFX8-LABEL: abs_vgpr_v2i8:
249; GFX8:       ; %bb.0:
250; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
251; GFX8-NEXT:    v_mov_b32_e32 v2, 0
252; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
253; GFX8-NEXT:    v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
254; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
255; GFX8-NEXT:    v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
256; GFX8-NEXT:    v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
257; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
258; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
259; GFX8-NEXT:    ; return to shader part epilog
260;
261; GFX10-LABEL: abs_vgpr_v2i8:
262; GFX10:       ; %bb.0:
263; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
264; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
265; GFX10-NEXT:    v_sub_nc_u16 v2, 0, v0
266; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
267; GFX10-NEXT:    v_max_i16 v0, v0, v2
268; GFX10-NEXT:    v_max_i16 v1, v1, v3
269; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
270; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
271; GFX10-NEXT:    ; return to shader part epilog
272  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
273  ret <2 x i8> %res
274}
275
; <3 x i8> abs of an `inreg` argument; the checks show one element per SGPR
; (s0, s1, s2). Three-element counterpart of abs_sgpr_v2i8: every target ends
; with one s_abs_i32 per element, preceded by s_sext_i32_i8 on tahiti, by the
; shl 8 / sext i16 / ashr 8 / sext i16 sequence on fiji, and by s_sext_i32_i8
; plus s_sext_i32_i16 on gfx1010.
276define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
277; GFX6-LABEL: abs_sgpr_v3i8:
278; GFX6:       ; %bb.0:
279; GFX6-NEXT:    s_sext_i32_i8 s0, s0
280; GFX6-NEXT:    s_sext_i32_i8 s1, s1
281; GFX6-NEXT:    s_sext_i32_i8 s2, s2
282; GFX6-NEXT:    s_abs_i32 s0, s0
283; GFX6-NEXT:    s_abs_i32 s1, s1
284; GFX6-NEXT:    s_abs_i32 s2, s2
285; GFX6-NEXT:    ; return to shader part epilog
286;
287; GFX8-LABEL: abs_sgpr_v3i8:
288; GFX8:       ; %bb.0:
289; GFX8-NEXT:    s_lshl_b32 s0, s0, 8
290; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
291; GFX8-NEXT:    s_lshl_b32 s2, s2, 8
292; GFX8-NEXT:    s_sext_i32_i16 s0, s0
293; GFX8-NEXT:    s_sext_i32_i16 s1, s1
294; GFX8-NEXT:    s_sext_i32_i16 s2, s2
295; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
296; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
297; GFX8-NEXT:    s_ashr_i32 s2, s2, 8
298; GFX8-NEXT:    s_sext_i32_i16 s0, s0
299; GFX8-NEXT:    s_sext_i32_i16 s1, s1
300; GFX8-NEXT:    s_sext_i32_i16 s2, s2
301; GFX8-NEXT:    s_abs_i32 s0, s0
302; GFX8-NEXT:    s_abs_i32 s1, s1
303; GFX8-NEXT:    s_abs_i32 s2, s2
304; GFX8-NEXT:    ; return to shader part epilog
305;
306; GFX10-LABEL: abs_sgpr_v3i8:
307; GFX10:       ; %bb.0:
308; GFX10-NEXT:    s_sext_i32_i8 s0, s0
309; GFX10-NEXT:    s_sext_i32_i8 s1, s1
310; GFX10-NEXT:    s_sext_i32_i8 s2, s2
311; GFX10-NEXT:    s_sext_i32_i16 s0, s0
312; GFX10-NEXT:    s_sext_i32_i16 s1, s1
313; GFX10-NEXT:    s_sext_i32_i16 s2, s2
314; GFX10-NEXT:    s_abs_i32 s0, s0
315; GFX10-NEXT:    s_abs_i32 s1, s1
316; GFX10-NEXT:    s_abs_i32 s2, s2
317; GFX10-NEXT:    ; return to shader part epilog
318  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
319  ret <3 x i8> %res
320}
321
; <3 x i8> abs of a non-`inreg` argument; the checks show one element per VGPR
; (v0, v1, v2). Three-element counterpart of abs_vgpr_v2i8: max(x, 0 - x) per
; element, using v_bfe_i32 plus 32-bit sub/max on tahiti, shift-left-by-8 plus
; SDWA 16-bit sub/max reading BYTE_1 with sext on fiji, and v_bfe_i32 plus
; 16-bit sub/max on gfx1010. Results are copied to s0..s2 with
; v_readfirstlane_b32.
322define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
323; GFX6-LABEL: abs_vgpr_v3i8:
324; GFX6:       ; %bb.0:
325; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
326; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
327; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
328; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
329; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
330; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 8
331; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
332; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
333; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
334; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
335; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
336; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
337; GFX6-NEXT:    ; return to shader part epilog
338;
339; GFX8-LABEL: abs_vgpr_v3i8:
340; GFX8:       ; %bb.0:
341; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
342; GFX8-NEXT:    v_mov_b32_e32 v3, 0
343; GFX8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
344; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
345; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
346; GFX8-NEXT:    v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
347; GFX8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
348; GFX8-NEXT:    v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
349; GFX8-NEXT:    v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
350; GFX8-NEXT:    v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
351; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
352; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
353; GFX8-NEXT:    v_readfirstlane_b32 s2, v2
354; GFX8-NEXT:    ; return to shader part epilog
355;
356; GFX10-LABEL: abs_vgpr_v3i8:
357; GFX10:       ; %bb.0:
358; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
359; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
360; GFX10-NEXT:    v_bfe_i32 v2, v2, 0, 8
361; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v0
362; GFX10-NEXT:    v_sub_nc_u16 v4, 0, v1
363; GFX10-NEXT:    v_sub_nc_u16 v5, 0, v2
364; GFX10-NEXT:    v_max_i16 v0, v0, v3
365; GFX10-NEXT:    v_max_i16 v1, v1, v4
366; GFX10-NEXT:    v_max_i16 v2, v2, v5
367; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
368; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
369; GFX10-NEXT:    v_readfirstlane_b32 s2, v2
370; GFX10-NEXT:    ; return to shader part epilog
371  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
372  ret <3 x i8> %res
373}
374
; <2 x i16> abs of an `inreg` argument. The register layout differs by target
; in the checks below:
;   - tahiti: one element per SGPR (s0, s1), each sign-extended then s_abs_i32;
;   - fiji: both elements packed in s0; the high half is extracted with a
;     16-bit logical shift right, each half is sign-extended and passed to
;     s_abs_i32, then the halves are masked to 16 bits and repacked with
;     shift-left and or;
;   - gfx1010: both elements packed in s0; low half via s_sext_i32_i16, high
;     half via arithmetic shift right by 16, s_abs_i32 on each, then repacked
;     with s_pack_ll_b32_b16.
375define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
376; GFX6-LABEL: abs_sgpr_v2i16:
377; GFX6:       ; %bb.0:
378; GFX6-NEXT:    s_sext_i32_i16 s0, s0
379; GFX6-NEXT:    s_sext_i32_i16 s1, s1
380; GFX6-NEXT:    s_abs_i32 s0, s0
381; GFX6-NEXT:    s_abs_i32 s1, s1
382; GFX6-NEXT:    ; return to shader part epilog
383;
384; GFX8-LABEL: abs_sgpr_v2i16:
385; GFX8:       ; %bb.0:
386; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
387; GFX8-NEXT:    s_sext_i32_i16 s1, s1
388; GFX8-NEXT:    s_sext_i32_i16 s0, s0
389; GFX8-NEXT:    s_abs_i32 s1, s1
390; GFX8-NEXT:    s_abs_i32 s0, s0
391; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
392; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
393; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
394; GFX8-NEXT:    s_or_b32 s0, s0, s1
395; GFX8-NEXT:    ; return to shader part epilog
396;
397; GFX10-LABEL: abs_sgpr_v2i16:
398; GFX10:       ; %bb.0:
399; GFX10-NEXT:    s_sext_i32_i16 s1, s0
400; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
401; GFX10-NEXT:    s_abs_i32 s1, s1
402; GFX10-NEXT:    s_abs_i32 s0, s0
403; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s1, s0
404; GFX10-NEXT:    ; return to shader part epilog
405  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
406  ret <2 x i16> %res
407}
408
; <2 x i16> abs of a non-`inreg` argument, computed as max(x, 0 - x):
;   - tahiti: one element per VGPR (v0, v1), sign-extended with v_bfe_i32 and
;     processed with the 32-bit sub/max; two v_readfirstlane_b32 results;
;   - fiji: both elements packed in v0; the low half uses the plain 16-bit
;     sub/max, the high half uses SDWA forms selecting WORD_1, and the halves
;     are recombined with v_or_b32;
;   - gfx1010: both elements packed in v0 and handled by one v_pk_sub_i16 and
;     one v_pk_max_i16.
; On fiji and gfx1010 a single v_readfirstlane_b32 returns the packed result.
409define amdgpu_cs <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
410; GFX6-LABEL: abs_vgpr_v2i16:
411; GFX6:       ; %bb.0:
412; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
413; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
414; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
415; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
416; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
417; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
418; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
419; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
420; GFX6-NEXT:    ; return to shader part epilog
421;
422; GFX8-LABEL: abs_vgpr_v2i16:
423; GFX8:       ; %bb.0:
424; GFX8-NEXT:    v_mov_b32_e32 v2, 0
425; GFX8-NEXT:    v_sub_u16_e32 v1, 0, v0
426; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
427; GFX8-NEXT:    v_max_i16_e32 v1, v0, v1
428; GFX8-NEXT:    v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
429; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
430; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
431; GFX8-NEXT:    ; return to shader part epilog
432;
433; GFX10-LABEL: abs_vgpr_v2i16:
434; GFX10:       ; %bb.0:
435; GFX10-NEXT:    v_pk_sub_i16 v1, 0, v0
436; GFX10-NEXT:    v_pk_max_i16 v0, v0, v1
437; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
438; GFX10-NEXT:    ; return to shader part epilog
439  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
440  ret <2 x i16> %res
441}
442
; <3 x i16> abs of an `inreg` argument:
;   - tahiti: one element per SGPR (s0, s1, s2), each sign-extended and passed
;     to s_abs_i32;
;   - fiji: elements 0 and 1 are packed in s0 and element 2 is in s1; the
;     packed pair is split, abs'ed, masked and repacked with shift-left/or,
;     and the third result is masked to 16 bits in s1;
;   - gfx1010: same packed layout; the pair is repacked with
;     s_pack_ll_b32_b16 and the third element gets sext + s_abs_i32 in s1.
443define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
444; GFX6-LABEL: abs_sgpr_v3i16:
445; GFX6:       ; %bb.0:
446; GFX6-NEXT:    s_sext_i32_i16 s0, s0
447; GFX6-NEXT:    s_sext_i32_i16 s1, s1
448; GFX6-NEXT:    s_sext_i32_i16 s2, s2
449; GFX6-NEXT:    s_abs_i32 s0, s0
450; GFX6-NEXT:    s_abs_i32 s1, s1
451; GFX6-NEXT:    s_abs_i32 s2, s2
452; GFX6-NEXT:    ; return to shader part epilog
453;
454; GFX8-LABEL: abs_sgpr_v3i16:
455; GFX8:       ; %bb.0:
456; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
457; GFX8-NEXT:    s_sext_i32_i16 s2, s2
458; GFX8-NEXT:    s_sext_i32_i16 s0, s0
459; GFX8-NEXT:    s_abs_i32 s2, s2
460; GFX8-NEXT:    s_abs_i32 s0, s0
461; GFX8-NEXT:    s_sext_i32_i16 s1, s1
462; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
463; GFX8-NEXT:    s_abs_i32 s1, s1
464; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
465; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
466; GFX8-NEXT:    s_or_b32 s0, s0, s2
467; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
468; GFX8-NEXT:    ; return to shader part epilog
469;
470; GFX10-LABEL: abs_sgpr_v3i16:
471; GFX10:       ; %bb.0:
472; GFX10-NEXT:    s_sext_i32_i16 s2, s0
473; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
474; GFX10-NEXT:    s_abs_i32 s2, s2
475; GFX10-NEXT:    s_abs_i32 s0, s0
476; GFX10-NEXT:    s_sext_i32_i16 s1, s1
477; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
478; GFX10-NEXT:    s_abs_i32 s1, s1
479; GFX10-NEXT:    ; return to shader part epilog
480  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
481  ret <3 x i16> %res
482}
483
; <3 x i16> abs of a non-`inreg` argument, computed as max(x, 0 - x):
;   - tahiti: one element per VGPR (v0, v1, v2), sign-extended with v_bfe_i32
;     and processed with the 32-bit sub/max; three v_readfirstlane_b32 results;
;   - fiji: elements 0 and 1 packed in v0 (plain 16-bit ops for the low half,
;     SDWA WORD_1 ops for the high half, recombined with v_or_b32) and
;     element 2 in v1 using the plain 16-bit sub/max;
;   - gfx1010: the packed pair uses v_pk_sub_i16 / v_pk_max_i16 and element 2
;     uses the scalar-width 16-bit sub/max.
; On fiji and gfx1010 two v_readfirstlane_b32 copies return v0 and v1.
484define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
485; GFX6-LABEL: abs_vgpr_v3i16:
486; GFX6:       ; %bb.0:
487; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
488; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
489; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
490; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
491; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
492; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
493; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
494; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
495; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
496; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
497; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
498; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
499; GFX6-NEXT:    ; return to shader part epilog
500;
501; GFX8-LABEL: abs_vgpr_v3i16:
502; GFX8:       ; %bb.0:
503; GFX8-NEXT:    v_mov_b32_e32 v3, 0
504; GFX8-NEXT:    v_sub_u16_e32 v2, 0, v0
505; GFX8-NEXT:    v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
506; GFX8-NEXT:    v_sub_u16_e32 v4, 0, v1
507; GFX8-NEXT:    v_max_i16_e32 v2, v0, v2
508; GFX8-NEXT:    v_max_i16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
509; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
510; GFX8-NEXT:    v_max_i16_e32 v1, v1, v4
511; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
512; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
513; GFX8-NEXT:    ; return to shader part epilog
514;
515; GFX10-LABEL: abs_vgpr_v3i16:
516; GFX10:       ; %bb.0:
517; GFX10-NEXT:    v_pk_sub_i16 v2, 0, v0
518; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
519; GFX10-NEXT:    v_pk_max_i16 v0, v0, v2
520; GFX10-NEXT:    v_max_i16 v1, v1, v3
521; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
522; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
523; GFX10-NEXT:    ; return to shader part epilog
524  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
525  ret <3 x i16> %res
526}
527