xref: /llvm-project/llvm/test/CodeGen/AMDGPU/abs_i16.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-- -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -mtriple=amdgcn-- -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
4; RUN: llc -mtriple=amdgcn-- -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
5; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
6; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
8; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
9
; Scalar i16 absolute value through @llvm.abs.i16. The second operand is
; `i1 false`, which per the LLVM LangRef means an INT_MIN input is not treated
; as poison. According to the autogenerated assertions below, every target
; lowers this to max(x, 0 - x): gfx600 and gfx700 first sign-extend the value
; from 16 bits (v_bfe_i32) and use 32-bit sub/max, while gfx803 and newer use
; 16-bit sub/max directly. gfx1100 and gfx1200 also place an s_delay_alu
; between the two VALU instructions.
10define i16 @abs_i16(i16 %arg) {
11; GFX6-LABEL: abs_i16:
12; GFX6:       ; %bb.0:
13; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
15; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
16; GFX6-NEXT:    v_max_i32_e32 v0, v0, v1
17; GFX6-NEXT:    s_setpc_b64 s[30:31]
18;
19; GFX7-LABEL: abs_i16:
20; GFX7:       ; %bb.0:
21; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
23; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
24; GFX7-NEXT:    v_max_i32_e32 v0, v0, v1
25; GFX7-NEXT:    s_setpc_b64 s[30:31]
26;
27; GFX8-LABEL: abs_i16:
28; GFX8:       ; %bb.0:
29; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; GFX8-NEXT:    v_sub_u16_e32 v1, 0, v0
31; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
32; GFX8-NEXT:    s_setpc_b64 s[30:31]
33;
34; GFX9-LABEL: abs_i16:
35; GFX9:       ; %bb.0:
36; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX9-NEXT:    v_sub_u16_e32 v1, 0, v0
38; GFX9-NEXT:    v_max_i16_e32 v0, v0, v1
39; GFX9-NEXT:    s_setpc_b64 s[30:31]
40;
41; GFX10-LABEL: abs_i16:
42; GFX10:       ; %bb.0:
43; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX10-NEXT:    v_sub_nc_u16 v1, 0, v0
45; GFX10-NEXT:    v_max_i16 v0, v0, v1
46; GFX10-NEXT:    s_setpc_b64 s[30:31]
47;
48; GFX11-LABEL: abs_i16:
49; GFX11:       ; %bb.0:
50; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; GFX11-NEXT:    v_sub_nc_u16 v1, 0, v0
52; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
53; GFX11-NEXT:    v_max_i16 v0, v0, v1
54; GFX11-NEXT:    s_setpc_b64 s[30:31]
55;
56; GFX12-LABEL: abs_i16:
57; GFX12:       ; %bb.0:
58; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
59; GFX12-NEXT:    s_wait_expcnt 0x0
60; GFX12-NEXT:    s_wait_samplecnt 0x0
61; GFX12-NEXT:    s_wait_bvhcnt 0x0
62; GFX12-NEXT:    s_wait_kmcnt 0x0
63; GFX12-NEXT:    v_sub_nc_u16 v1, 0, v0
64; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
65; GFX12-NEXT:    v_max_i16 v0, v0, v1
66; GFX12-NEXT:    s_setpc_b64 s[30:31]
67  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
68  ret i16 %res
69}
70
; <2 x i16> absolute value through @llvm.abs.v2i16 (`i1 false`, so INT_MIN
; lanes are not poison per the LLVM LangRef). Expected lowering, as recorded
; by the autogenerated assertions below:
;   - gfx600 and gfx700 sign-extend v0 and v1 separately (v_bfe_i32), apply
;     32-bit sub/max to each, then combine them with a 16-bit shift and an or.
;   - gfx803 uses SDWA sub/max for the high half (WORD_1) and plain 16-bit
;     sub/max for the low half, merged with v_or_b32.
;   - gfx900 and newer use one packed v_pk_sub_i16 / v_pk_max_i16 pair.
71define <2 x i16> @v_abs_v2i16(<2 x i16> %arg) {
72; GFX6-LABEL: v_abs_v2i16:
73; GFX6:       ; %bb.0:
74; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
76; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
77; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
78; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
79; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
80; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
81; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
82; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
83; GFX6-NEXT:    s_setpc_b64 s[30:31]
84;
85; GFX7-LABEL: v_abs_v2i16:
86; GFX7:       ; %bb.0:
87; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
89; GFX7-NEXT:    v_bfe_i32 v1, v1, 0, 16
90; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
91; GFX7-NEXT:    v_max_i32_e32 v0, v0, v2
92; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
93; GFX7-NEXT:    v_max_i32_e32 v1, v1, v2
94; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
95; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
96; GFX7-NEXT:    s_setpc_b64 s[30:31]
97;
98; GFX8-LABEL: v_abs_v2i16:
99; GFX8:       ; %bb.0:
100; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101; GFX8-NEXT:    v_mov_b32_e32 v1, 0
102; GFX8-NEXT:    v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
103; GFX8-NEXT:    v_sub_u16_e32 v2, 0, v0
104; GFX8-NEXT:    v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
105; GFX8-NEXT:    v_max_i16_e32 v0, v0, v2
106; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
107; GFX8-NEXT:    s_setpc_b64 s[30:31]
108;
109; GFX9-LABEL: v_abs_v2i16:
110; GFX9:       ; %bb.0:
111; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GFX9-NEXT:    v_pk_sub_i16 v1, 0, v0
113; GFX9-NEXT:    v_pk_max_i16 v0, v0, v1
114; GFX9-NEXT:    s_setpc_b64 s[30:31]
115;
116; GFX10-LABEL: v_abs_v2i16:
117; GFX10:       ; %bb.0:
118; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119; GFX10-NEXT:    v_pk_sub_i16 v1, 0, v0
120; GFX10-NEXT:    v_pk_max_i16 v0, v0, v1
121; GFX10-NEXT:    s_setpc_b64 s[30:31]
122;
123; GFX11-LABEL: v_abs_v2i16:
124; GFX11:       ; %bb.0:
125; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126; GFX11-NEXT:    v_pk_sub_i16 v1, 0, v0
127; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
128; GFX11-NEXT:    v_pk_max_i16 v0, v0, v1
129; GFX11-NEXT:    s_setpc_b64 s[30:31]
130;
131; GFX12-LABEL: v_abs_v2i16:
132; GFX12:       ; %bb.0:
133; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
134; GFX12-NEXT:    s_wait_expcnt 0x0
135; GFX12-NEXT:    s_wait_samplecnt 0x0
136; GFX12-NEXT:    s_wait_bvhcnt 0x0
137; GFX12-NEXT:    s_wait_kmcnt 0x0
138; GFX12-NEXT:    v_pk_sub_i16 v1, 0, v0
139; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
140; GFX12-NEXT:    v_pk_max_i16 v0, v0, v1
141; GFX12-NEXT:    s_setpc_b64 s[30:31]
142  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
143  ret <2 x i16> %res
144}
145
; <3 x i16> absolute value through @llvm.abs.v3i16 (`i1 false`, so INT_MIN
; lanes are not poison per the LLVM LangRef). This is the odd-length vector
; case. Expected lowering, as recorded by the autogenerated assertions below:
;   - gfx600 and gfx700 sign-extend v0, v1 and v2 separately and use 32-bit
;     sub/max on each before the shift/or/alignbit sequence.
;   - gfx803 uses SDWA sub/max for the high half of v0 and plain 16-bit
;     sub/max for the low half of v0 and for v1.
;   - gfx900 and newer run packed v_pk_sub_i16 / v_pk_max_i16 on both v0 and
;     v1, i.e. the same instruction count as the <4 x i16> case.
146define <3 x i16> @v_abs_v3i16(<3 x i16> %arg) {
147; GFX6-LABEL: v_abs_v3i16:
148; GFX6:       ; %bb.0:
149; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
151; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
152; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
153; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
154; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
155; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
156; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
157; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
158; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
159; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
160; GFX6-NEXT:    v_max_i32_e32 v2, v2, v1
161; GFX6-NEXT:    v_alignbit_b32 v1, v2, v0, 16
162; GFX6-NEXT:    s_setpc_b64 s[30:31]
163;
164; GFX7-LABEL: v_abs_v3i16:
165; GFX7:       ; %bb.0:
166; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
168; GFX7-NEXT:    v_bfe_i32 v1, v1, 0, 16
169; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
170; GFX7-NEXT:    v_max_i32_e32 v0, v0, v3
171; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
172; GFX7-NEXT:    v_max_i32_e32 v1, v1, v3
173; GFX7-NEXT:    v_bfe_i32 v2, v2, 0, 16
174; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
175; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
176; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
177; GFX7-NEXT:    v_max_i32_e32 v2, v2, v1
178; GFX7-NEXT:    v_alignbit_b32 v1, v2, v0, 16
179; GFX7-NEXT:    s_setpc_b64 s[30:31]
180;
181; GFX8-LABEL: v_abs_v3i16:
182; GFX8:       ; %bb.0:
183; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184; GFX8-NEXT:    v_mov_b32_e32 v2, 0
185; GFX8-NEXT:    v_sub_u16_e32 v3, 0, v1
186; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
187; GFX8-NEXT:    v_max_i16_e32 v1, v1, v3
188; GFX8-NEXT:    v_sub_u16_e32 v3, 0, v0
189; GFX8-NEXT:    v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
190; GFX8-NEXT:    v_max_i16_e32 v0, v0, v3
191; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
192; GFX8-NEXT:    s_setpc_b64 s[30:31]
193;
194; GFX9-LABEL: v_abs_v3i16:
195; GFX9:       ; %bb.0:
196; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197; GFX9-NEXT:    v_pk_sub_i16 v2, 0, v0
198; GFX9-NEXT:    v_pk_max_i16 v0, v0, v2
199; GFX9-NEXT:    v_pk_sub_i16 v2, 0, v1
200; GFX9-NEXT:    v_pk_max_i16 v1, v1, v2
201; GFX9-NEXT:    s_setpc_b64 s[30:31]
202;
203; GFX10-LABEL: v_abs_v3i16:
204; GFX10:       ; %bb.0:
205; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX10-NEXT:    v_pk_sub_i16 v2, 0, v0
207; GFX10-NEXT:    v_pk_sub_i16 v3, 0, v1
208; GFX10-NEXT:    v_pk_max_i16 v0, v0, v2
209; GFX10-NEXT:    v_pk_max_i16 v1, v1, v3
210; GFX10-NEXT:    s_setpc_b64 s[30:31]
211;
212; GFX11-LABEL: v_abs_v3i16:
213; GFX11:       ; %bb.0:
214; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX11-NEXT:    v_pk_sub_i16 v2, 0, v0
216; GFX11-NEXT:    v_pk_sub_i16 v3, 0, v1
217; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
218; GFX11-NEXT:    v_pk_max_i16 v0, v0, v2
219; GFX11-NEXT:    v_pk_max_i16 v1, v1, v3
220; GFX11-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX12-LABEL: v_abs_v3i16:
223; GFX12:       ; %bb.0:
224; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
225; GFX12-NEXT:    s_wait_expcnt 0x0
226; GFX12-NEXT:    s_wait_samplecnt 0x0
227; GFX12-NEXT:    s_wait_bvhcnt 0x0
228; GFX12-NEXT:    s_wait_kmcnt 0x0
229; GFX12-NEXT:    v_pk_sub_i16 v2, 0, v0
230; GFX12-NEXT:    v_pk_sub_i16 v3, 0, v1
231; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
232; GFX12-NEXT:    v_pk_max_i16 v0, v0, v2
233; GFX12-NEXT:    v_pk_max_i16 v1, v1, v3
234; GFX12-NEXT:    s_setpc_b64 s[30:31]
235  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
236  ret <3 x i16> %res
237}
238
; <4 x i16> absolute value through @llvm.abs.v4i16 (`i1 false`, so INT_MIN
; lanes are not poison per the LLVM LangRef). Expected lowering, as recorded
; by the autogenerated assertions below:
;   - gfx600 and gfx700 sign-extend v0..v3 separately, apply 32-bit sub/max to
;     each, and finish with shift/or/alignbit/lshr instructions.
;   - gfx803 handles v0 and v1 each with an SDWA sub/max for the high half and
;     a plain 16-bit sub/max for the low half, merged with v_or_b32.
;   - gfx900 and newer use two packed v_pk_sub_i16 / v_pk_max_i16 pairs
;     (v0 and v1).
239define <4 x i16> @v_abs_v4i16(<4 x i16> %arg) {
240; GFX6-LABEL: v_abs_v4i16:
241; GFX6:       ; %bb.0:
242; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
244; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
245; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
246; GFX6-NEXT:    v_max_i32_e32 v2, v2, v4
247; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
248; GFX6-NEXT:    v_max_i32_e32 v3, v3, v4
249; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
250; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
251; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
252; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
253; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
254; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
255; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
256; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
257; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
258; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
259; GFX6-NEXT:    v_alignbit_b32 v1, v2, v0, 16
260; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
261; GFX6-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX7-LABEL: v_abs_v4i16:
264; GFX7:       ; %bb.0:
265; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX7-NEXT:    v_bfe_i32 v2, v2, 0, 16
267; GFX7-NEXT:    v_bfe_i32 v3, v3, 0, 16
268; GFX7-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
269; GFX7-NEXT:    v_max_i32_e32 v2, v2, v4
270; GFX7-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
271; GFX7-NEXT:    v_max_i32_e32 v3, v3, v4
272; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
273; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
274; GFX7-NEXT:    v_bfe_i32 v1, v1, 0, 16
275; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
276; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
277; GFX7-NEXT:    v_max_i32_e32 v0, v0, v3
278; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
279; GFX7-NEXT:    v_max_i32_e32 v1, v1, v3
280; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
281; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
282; GFX7-NEXT:    v_alignbit_b32 v1, v2, v0, 16
283; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
284; GFX7-NEXT:    s_setpc_b64 s[30:31]
285;
286; GFX8-LABEL: v_abs_v4i16:
287; GFX8:       ; %bb.0:
288; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289; GFX8-NEXT:    v_mov_b32_e32 v2, 0
290; GFX8-NEXT:    v_sub_u16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
291; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
292; GFX8-NEXT:    v_sub_u16_e32 v4, 0, v1
293; GFX8-NEXT:    v_sub_u16_e32 v5, 0, v0
294; GFX8-NEXT:    v_max_i16_sdwa v3, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
295; GFX8-NEXT:    v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
296; GFX8-NEXT:    v_max_i16_e32 v0, v0, v5
297; GFX8-NEXT:    v_max_i16_e32 v1, v1, v4
298; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
299; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
300; GFX8-NEXT:    s_setpc_b64 s[30:31]
301;
302; GFX9-LABEL: v_abs_v4i16:
303; GFX9:       ; %bb.0:
304; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; GFX9-NEXT:    v_pk_sub_i16 v2, 0, v0
306; GFX9-NEXT:    v_pk_max_i16 v0, v0, v2
307; GFX9-NEXT:    v_pk_sub_i16 v2, 0, v1
308; GFX9-NEXT:    v_pk_max_i16 v1, v1, v2
309; GFX9-NEXT:    s_setpc_b64 s[30:31]
310;
311; GFX10-LABEL: v_abs_v4i16:
312; GFX10:       ; %bb.0:
313; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; GFX10-NEXT:    v_pk_sub_i16 v2, 0, v0
315; GFX10-NEXT:    v_pk_sub_i16 v3, 0, v1
316; GFX10-NEXT:    v_pk_max_i16 v0, v0, v2
317; GFX10-NEXT:    v_pk_max_i16 v1, v1, v3
318; GFX10-NEXT:    s_setpc_b64 s[30:31]
319;
320; GFX11-LABEL: v_abs_v4i16:
321; GFX11:       ; %bb.0:
322; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323; GFX11-NEXT:    v_pk_sub_i16 v2, 0, v0
324; GFX11-NEXT:    v_pk_sub_i16 v3, 0, v1
325; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
326; GFX11-NEXT:    v_pk_max_i16 v0, v0, v2
327; GFX11-NEXT:    v_pk_max_i16 v1, v1, v3
328; GFX11-NEXT:    s_setpc_b64 s[30:31]
329;
330; GFX12-LABEL: v_abs_v4i16:
331; GFX12:       ; %bb.0:
332; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
333; GFX12-NEXT:    s_wait_expcnt 0x0
334; GFX12-NEXT:    s_wait_samplecnt 0x0
335; GFX12-NEXT:    s_wait_bvhcnt 0x0
336; GFX12-NEXT:    s_wait_kmcnt 0x0
337; GFX12-NEXT:    v_pk_sub_i16 v2, 0, v0
338; GFX12-NEXT:    v_pk_sub_i16 v3, 0, v1
339; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
340; GFX12-NEXT:    v_pk_max_i16 v0, v0, v2
341; GFX12-NEXT:    v_pk_max_i16 v1, v1, v3
342; GFX12-NEXT:    s_setpc_b64 s[30:31]
343  %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %arg, i1 false)
344  ret <4 x i16> %res
345}
346
; <6 x i16> absolute value through @llvm.abs.v6i16 (`i1 false`, so INT_MIN
; lanes are not poison per the LLVM LangRef). Expected lowering, as recorded
; by the autogenerated assertions below:
;   - gfx600 and gfx700 sign-extend v0..v5 separately, apply 32-bit sub/max to
;     each, and finish with shift/or/alignbit/lshr instructions.
;   - gfx803 handles v0..v2 each with an SDWA sub/max for the high half and a
;     plain 16-bit sub/max for the low half, merged with v_or_b32.
;   - gfx900 and newer use three packed v_pk_sub_i16 / v_pk_max_i16 pairs
;     (v0, v1 and v2).
347define <6 x i16> @v_abs_v6i16(<6 x i16> %arg) {
348; GFX6-LABEL: v_abs_v6i16:
349; GFX6:       ; %bb.0:
350; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
352; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
353; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
354; GFX6-NEXT:    v_max_i32_e32 v2, v2, v6
355; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
356; GFX6-NEXT:    v_max_i32_e32 v3, v3, v6
357; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
358; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
359; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
360; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
361; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
362; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
363; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
364; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
365; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
366; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
367; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
368; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v5
369; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
370; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
371; GFX6-NEXT:    v_max_i32_e32 v5, v5, v3
372; GFX6-NEXT:    v_max_i32_e32 v1, v4, v1
373; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
374; GFX6-NEXT:    v_or_b32_e32 v4, v1, v3
375; GFX6-NEXT:    v_alignbit_b32 v1, v2, v0, 16
376; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
377; GFX6-NEXT:    s_setpc_b64 s[30:31]
378;
379; GFX7-LABEL: v_abs_v6i16:
380; GFX7:       ; %bb.0:
381; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382; GFX7-NEXT:    v_bfe_i32 v2, v2, 0, 16
383; GFX7-NEXT:    v_bfe_i32 v3, v3, 0, 16
384; GFX7-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
385; GFX7-NEXT:    v_max_i32_e32 v2, v2, v6
386; GFX7-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
387; GFX7-NEXT:    v_max_i32_e32 v3, v3, v6
388; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
389; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
390; GFX7-NEXT:    v_bfe_i32 v1, v1, 0, 16
391; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
392; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
393; GFX7-NEXT:    v_max_i32_e32 v0, v0, v3
394; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
395; GFX7-NEXT:    v_bfe_i32 v5, v5, 0, 16
396; GFX7-NEXT:    v_max_i32_e32 v1, v1, v3
397; GFX7-NEXT:    v_bfe_i32 v4, v4, 0, 16
398; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
399; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v5
400; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
401; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
402; GFX7-NEXT:    v_max_i32_e32 v5, v5, v3
403; GFX7-NEXT:    v_max_i32_e32 v1, v4, v1
404; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
405; GFX7-NEXT:    v_or_b32_e32 v4, v1, v3
406; GFX7-NEXT:    v_alignbit_b32 v1, v2, v0, 16
407; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
408; GFX7-NEXT:    s_setpc_b64 s[30:31]
409;
410; GFX8-LABEL: v_abs_v6i16:
411; GFX8:       ; %bb.0:
412; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413; GFX8-NEXT:    v_mov_b32_e32 v3, 0
414; GFX8-NEXT:    v_sub_u16_sdwa v4, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
415; GFX8-NEXT:    v_sub_u16_sdwa v5, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
416; GFX8-NEXT:    v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
417; GFX8-NEXT:    v_sub_u16_e32 v6, 0, v2
418; GFX8-NEXT:    v_sub_u16_e32 v7, 0, v1
419; GFX8-NEXT:    v_sub_u16_e32 v8, 0, v0
420; GFX8-NEXT:    v_max_i16_sdwa v4, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
421; GFX8-NEXT:    v_max_i16_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
422; GFX8-NEXT:    v_max_i16_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
423; GFX8-NEXT:    v_max_i16_e32 v0, v0, v8
424; GFX8-NEXT:    v_max_i16_e32 v1, v1, v7
425; GFX8-NEXT:    v_max_i16_e32 v2, v2, v6
426; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
427; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
428; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
429; GFX8-NEXT:    s_setpc_b64 s[30:31]
430;
431; GFX9-LABEL: v_abs_v6i16:
432; GFX9:       ; %bb.0:
433; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX9-NEXT:    v_pk_sub_i16 v3, 0, v0
435; GFX9-NEXT:    v_pk_max_i16 v0, v0, v3
436; GFX9-NEXT:    v_pk_sub_i16 v3, 0, v1
437; GFX9-NEXT:    v_pk_max_i16 v1, v1, v3
438; GFX9-NEXT:    v_pk_sub_i16 v3, 0, v2
439; GFX9-NEXT:    v_pk_max_i16 v2, v2, v3
440; GFX9-NEXT:    s_setpc_b64 s[30:31]
441;
442; GFX10-LABEL: v_abs_v6i16:
443; GFX10:       ; %bb.0:
444; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445; GFX10-NEXT:    v_pk_sub_i16 v3, 0, v0
446; GFX10-NEXT:    v_pk_sub_i16 v4, 0, v1
447; GFX10-NEXT:    v_pk_sub_i16 v5, 0, v2
448; GFX10-NEXT:    v_pk_max_i16 v0, v0, v3
449; GFX10-NEXT:    v_pk_max_i16 v1, v1, v4
450; GFX10-NEXT:    v_pk_max_i16 v2, v2, v5
451; GFX10-NEXT:    s_setpc_b64 s[30:31]
452;
453; GFX11-LABEL: v_abs_v6i16:
454; GFX11:       ; %bb.0:
455; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456; GFX11-NEXT:    v_pk_sub_i16 v3, 0, v0
457; GFX11-NEXT:    v_pk_sub_i16 v4, 0, v1
458; GFX11-NEXT:    v_pk_sub_i16 v5, 0, v2
459; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
460; GFX11-NEXT:    v_pk_max_i16 v0, v0, v3
461; GFX11-NEXT:    v_pk_max_i16 v1, v1, v4
462; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
463; GFX11-NEXT:    v_pk_max_i16 v2, v2, v5
464; GFX11-NEXT:    s_setpc_b64 s[30:31]
465;
466; GFX12-LABEL: v_abs_v6i16:
467; GFX12:       ; %bb.0:
468; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
469; GFX12-NEXT:    s_wait_expcnt 0x0
470; GFX12-NEXT:    s_wait_samplecnt 0x0
471; GFX12-NEXT:    s_wait_bvhcnt 0x0
472; GFX12-NEXT:    s_wait_kmcnt 0x0
473; GFX12-NEXT:    v_pk_sub_i16 v3, 0, v0
474; GFX12-NEXT:    v_pk_sub_i16 v4, 0, v1
475; GFX12-NEXT:    v_pk_sub_i16 v5, 0, v2
476; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
477; GFX12-NEXT:    v_pk_max_i16 v0, v0, v3
478; GFX12-NEXT:    v_pk_max_i16 v1, v1, v4
479; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
480; GFX12-NEXT:    v_pk_max_i16 v2, v2, v5
481; GFX12-NEXT:    s_setpc_b64 s[30:31]
482  %res = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %arg, i1 false)
483  ret <6 x i16> %res
484}
485
; <8 x i16> absolute value through @llvm.abs.v8i16 (`i1 false`, so INT_MIN
; lanes are not poison per the LLVM LangRef). Expected lowering, as recorded
; by the autogenerated assertions below:
;   - gfx600 and gfx700 sign-extend v0..v7 separately, apply 32-bit sub/max to
;     each, and finish with shift/or/alignbit/lshr instructions.
;   - gfx803 handles v0..v3 each with an SDWA sub/max for the high half and a
;     plain 16-bit sub/max for the low half, merged with v_or_b32.
;   - gfx900 and newer use four packed v_pk_sub_i16 / v_pk_max_i16 pairs
;     (v0..v3).
486define <8 x i16> @v_abs_v8i16(<8 x i16> %arg) {
487; GFX6-LABEL: v_abs_v8i16:
488; GFX6:       ; %bb.0:
489; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
491; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
492; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
493; GFX6-NEXT:    v_max_i32_e32 v6, v6, v8
494; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 0, v7
495; GFX6-NEXT:    v_max_i32_e32 v7, v7, v8
496; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
497; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
498; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
499; GFX6-NEXT:    v_or_b32_e32 v6, v6, v7
500; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
501; GFX6-NEXT:    v_max_i32_e32 v4, v4, v7
502; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
503; GFX6-NEXT:    v_max_i32_e32 v5, v5, v7
504; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
505; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
506; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
507; GFX6-NEXT:    v_or_b32_e32 v4, v4, v5
508; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
509; GFX6-NEXT:    v_max_i32_e32 v2, v2, v5
510; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
511; GFX6-NEXT:    v_max_i32_e32 v3, v3, v5
512; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
513; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
514; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
515; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
516; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
517; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
518; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
519; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
520; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
521; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
522; GFX6-NEXT:    v_alignbit_b32 v1, v2, v0, 16
523; GFX6-NEXT:    v_alignbit_b32 v5, v6, v4, 16
524; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
525; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
526; GFX6-NEXT:    s_setpc_b64 s[30:31]
527;
528; GFX7-LABEL: v_abs_v8i16:
529; GFX7:       ; %bb.0:
530; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531; GFX7-NEXT:    v_bfe_i32 v6, v6, 0, 16
532; GFX7-NEXT:    v_bfe_i32 v7, v7, 0, 16
533; GFX7-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
534; GFX7-NEXT:    v_max_i32_e32 v6, v6, v8
535; GFX7-NEXT:    v_sub_i32_e32 v8, vcc, 0, v7
536; GFX7-NEXT:    v_max_i32_e32 v7, v7, v8
537; GFX7-NEXT:    v_bfe_i32 v4, v4, 0, 16
538; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
539; GFX7-NEXT:    v_bfe_i32 v5, v5, 0, 16
540; GFX7-NEXT:    v_or_b32_e32 v6, v6, v7
541; GFX7-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
542; GFX7-NEXT:    v_max_i32_e32 v4, v4, v7
543; GFX7-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
544; GFX7-NEXT:    v_max_i32_e32 v5, v5, v7
545; GFX7-NEXT:    v_bfe_i32 v2, v2, 0, 16
546; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
547; GFX7-NEXT:    v_bfe_i32 v3, v3, 0, 16
548; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
549; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
550; GFX7-NEXT:    v_max_i32_e32 v2, v2, v5
551; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
552; GFX7-NEXT:    v_max_i32_e32 v3, v3, v5
553; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
554; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
555; GFX7-NEXT:    v_bfe_i32 v1, v1, 0, 16
556; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
557; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
558; GFX7-NEXT:    v_max_i32_e32 v0, v0, v3
559; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
560; GFX7-NEXT:    v_max_i32_e32 v1, v1, v3
561; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
562; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
563; GFX7-NEXT:    v_alignbit_b32 v1, v2, v0, 16
564; GFX7-NEXT:    v_alignbit_b32 v5, v6, v4, 16
565; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
566; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
567; GFX7-NEXT:    s_setpc_b64 s[30:31]
568;
569; GFX8-LABEL: v_abs_v8i16:
570; GFX8:       ; %bb.0:
571; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572; GFX8-NEXT:    v_mov_b32_e32 v4, 0
573; GFX8-NEXT:    v_sub_u16_sdwa v5, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
574; GFX8-NEXT:    v_sub_u16_sdwa v6, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
575; GFX8-NEXT:    v_sub_u16_sdwa v7, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
576; GFX8-NEXT:    v_sub_u16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
577; GFX8-NEXT:    v_sub_u16_e32 v8, 0, v3
578; GFX8-NEXT:    v_sub_u16_e32 v9, 0, v2
579; GFX8-NEXT:    v_sub_u16_e32 v10, 0, v1
580; GFX8-NEXT:    v_sub_u16_e32 v11, 0, v0
581; GFX8-NEXT:    v_max_i16_sdwa v5, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
582; GFX8-NEXT:    v_max_i16_sdwa v6, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
583; GFX8-NEXT:    v_max_i16_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
584; GFX8-NEXT:    v_max_i16_sdwa v4, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
585; GFX8-NEXT:    v_max_i16_e32 v0, v0, v11
586; GFX8-NEXT:    v_max_i16_e32 v1, v1, v10
587; GFX8-NEXT:    v_max_i16_e32 v2, v2, v9
588; GFX8-NEXT:    v_max_i16_e32 v3, v3, v8
589; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
590; GFX8-NEXT:    v_or_b32_e32 v1, v1, v7
591; GFX8-NEXT:    v_or_b32_e32 v2, v2, v6
592; GFX8-NEXT:    v_or_b32_e32 v3, v3, v5
593; GFX8-NEXT:    s_setpc_b64 s[30:31]
594;
595; GFX9-LABEL: v_abs_v8i16:
596; GFX9:       ; %bb.0:
597; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX9-NEXT:    v_pk_sub_i16 v4, 0, v0
599; GFX9-NEXT:    v_pk_max_i16 v0, v0, v4
600; GFX9-NEXT:    v_pk_sub_i16 v4, 0, v1
601; GFX9-NEXT:    v_pk_max_i16 v1, v1, v4
602; GFX9-NEXT:    v_pk_sub_i16 v4, 0, v2
603; GFX9-NEXT:    v_pk_max_i16 v2, v2, v4
604; GFX9-NEXT:    v_pk_sub_i16 v4, 0, v3
605; GFX9-NEXT:    v_pk_max_i16 v3, v3, v4
606; GFX9-NEXT:    s_setpc_b64 s[30:31]
607;
608; GFX10-LABEL: v_abs_v8i16:
609; GFX10:       ; %bb.0:
610; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; GFX10-NEXT:    v_pk_sub_i16 v4, 0, v0
612; GFX10-NEXT:    v_pk_sub_i16 v5, 0, v1
613; GFX10-NEXT:    v_pk_sub_i16 v6, 0, v2
614; GFX10-NEXT:    v_pk_sub_i16 v7, 0, v3
615; GFX10-NEXT:    v_pk_max_i16 v0, v0, v4
616; GFX10-NEXT:    v_pk_max_i16 v1, v1, v5
617; GFX10-NEXT:    v_pk_max_i16 v2, v2, v6
618; GFX10-NEXT:    v_pk_max_i16 v3, v3, v7
619; GFX10-NEXT:    s_setpc_b64 s[30:31]
620;
621; GFX11-LABEL: v_abs_v8i16:
622; GFX11:       ; %bb.0:
623; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624; GFX11-NEXT:    v_pk_sub_i16 v4, 0, v0
625; GFX11-NEXT:    v_pk_sub_i16 v5, 0, v1
626; GFX11-NEXT:    v_pk_sub_i16 v6, 0, v2
627; GFX11-NEXT:    v_pk_sub_i16 v7, 0, v3
628; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
629; GFX11-NEXT:    v_pk_max_i16 v0, v0, v4
630; GFX11-NEXT:    v_pk_max_i16 v1, v1, v5
631; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
632; GFX11-NEXT:    v_pk_max_i16 v2, v2, v6
633; GFX11-NEXT:    v_pk_max_i16 v3, v3, v7
634; GFX11-NEXT:    s_setpc_b64 s[30:31]
635;
636; GFX12-LABEL: v_abs_v8i16:
637; GFX12:       ; %bb.0:
638; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
639; GFX12-NEXT:    s_wait_expcnt 0x0
640; GFX12-NEXT:    s_wait_samplecnt 0x0
641; GFX12-NEXT:    s_wait_bvhcnt 0x0
642; GFX12-NEXT:    s_wait_kmcnt 0x0
643; GFX12-NEXT:    v_pk_sub_i16 v4, 0, v0
644; GFX12-NEXT:    v_pk_sub_i16 v5, 0, v1
645; GFX12-NEXT:    v_pk_sub_i16 v6, 0, v2
646; GFX12-NEXT:    v_pk_sub_i16 v7, 0, v3
647; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
648; GFX12-NEXT:    v_pk_max_i16 v0, v0, v4
649; GFX12-NEXT:    v_pk_max_i16 v1, v1, v5
650; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
651; GFX12-NEXT:    v_pk_max_i16 v2, v2, v6
652; GFX12-NEXT:    v_pk_max_i16 v3, v3, v7
653; GFX12-NEXT:    s_setpc_b64 s[30:31]
654  %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false)
655  ret <8 x i16> %res
656}
657
658
659define <16 x i16> @v_abs_v16i16(<16 x i16> %arg) {
660; GFX6-LABEL: v_abs_v16i16:
661; GFX6:       ; %bb.0:
662; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663; GFX6-NEXT:    v_bfe_i32 v14, v14, 0, 16
664; GFX6-NEXT:    v_bfe_i32 v15, v15, 0, 16
665; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, 0, v14
666; GFX6-NEXT:    v_max_i32_e32 v14, v14, v16
667; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, 0, v15
668; GFX6-NEXT:    v_max_i32_e32 v15, v15, v16
669; GFX6-NEXT:    v_bfe_i32 v12, v12, 0, 16
670; GFX6-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
671; GFX6-NEXT:    v_bfe_i32 v13, v13, 0, 16
672; GFX6-NEXT:    v_or_b32_e32 v14, v14, v15
673; GFX6-NEXT:    v_sub_i32_e32 v15, vcc, 0, v12
674; GFX6-NEXT:    v_max_i32_e32 v12, v12, v15
675; GFX6-NEXT:    v_sub_i32_e32 v15, vcc, 0, v13
676; GFX6-NEXT:    v_max_i32_e32 v13, v13, v15
677; GFX6-NEXT:    v_bfe_i32 v10, v10, 0, 16
678; GFX6-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
679; GFX6-NEXT:    v_bfe_i32 v11, v11, 0, 16
680; GFX6-NEXT:    v_or_b32_e32 v12, v12, v13
681; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
682; GFX6-NEXT:    v_max_i32_e32 v10, v10, v13
683; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, 0, v11
684; GFX6-NEXT:    v_max_i32_e32 v11, v11, v13
685; GFX6-NEXT:    v_bfe_i32 v8, v8, 0, 16
686; GFX6-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
687; GFX6-NEXT:    v_bfe_i32 v9, v9, 0, 16
688; GFX6-NEXT:    v_or_b32_e32 v10, v10, v11
689; GFX6-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
690; GFX6-NEXT:    v_max_i32_e32 v8, v8, v11
691; GFX6-NEXT:    v_sub_i32_e32 v11, vcc, 0, v9
692; GFX6-NEXT:    v_max_i32_e32 v9, v9, v11
693; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
694; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
695; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
696; GFX6-NEXT:    v_or_b32_e32 v8, v8, v9
697; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
698; GFX6-NEXT:    v_max_i32_e32 v6, v6, v9
699; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 0, v7
700; GFX6-NEXT:    v_max_i32_e32 v7, v7, v9
701; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
702; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
703; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
704; GFX6-NEXT:    v_or_b32_e32 v6, v6, v7
705; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
706; GFX6-NEXT:    v_max_i32_e32 v4, v4, v7
707; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
708; GFX6-NEXT:    v_max_i32_e32 v5, v5, v7
709; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
710; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
711; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
712; GFX6-NEXT:    v_or_b32_e32 v4, v4, v5
713; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
714; GFX6-NEXT:    v_max_i32_e32 v2, v2, v5
715; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
716; GFX6-NEXT:    v_max_i32_e32 v3, v3, v5
717; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
718; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
719; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
720; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
721; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
722; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
723; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
724; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
725; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
726; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
727; GFX6-NEXT:    v_alignbit_b32 v1, v2, v0, 16
728; GFX6-NEXT:    v_alignbit_b32 v5, v6, v4, 16
729; GFX6-NEXT:    v_alignbit_b32 v9, v10, v8, 16
730; GFX6-NEXT:    v_alignbit_b32 v13, v14, v12, 16
731; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
732; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
733; GFX6-NEXT:    v_lshrrev_b32_e32 v11, 16, v10
734; GFX6-NEXT:    v_lshrrev_b32_e32 v15, 16, v14
735; GFX6-NEXT:    s_setpc_b64 s[30:31]
736;
737; GFX7-LABEL: v_abs_v16i16:
738; GFX7:       ; %bb.0:
739; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740; GFX7-NEXT:    v_bfe_i32 v14, v14, 0, 16
741; GFX7-NEXT:    v_bfe_i32 v15, v15, 0, 16
742; GFX7-NEXT:    v_sub_i32_e32 v16, vcc, 0, v14
743; GFX7-NEXT:    v_max_i32_e32 v14, v14, v16
744; GFX7-NEXT:    v_sub_i32_e32 v16, vcc, 0, v15
745; GFX7-NEXT:    v_max_i32_e32 v15, v15, v16
746; GFX7-NEXT:    v_bfe_i32 v12, v12, 0, 16
747; GFX7-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
748; GFX7-NEXT:    v_bfe_i32 v13, v13, 0, 16
749; GFX7-NEXT:    v_or_b32_e32 v14, v14, v15
750; GFX7-NEXT:    v_sub_i32_e32 v15, vcc, 0, v12
751; GFX7-NEXT:    v_max_i32_e32 v12, v12, v15
752; GFX7-NEXT:    v_sub_i32_e32 v15, vcc, 0, v13
753; GFX7-NEXT:    v_max_i32_e32 v13, v13, v15
754; GFX7-NEXT:    v_bfe_i32 v10, v10, 0, 16
755; GFX7-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
756; GFX7-NEXT:    v_bfe_i32 v11, v11, 0, 16
757; GFX7-NEXT:    v_or_b32_e32 v12, v12, v13
758; GFX7-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
759; GFX7-NEXT:    v_max_i32_e32 v10, v10, v13
760; GFX7-NEXT:    v_sub_i32_e32 v13, vcc, 0, v11
761; GFX7-NEXT:    v_max_i32_e32 v11, v11, v13
762; GFX7-NEXT:    v_bfe_i32 v8, v8, 0, 16
763; GFX7-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
764; GFX7-NEXT:    v_bfe_i32 v9, v9, 0, 16
765; GFX7-NEXT:    v_or_b32_e32 v10, v10, v11
766; GFX7-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
767; GFX7-NEXT:    v_max_i32_e32 v8, v8, v11
768; GFX7-NEXT:    v_sub_i32_e32 v11, vcc, 0, v9
769; GFX7-NEXT:    v_max_i32_e32 v9, v9, v11
770; GFX7-NEXT:    v_bfe_i32 v6, v6, 0, 16
771; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
772; GFX7-NEXT:    v_bfe_i32 v7, v7, 0, 16
773; GFX7-NEXT:    v_or_b32_e32 v8, v8, v9
774; GFX7-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
775; GFX7-NEXT:    v_max_i32_e32 v6, v6, v9
776; GFX7-NEXT:    v_sub_i32_e32 v9, vcc, 0, v7
777; GFX7-NEXT:    v_max_i32_e32 v7, v7, v9
778; GFX7-NEXT:    v_bfe_i32 v4, v4, 0, 16
779; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
780; GFX7-NEXT:    v_bfe_i32 v5, v5, 0, 16
781; GFX7-NEXT:    v_or_b32_e32 v6, v6, v7
782; GFX7-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
783; GFX7-NEXT:    v_max_i32_e32 v4, v4, v7
784; GFX7-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
785; GFX7-NEXT:    v_max_i32_e32 v5, v5, v7
786; GFX7-NEXT:    v_bfe_i32 v2, v2, 0, 16
787; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
788; GFX7-NEXT:    v_bfe_i32 v3, v3, 0, 16
789; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
790; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
791; GFX7-NEXT:    v_max_i32_e32 v2, v2, v5
792; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
793; GFX7-NEXT:    v_max_i32_e32 v3, v3, v5
794; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
795; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
796; GFX7-NEXT:    v_bfe_i32 v1, v1, 0, 16
797; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
798; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
799; GFX7-NEXT:    v_max_i32_e32 v0, v0, v3
800; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
801; GFX7-NEXT:    v_max_i32_e32 v1, v1, v3
802; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
803; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
804; GFX7-NEXT:    v_alignbit_b32 v1, v2, v0, 16
805; GFX7-NEXT:    v_alignbit_b32 v5, v6, v4, 16
806; GFX7-NEXT:    v_alignbit_b32 v9, v10, v8, 16
807; GFX7-NEXT:    v_alignbit_b32 v13, v14, v12, 16
808; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
809; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
810; GFX7-NEXT:    v_lshrrev_b32_e32 v11, 16, v10
811; GFX7-NEXT:    v_lshrrev_b32_e32 v15, 16, v14
812; GFX7-NEXT:    s_setpc_b64 s[30:31]
813;
814; GFX8-LABEL: v_abs_v16i16:
815; GFX8:       ; %bb.0:
816; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817; GFX8-NEXT:    v_mov_b32_e32 v8, 0
818; GFX8-NEXT:    v_sub_u16_sdwa v9, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
819; GFX8-NEXT:    v_sub_u16_sdwa v10, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
820; GFX8-NEXT:    v_sub_u16_sdwa v11, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
821; GFX8-NEXT:    v_sub_u16_sdwa v12, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
822; GFX8-NEXT:    v_sub_u16_sdwa v13, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
823; GFX8-NEXT:    v_sub_u16_sdwa v14, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
824; GFX8-NEXT:    v_sub_u16_sdwa v15, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
825; GFX8-NEXT:    v_sub_u16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
826; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v0
827; GFX8-NEXT:    v_max_i16_sdwa v8, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
828; GFX8-NEXT:    v_max_i16_e32 v0, v0, v19
829; GFX8-NEXT:    v_or_b32_e32 v0, v0, v8
830; GFX8-NEXT:    v_sub_u16_e32 v8, 0, v1
831; GFX8-NEXT:    v_max_i16_sdwa v15, v1, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
832; GFX8-NEXT:    v_max_i16_e32 v1, v1, v8
833; GFX8-NEXT:    v_sub_u16_e32 v16, 0, v7
834; GFX8-NEXT:    v_sub_u16_e32 v17, 0, v6
835; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v5
836; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v4
837; GFX8-NEXT:    v_sub_u16_e32 v8, 0, v3
838; GFX8-NEXT:    v_or_b32_e32 v1, v1, v15
839; GFX8-NEXT:    v_sub_u16_e32 v15, 0, v2
840; GFX8-NEXT:    v_max_i16_sdwa v9, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
841; GFX8-NEXT:    v_max_i16_sdwa v10, v6, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
842; GFX8-NEXT:    v_max_i16_sdwa v11, v5, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
843; GFX8-NEXT:    v_max_i16_sdwa v12, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
844; GFX8-NEXT:    v_max_i16_sdwa v13, v3, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
845; GFX8-NEXT:    v_max_i16_sdwa v14, v2, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
846; GFX8-NEXT:    v_max_i16_e32 v2, v2, v15
847; GFX8-NEXT:    v_max_i16_e32 v3, v3, v8
848; GFX8-NEXT:    v_max_i16_e32 v4, v4, v19
849; GFX8-NEXT:    v_max_i16_e32 v5, v5, v18
850; GFX8-NEXT:    v_max_i16_e32 v6, v6, v17
851; GFX8-NEXT:    v_max_i16_e32 v7, v7, v16
852; GFX8-NEXT:    v_or_b32_e32 v2, v2, v14
853; GFX8-NEXT:    v_or_b32_e32 v3, v3, v13
854; GFX8-NEXT:    v_or_b32_e32 v4, v4, v12
855; GFX8-NEXT:    v_or_b32_e32 v5, v5, v11
856; GFX8-NEXT:    v_or_b32_e32 v6, v6, v10
857; GFX8-NEXT:    v_or_b32_e32 v7, v7, v9
858; GFX8-NEXT:    s_setpc_b64 s[30:31]
859;
860; GFX9-LABEL: v_abs_v16i16:
861; GFX9:       ; %bb.0:
862; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v0
864; GFX9-NEXT:    v_pk_max_i16 v0, v0, v8
865; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v1
866; GFX9-NEXT:    v_pk_max_i16 v1, v1, v8
867; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v2
868; GFX9-NEXT:    v_pk_max_i16 v2, v2, v8
869; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v3
870; GFX9-NEXT:    v_pk_max_i16 v3, v3, v8
871; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v4
872; GFX9-NEXT:    v_pk_max_i16 v4, v4, v8
873; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v5
874; GFX9-NEXT:    v_pk_max_i16 v5, v5, v8
875; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v6
876; GFX9-NEXT:    v_pk_max_i16 v6, v6, v8
877; GFX9-NEXT:    v_pk_sub_i16 v8, 0, v7
878; GFX9-NEXT:    v_pk_max_i16 v7, v7, v8
879; GFX9-NEXT:    s_setpc_b64 s[30:31]
880;
881; GFX10-LABEL: v_abs_v16i16:
882; GFX10:       ; %bb.0:
883; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884; GFX10-NEXT:    v_pk_sub_i16 v8, 0, v0
885; GFX10-NEXT:    v_pk_sub_i16 v9, 0, v1
886; GFX10-NEXT:    v_pk_sub_i16 v10, 0, v2
887; GFX10-NEXT:    v_pk_sub_i16 v11, 0, v6
888; GFX10-NEXT:    v_pk_sub_i16 v12, 0, v7
889; GFX10-NEXT:    v_pk_max_i16 v0, v0, v8
890; GFX10-NEXT:    v_pk_max_i16 v1, v1, v9
891; GFX10-NEXT:    v_pk_max_i16 v2, v2, v10
892; GFX10-NEXT:    v_pk_sub_i16 v8, 0, v3
893; GFX10-NEXT:    v_pk_sub_i16 v9, 0, v4
894; GFX10-NEXT:    v_pk_sub_i16 v10, 0, v5
895; GFX10-NEXT:    v_pk_max_i16 v6, v6, v11
896; GFX10-NEXT:    v_pk_max_i16 v7, v7, v12
897; GFX10-NEXT:    v_pk_max_i16 v3, v3, v8
898; GFX10-NEXT:    v_pk_max_i16 v4, v4, v9
899; GFX10-NEXT:    v_pk_max_i16 v5, v5, v10
900; GFX10-NEXT:    s_setpc_b64 s[30:31]
901;
902; GFX11-LABEL: v_abs_v16i16:
903; GFX11:       ; %bb.0:
904; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905; GFX11-NEXT:    v_pk_sub_i16 v8, 0, v0
906; GFX11-NEXT:    v_pk_sub_i16 v9, 0, v1
907; GFX11-NEXT:    v_pk_sub_i16 v10, 0, v2
908; GFX11-NEXT:    v_pk_sub_i16 v11, 0, v6
909; GFX11-NEXT:    v_pk_sub_i16 v12, 0, v7
910; GFX11-NEXT:    v_pk_max_i16 v0, v0, v8
911; GFX11-NEXT:    v_pk_max_i16 v1, v1, v9
912; GFX11-NEXT:    v_pk_max_i16 v2, v2, v10
913; GFX11-NEXT:    v_pk_sub_i16 v8, 0, v3
914; GFX11-NEXT:    v_pk_sub_i16 v9, 0, v4
915; GFX11-NEXT:    v_pk_sub_i16 v10, 0, v5
916; GFX11-NEXT:    v_pk_max_i16 v6, v6, v11
917; GFX11-NEXT:    v_pk_max_i16 v7, v7, v12
918; GFX11-NEXT:    v_pk_max_i16 v3, v3, v8
919; GFX11-NEXT:    v_pk_max_i16 v4, v4, v9
920; GFX11-NEXT:    v_pk_max_i16 v5, v5, v10
921; GFX11-NEXT:    s_setpc_b64 s[30:31]
922;
923; GFX12-LABEL: v_abs_v16i16:
924; GFX12:       ; %bb.0:
925; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
926; GFX12-NEXT:    s_wait_expcnt 0x0
927; GFX12-NEXT:    s_wait_samplecnt 0x0
928; GFX12-NEXT:    s_wait_bvhcnt 0x0
929; GFX12-NEXT:    s_wait_kmcnt 0x0
930; GFX12-NEXT:    v_pk_sub_i16 v8, 0, v0
931; GFX12-NEXT:    v_pk_sub_i16 v9, 0, v1
932; GFX12-NEXT:    v_pk_sub_i16 v10, 0, v2
933; GFX12-NEXT:    v_pk_sub_i16 v11, 0, v6
934; GFX12-NEXT:    v_pk_sub_i16 v12, 0, v7
935; GFX12-NEXT:    v_pk_max_i16 v0, v0, v8
936; GFX12-NEXT:    v_pk_max_i16 v1, v1, v9
937; GFX12-NEXT:    v_pk_max_i16 v2, v2, v10
938; GFX12-NEXT:    v_pk_sub_i16 v8, 0, v3
939; GFX12-NEXT:    v_pk_sub_i16 v9, 0, v4
940; GFX12-NEXT:    v_pk_sub_i16 v10, 0, v5
941; GFX12-NEXT:    v_pk_max_i16 v6, v6, v11
942; GFX12-NEXT:    v_pk_max_i16 v7, v7, v12
943; GFX12-NEXT:    v_pk_max_i16 v3, v3, v8
944; GFX12-NEXT:    v_pk_max_i16 v4, v4, v9
945; GFX12-NEXT:    v_pk_max_i16 v5, v5, v10
946; GFX12-NEXT:    s_setpc_b64 s[30:31]
947  %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %arg, i1 false)
948  ret <16 x i16> %res
949}
950
951define <32 x i16> @v_abs_v32i16(<32 x i16> %arg) {
952; GFX6-LABEL: v_abs_v32i16:
953; GFX6:       ; %bb.0:
954; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
955; GFX6-NEXT:    v_bfe_i32 v28, v28, 0, 16
956; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v28
957; GFX6-NEXT:    v_bfe_i32 v29, v29, 0, 16
958; GFX6-NEXT:    v_max_i32_e32 v28, v28, v31
959; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v29
960; GFX6-NEXT:    v_bfe_i32 v30, v30, 0, 16
961; GFX6-NEXT:    v_max_i32_e32 v29, v29, v31
962; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v30
963; GFX6-NEXT:    v_bfe_i32 v26, v26, 0, 16
964; GFX6-NEXT:    v_max_i32_e32 v30, v30, v31
965; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v26
966; GFX6-NEXT:    v_bfe_i32 v27, v27, 0, 16
967; GFX6-NEXT:    v_max_i32_e32 v26, v26, v31
968; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v27
969; GFX6-NEXT:    v_bfe_i32 v24, v24, 0, 16
970; GFX6-NEXT:    v_max_i32_e32 v27, v27, v31
971; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v24
972; GFX6-NEXT:    v_bfe_i32 v25, v25, 0, 16
973; GFX6-NEXT:    v_max_i32_e32 v24, v24, v31
974; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v25
975; GFX6-NEXT:    v_bfe_i32 v22, v22, 0, 16
976; GFX6-NEXT:    v_max_i32_e32 v25, v25, v31
977; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v22
978; GFX6-NEXT:    v_bfe_i32 v23, v23, 0, 16
979; GFX6-NEXT:    v_max_i32_e32 v22, v22, v31
980; GFX6-NEXT:    v_sub_i32_e32 v31, vcc, 0, v23
981; GFX6-NEXT:    v_max_i32_e32 v23, v23, v31
982; GFX6-NEXT:    buffer_load_dword v31, off, s[0:3], s32
983; GFX6-NEXT:    v_lshlrev_b32_e32 v23, 16, v23
984; GFX6-NEXT:    v_lshlrev_b32_e32 v25, 16, v25
985; GFX6-NEXT:    v_or_b32_e32 v22, v22, v23
986; GFX6-NEXT:    v_or_b32_e32 v24, v24, v25
987; GFX6-NEXT:    v_bfe_i32 v21, v21, 0, 16
988; GFX6-NEXT:    v_bfe_i32 v20, v20, 0, 16
989; GFX6-NEXT:    v_lshlrev_b32_e32 v29, 16, v29
990; GFX6-NEXT:    v_or_b32_e32 v28, v28, v29
991; GFX6-NEXT:    v_sub_i32_e32 v29, vcc, 0, v20
992; GFX6-NEXT:    v_max_i32_e32 v20, v20, v29
993; GFX6-NEXT:    v_bfe_i32 v18, v18, 0, 16
994; GFX6-NEXT:    v_bfe_i32 v19, v19, 0, 16
995; GFX6-NEXT:    v_bfe_i32 v16, v16, 0, 16
996; GFX6-NEXT:    v_bfe_i32 v17, v17, 0, 16
997; GFX6-NEXT:    v_bfe_i32 v14, v14, 0, 16
998; GFX6-NEXT:    v_bfe_i32 v15, v15, 0, 16
999; GFX6-NEXT:    v_bfe_i32 v12, v12, 0, 16
1000; GFX6-NEXT:    v_bfe_i32 v13, v13, 0, 16
1001; GFX6-NEXT:    v_bfe_i32 v10, v10, 0, 16
1002; GFX6-NEXT:    v_bfe_i32 v11, v11, 0, 16
1003; GFX6-NEXT:    v_bfe_i32 v8, v8, 0, 16
1004; GFX6-NEXT:    v_bfe_i32 v9, v9, 0, 16
1005; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
1006; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
1007; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
1008; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
1009; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
1010; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
1011; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
1012; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
1013; GFX6-NEXT:    v_lshlrev_b32_e32 v27, 16, v27
1014; GFX6-NEXT:    v_or_b32_e32 v26, v26, v27
1015; GFX6-NEXT:    v_lshrrev_b32_e32 v27, 16, v26
1016; GFX6-NEXT:    s_waitcnt vmcnt(0)
1017; GFX6-NEXT:    v_bfe_i32 v23, v31, 0, 16
1018; GFX6-NEXT:    v_sub_i32_e32 v25, vcc, 0, v23
1019; GFX6-NEXT:    v_max_i32_e32 v23, v23, v25
1020; GFX6-NEXT:    v_lshlrev_b32_e32 v23, 16, v23
1021; GFX6-NEXT:    v_or_b32_e32 v30, v30, v23
1022; GFX6-NEXT:    v_sub_i32_e32 v23, vcc, 0, v21
1023; GFX6-NEXT:    v_max_i32_e32 v21, v21, v23
1024; GFX6-NEXT:    v_lshlrev_b32_e32 v21, 16, v21
1025; GFX6-NEXT:    v_or_b32_e32 v20, v20, v21
1026; GFX6-NEXT:    v_sub_i32_e32 v21, vcc, 0, v18
1027; GFX6-NEXT:    v_max_i32_e32 v18, v18, v21
1028; GFX6-NEXT:    v_sub_i32_e32 v21, vcc, 0, v19
1029; GFX6-NEXT:    v_max_i32_e32 v19, v19, v21
1030; GFX6-NEXT:    v_lshlrev_b32_e32 v19, 16, v19
1031; GFX6-NEXT:    v_or_b32_e32 v18, v18, v19
1032; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, 0, v16
1033; GFX6-NEXT:    v_max_i32_e32 v16, v16, v19
1034; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, 0, v17
1035; GFX6-NEXT:    v_max_i32_e32 v17, v17, v19
1036; GFX6-NEXT:    v_lshlrev_b32_e32 v17, 16, v17
1037; GFX6-NEXT:    v_or_b32_e32 v16, v16, v17
1038; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, 0, v14
1039; GFX6-NEXT:    v_max_i32_e32 v14, v14, v17
1040; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, 0, v15
1041; GFX6-NEXT:    v_max_i32_e32 v15, v15, v17
1042; GFX6-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
1043; GFX6-NEXT:    v_or_b32_e32 v14, v14, v15
1044; GFX6-NEXT:    v_sub_i32_e32 v15, vcc, 0, v12
1045; GFX6-NEXT:    v_max_i32_e32 v12, v12, v15
1046; GFX6-NEXT:    v_sub_i32_e32 v15, vcc, 0, v13
1047; GFX6-NEXT:    v_max_i32_e32 v13, v13, v15
1048; GFX6-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
1049; GFX6-NEXT:    v_or_b32_e32 v12, v12, v13
1050; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
1051; GFX6-NEXT:    v_max_i32_e32 v10, v10, v13
1052; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, 0, v11
1053; GFX6-NEXT:    v_max_i32_e32 v11, v11, v13
1054; GFX6-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
1055; GFX6-NEXT:    v_or_b32_e32 v10, v10, v11
1056; GFX6-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
1057; GFX6-NEXT:    v_max_i32_e32 v8, v8, v11
1058; GFX6-NEXT:    v_sub_i32_e32 v11, vcc, 0, v9
1059; GFX6-NEXT:    v_max_i32_e32 v9, v9, v11
1060; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
1061; GFX6-NEXT:    v_or_b32_e32 v8, v8, v9
1062; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
1063; GFX6-NEXT:    v_max_i32_e32 v6, v6, v9
1064; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 0, v7
1065; GFX6-NEXT:    v_max_i32_e32 v7, v7, v9
1066; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
1067; GFX6-NEXT:    v_or_b32_e32 v6, v6, v7
1068; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
1069; GFX6-NEXT:    v_max_i32_e32 v4, v4, v7
1070; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
1071; GFX6-NEXT:    v_max_i32_e32 v5, v5, v7
1072; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1073; GFX6-NEXT:    v_or_b32_e32 v4, v4, v5
1074; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
1075; GFX6-NEXT:    v_max_i32_e32 v2, v2, v5
1076; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
1077; GFX6-NEXT:    v_max_i32_e32 v3, v3, v5
1078; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1079; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
1080; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
1081; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
1082; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
1083; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
1084; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1085; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1086; GFX6-NEXT:    v_alignbit_b32 v1, v2, v0, 16
1087; GFX6-NEXT:    v_alignbit_b32 v5, v6, v4, 16
1088; GFX6-NEXT:    v_alignbit_b32 v9, v10, v8, 16
1089; GFX6-NEXT:    v_alignbit_b32 v13, v14, v12, 16
1090; GFX6-NEXT:    v_alignbit_b32 v17, v18, v16, 16
1091; GFX6-NEXT:    v_alignbit_b32 v21, v22, v20, 16
1092; GFX6-NEXT:    v_alignbit_b32 v25, v26, v24, 16
1093; GFX6-NEXT:    v_alignbit_b32 v29, v30, v28, 16
1094; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
1095; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
1096; GFX6-NEXT:    v_lshrrev_b32_e32 v11, 16, v10
1097; GFX6-NEXT:    v_lshrrev_b32_e32 v15, 16, v14
1098; GFX6-NEXT:    v_lshrrev_b32_e32 v19, 16, v18
1099; GFX6-NEXT:    v_lshrrev_b32_e32 v23, 16, v22
1100; GFX6-NEXT:    v_lshrrev_b32_e32 v31, 16, v30
1101; GFX6-NEXT:    s_setpc_b64 s[30:31]
1102;
1103; GFX7-LABEL: v_abs_v32i16:
1104; GFX7:       ; %bb.0:
1105; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106; GFX7-NEXT:    v_bfe_i32 v28, v28, 0, 16
1107; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v28
1108; GFX7-NEXT:    v_bfe_i32 v29, v29, 0, 16
1109; GFX7-NEXT:    v_max_i32_e32 v28, v28, v31
1110; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v29
1111; GFX7-NEXT:    v_bfe_i32 v30, v30, 0, 16
1112; GFX7-NEXT:    v_max_i32_e32 v29, v29, v31
1113; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v30
1114; GFX7-NEXT:    v_bfe_i32 v26, v26, 0, 16
1115; GFX7-NEXT:    v_max_i32_e32 v30, v30, v31
1116; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v26
1117; GFX7-NEXT:    v_bfe_i32 v27, v27, 0, 16
1118; GFX7-NEXT:    v_max_i32_e32 v26, v26, v31
1119; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v27
1120; GFX7-NEXT:    v_bfe_i32 v24, v24, 0, 16
1121; GFX7-NEXT:    v_max_i32_e32 v27, v27, v31
1122; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v24
1123; GFX7-NEXT:    v_bfe_i32 v25, v25, 0, 16
1124; GFX7-NEXT:    v_max_i32_e32 v24, v24, v31
1125; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v25
1126; GFX7-NEXT:    v_bfe_i32 v22, v22, 0, 16
1127; GFX7-NEXT:    v_max_i32_e32 v25, v25, v31
1128; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v22
1129; GFX7-NEXT:    v_bfe_i32 v23, v23, 0, 16
1130; GFX7-NEXT:    v_max_i32_e32 v22, v22, v31
1131; GFX7-NEXT:    v_sub_i32_e32 v31, vcc, 0, v23
1132; GFX7-NEXT:    v_max_i32_e32 v23, v23, v31
1133; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
1134; GFX7-NEXT:    v_lshlrev_b32_e32 v23, 16, v23
1135; GFX7-NEXT:    v_lshlrev_b32_e32 v25, 16, v25
1136; GFX7-NEXT:    v_or_b32_e32 v22, v22, v23
1137; GFX7-NEXT:    v_or_b32_e32 v24, v24, v25
1138; GFX7-NEXT:    v_bfe_i32 v21, v21, 0, 16
1139; GFX7-NEXT:    v_bfe_i32 v20, v20, 0, 16
1140; GFX7-NEXT:    v_lshlrev_b32_e32 v29, 16, v29
1141; GFX7-NEXT:    v_or_b32_e32 v28, v28, v29
1142; GFX7-NEXT:    v_sub_i32_e32 v29, vcc, 0, v20
1143; GFX7-NEXT:    v_max_i32_e32 v20, v20, v29
1144; GFX7-NEXT:    v_bfe_i32 v18, v18, 0, 16
1145; GFX7-NEXT:    v_bfe_i32 v19, v19, 0, 16
1146; GFX7-NEXT:    v_bfe_i32 v16, v16, 0, 16
1147; GFX7-NEXT:    v_bfe_i32 v17, v17, 0, 16
1148; GFX7-NEXT:    v_bfe_i32 v14, v14, 0, 16
1149; GFX7-NEXT:    v_bfe_i32 v15, v15, 0, 16
1150; GFX7-NEXT:    v_bfe_i32 v12, v12, 0, 16
1151; GFX7-NEXT:    v_bfe_i32 v13, v13, 0, 16
1152; GFX7-NEXT:    v_bfe_i32 v10, v10, 0, 16
1153; GFX7-NEXT:    v_bfe_i32 v11, v11, 0, 16
1154; GFX7-NEXT:    v_bfe_i32 v8, v8, 0, 16
1155; GFX7-NEXT:    v_bfe_i32 v9, v9, 0, 16
1156; GFX7-NEXT:    v_bfe_i32 v6, v6, 0, 16
1157; GFX7-NEXT:    v_bfe_i32 v7, v7, 0, 16
1158; GFX7-NEXT:    v_bfe_i32 v4, v4, 0, 16
1159; GFX7-NEXT:    v_bfe_i32 v5, v5, 0, 16
1160; GFX7-NEXT:    v_bfe_i32 v2, v2, 0, 16
1161; GFX7-NEXT:    v_bfe_i32 v3, v3, 0, 16
1162; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
1163; GFX7-NEXT:    v_bfe_i32 v1, v1, 0, 16
1164; GFX7-NEXT:    v_lshlrev_b32_e32 v27, 16, v27
1165; GFX7-NEXT:    v_or_b32_e32 v26, v26, v27
1166; GFX7-NEXT:    v_lshrrev_b32_e32 v27, 16, v26
1167; GFX7-NEXT:    s_waitcnt vmcnt(0)
1168; GFX7-NEXT:    v_bfe_i32 v23, v31, 0, 16
1169; GFX7-NEXT:    v_sub_i32_e32 v25, vcc, 0, v23
1170; GFX7-NEXT:    v_max_i32_e32 v23, v23, v25
1171; GFX7-NEXT:    v_lshlrev_b32_e32 v23, 16, v23
1172; GFX7-NEXT:    v_or_b32_e32 v30, v30, v23
1173; GFX7-NEXT:    v_sub_i32_e32 v23, vcc, 0, v21
1174; GFX7-NEXT:    v_max_i32_e32 v21, v21, v23
1175; GFX7-NEXT:    v_lshlrev_b32_e32 v21, 16, v21
1176; GFX7-NEXT:    v_or_b32_e32 v20, v20, v21
1177; GFX7-NEXT:    v_sub_i32_e32 v21, vcc, 0, v18
1178; GFX7-NEXT:    v_max_i32_e32 v18, v18, v21
1179; GFX7-NEXT:    v_sub_i32_e32 v21, vcc, 0, v19
1180; GFX7-NEXT:    v_max_i32_e32 v19, v19, v21
1181; GFX7-NEXT:    v_lshlrev_b32_e32 v19, 16, v19
1182; GFX7-NEXT:    v_or_b32_e32 v18, v18, v19
1183; GFX7-NEXT:    v_sub_i32_e32 v19, vcc, 0, v16
1184; GFX7-NEXT:    v_max_i32_e32 v16, v16, v19
1185; GFX7-NEXT:    v_sub_i32_e32 v19, vcc, 0, v17
1186; GFX7-NEXT:    v_max_i32_e32 v17, v17, v19
1187; GFX7-NEXT:    v_lshlrev_b32_e32 v17, 16, v17
1188; GFX7-NEXT:    v_or_b32_e32 v16, v16, v17
1189; GFX7-NEXT:    v_sub_i32_e32 v17, vcc, 0, v14
1190; GFX7-NEXT:    v_max_i32_e32 v14, v14, v17
1191; GFX7-NEXT:    v_sub_i32_e32 v17, vcc, 0, v15
1192; GFX7-NEXT:    v_max_i32_e32 v15, v15, v17
1193; GFX7-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
1194; GFX7-NEXT:    v_or_b32_e32 v14, v14, v15
1195; GFX7-NEXT:    v_sub_i32_e32 v15, vcc, 0, v12
1196; GFX7-NEXT:    v_max_i32_e32 v12, v12, v15
1197; GFX7-NEXT:    v_sub_i32_e32 v15, vcc, 0, v13
1198; GFX7-NEXT:    v_max_i32_e32 v13, v13, v15
1199; GFX7-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
1200; GFX7-NEXT:    v_or_b32_e32 v12, v12, v13
1201; GFX7-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
1202; GFX7-NEXT:    v_max_i32_e32 v10, v10, v13
1203; GFX7-NEXT:    v_sub_i32_e32 v13, vcc, 0, v11
1204; GFX7-NEXT:    v_max_i32_e32 v11, v11, v13
1205; GFX7-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
1206; GFX7-NEXT:    v_or_b32_e32 v10, v10, v11
1207; GFX7-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
1208; GFX7-NEXT:    v_max_i32_e32 v8, v8, v11
1209; GFX7-NEXT:    v_sub_i32_e32 v11, vcc, 0, v9
1210; GFX7-NEXT:    v_max_i32_e32 v9, v9, v11
1211; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
1212; GFX7-NEXT:    v_or_b32_e32 v8, v8, v9
1213; GFX7-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
1214; GFX7-NEXT:    v_max_i32_e32 v6, v6, v9
1215; GFX7-NEXT:    v_sub_i32_e32 v9, vcc, 0, v7
1216; GFX7-NEXT:    v_max_i32_e32 v7, v7, v9
1217; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
1218; GFX7-NEXT:    v_or_b32_e32 v6, v6, v7
1219; GFX7-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
1220; GFX7-NEXT:    v_max_i32_e32 v4, v4, v7
1221; GFX7-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
1222; GFX7-NEXT:    v_max_i32_e32 v5, v5, v7
1223; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1224; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
1225; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
1226; GFX7-NEXT:    v_max_i32_e32 v2, v2, v5
1227; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
1228; GFX7-NEXT:    v_max_i32_e32 v3, v3, v5
1229; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1230; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1231; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
1232; GFX7-NEXT:    v_max_i32_e32 v0, v0, v3
1233; GFX7-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
1234; GFX7-NEXT:    v_max_i32_e32 v1, v1, v3
1235; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1236; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1237; GFX7-NEXT:    v_alignbit_b32 v1, v2, v0, 16
1238; GFX7-NEXT:    v_alignbit_b32 v5, v6, v4, 16
1239; GFX7-NEXT:    v_alignbit_b32 v9, v10, v8, 16
1240; GFX7-NEXT:    v_alignbit_b32 v13, v14, v12, 16
1241; GFX7-NEXT:    v_alignbit_b32 v17, v18, v16, 16
1242; GFX7-NEXT:    v_alignbit_b32 v21, v22, v20, 16
1243; GFX7-NEXT:    v_alignbit_b32 v25, v26, v24, 16
1244; GFX7-NEXT:    v_alignbit_b32 v29, v30, v28, 16
1245; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
1246; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
1247; GFX7-NEXT:    v_lshrrev_b32_e32 v11, 16, v10
1248; GFX7-NEXT:    v_lshrrev_b32_e32 v15, 16, v14
1249; GFX7-NEXT:    v_lshrrev_b32_e32 v19, 16, v18
1250; GFX7-NEXT:    v_lshrrev_b32_e32 v23, 16, v22
1251; GFX7-NEXT:    v_lshrrev_b32_e32 v31, 16, v30
1252; GFX7-NEXT:    s_setpc_b64 s[30:31]
1253;
1254; GFX8-LABEL: v_abs_v32i16:
1255; GFX8:       ; %bb.0:
1256; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1257; GFX8-NEXT:    v_mov_b32_e32 v16, 0
1258; GFX8-NEXT:    v_sub_u16_sdwa v18, v16, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1259; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v0
1260; GFX8-NEXT:    v_max_i16_sdwa v18, v0, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1261; GFX8-NEXT:    v_max_i16_e32 v0, v0, v19
1262; GFX8-NEXT:    v_sub_u16_sdwa v19, v16, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1263; GFX8-NEXT:    v_or_b32_e32 v0, v0, v18
1264; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v1
1265; GFX8-NEXT:    v_max_i16_sdwa v19, v1, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1266; GFX8-NEXT:    v_max_i16_e32 v1, v1, v18
1267; GFX8-NEXT:    v_sub_u16_sdwa v18, v16, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1268; GFX8-NEXT:    v_or_b32_e32 v1, v1, v19
1269; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v2
1270; GFX8-NEXT:    v_max_i16_sdwa v18, v2, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1271; GFX8-NEXT:    v_max_i16_e32 v2, v2, v19
1272; GFX8-NEXT:    v_sub_u16_sdwa v19, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1273; GFX8-NEXT:    v_or_b32_e32 v2, v2, v18
1274; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v3
1275; GFX8-NEXT:    v_max_i16_sdwa v19, v3, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1276; GFX8-NEXT:    v_max_i16_e32 v3, v3, v18
1277; GFX8-NEXT:    v_sub_u16_sdwa v18, v16, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1278; GFX8-NEXT:    v_or_b32_e32 v3, v3, v19
1279; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v4
1280; GFX8-NEXT:    v_max_i16_sdwa v18, v4, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1281; GFX8-NEXT:    v_max_i16_e32 v4, v4, v19
1282; GFX8-NEXT:    v_sub_u16_sdwa v19, v16, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1283; GFX8-NEXT:    v_or_b32_e32 v4, v4, v18
1284; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v5
1285; GFX8-NEXT:    v_max_i16_sdwa v19, v5, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1286; GFX8-NEXT:    v_max_i16_e32 v5, v5, v18
1287; GFX8-NEXT:    v_sub_u16_sdwa v18, v16, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1288; GFX8-NEXT:    v_or_b32_e32 v5, v5, v19
1289; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v6
1290; GFX8-NEXT:    v_max_i16_sdwa v18, v6, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1291; GFX8-NEXT:    v_max_i16_e32 v6, v6, v19
1292; GFX8-NEXT:    v_sub_u16_sdwa v19, v16, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1293; GFX8-NEXT:    v_or_b32_e32 v6, v6, v18
1294; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v7
1295; GFX8-NEXT:    v_max_i16_sdwa v19, v7, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1296; GFX8-NEXT:    v_max_i16_e32 v7, v7, v18
1297; GFX8-NEXT:    v_sub_u16_sdwa v18, v16, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1298; GFX8-NEXT:    v_or_b32_e32 v7, v7, v19
1299; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v8
1300; GFX8-NEXT:    v_max_i16_sdwa v18, v8, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1301; GFX8-NEXT:    v_max_i16_e32 v8, v8, v19
1302; GFX8-NEXT:    v_sub_u16_sdwa v19, v16, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1303; GFX8-NEXT:    v_or_b32_e32 v8, v8, v18
1304; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v9
1305; GFX8-NEXT:    v_max_i16_sdwa v19, v9, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1306; GFX8-NEXT:    v_max_i16_e32 v9, v9, v18
1307; GFX8-NEXT:    v_sub_u16_sdwa v18, v16, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1308; GFX8-NEXT:    v_or_b32_e32 v9, v9, v19
1309; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v10
1310; GFX8-NEXT:    v_max_i16_sdwa v18, v10, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1311; GFX8-NEXT:    v_max_i16_e32 v10, v10, v19
1312; GFX8-NEXT:    v_sub_u16_sdwa v19, v16, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1313; GFX8-NEXT:    v_or_b32_e32 v10, v10, v18
1314; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v11
1315; GFX8-NEXT:    v_max_i16_sdwa v19, v11, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1316; GFX8-NEXT:    v_max_i16_e32 v11, v11, v18
1317; GFX8-NEXT:    v_sub_u16_sdwa v18, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1318; GFX8-NEXT:    v_or_b32_e32 v11, v11, v19
1319; GFX8-NEXT:    v_sub_u16_e32 v19, 0, v12
1320; GFX8-NEXT:    v_max_i16_sdwa v18, v12, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1321; GFX8-NEXT:    v_max_i16_e32 v12, v12, v19
1322; GFX8-NEXT:    v_sub_u16_sdwa v17, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1323; GFX8-NEXT:    v_sub_u16_sdwa v19, v16, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1324; GFX8-NEXT:    v_sub_u16_sdwa v16, v16, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1325; GFX8-NEXT:    v_or_b32_e32 v12, v12, v18
1326; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v13
1327; GFX8-NEXT:    v_max_i16_sdwa v16, v13, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1328; GFX8-NEXT:    v_max_i16_e32 v13, v13, v18
1329; GFX8-NEXT:    v_sub_u16_e32 v18, 0, v15
1330; GFX8-NEXT:    v_or_b32_e32 v13, v13, v16
1331; GFX8-NEXT:    v_sub_u16_e32 v16, 0, v14
1332; GFX8-NEXT:    v_max_i16_sdwa v17, v15, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1333; GFX8-NEXT:    v_max_i16_sdwa v19, v14, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1334; GFX8-NEXT:    v_max_i16_e32 v14, v14, v16
1335; GFX8-NEXT:    v_max_i16_e32 v15, v15, v18
1336; GFX8-NEXT:    v_or_b32_e32 v14, v14, v19
1337; GFX8-NEXT:    v_or_b32_e32 v15, v15, v17
1338; GFX8-NEXT:    s_setpc_b64 s[30:31]
1339;
1340; GFX9-LABEL: v_abs_v32i16:
1341; GFX9:       ; %bb.0:
1342; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v0
1344; GFX9-NEXT:    v_pk_max_i16 v0, v0, v16
1345; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v1
1346; GFX9-NEXT:    v_pk_max_i16 v1, v1, v16
1347; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v2
1348; GFX9-NEXT:    v_pk_max_i16 v2, v2, v16
1349; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v3
1350; GFX9-NEXT:    v_pk_max_i16 v3, v3, v16
1351; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v4
1352; GFX9-NEXT:    v_pk_max_i16 v4, v4, v16
1353; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v5
1354; GFX9-NEXT:    v_pk_max_i16 v5, v5, v16
1355; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v6
1356; GFX9-NEXT:    v_pk_max_i16 v6, v6, v16
1357; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v7
1358; GFX9-NEXT:    v_pk_max_i16 v7, v7, v16
1359; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v8
1360; GFX9-NEXT:    v_pk_max_i16 v8, v8, v16
1361; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v9
1362; GFX9-NEXT:    v_pk_max_i16 v9, v9, v16
1363; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v10
1364; GFX9-NEXT:    v_pk_max_i16 v10, v10, v16
1365; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v11
1366; GFX9-NEXT:    v_pk_max_i16 v11, v11, v16
1367; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v12
1368; GFX9-NEXT:    v_pk_max_i16 v12, v12, v16
1369; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v13
1370; GFX9-NEXT:    v_pk_max_i16 v13, v13, v16
1371; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v14
1372; GFX9-NEXT:    v_pk_max_i16 v14, v14, v16
1373; GFX9-NEXT:    v_pk_sub_i16 v16, 0, v15
1374; GFX9-NEXT:    v_pk_max_i16 v15, v15, v16
1375; GFX9-NEXT:    s_setpc_b64 s[30:31]
1376;
1377; GFX10-LABEL: v_abs_v32i16:
1378; GFX10:       ; %bb.0:
1379; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380; GFX10-NEXT:    v_pk_sub_i16 v16, 0, v0
1381; GFX10-NEXT:    v_pk_sub_i16 v17, 0, v2
1382; GFX10-NEXT:    v_pk_sub_i16 v18, 0, v3
1383; GFX10-NEXT:    v_pk_sub_i16 v19, 0, v4
1384; GFX10-NEXT:    v_pk_sub_i16 v20, 0, v5
1385; GFX10-NEXT:    v_pk_max_i16 v0, v0, v16
1386; GFX10-NEXT:    v_pk_sub_i16 v16, 0, v1
1387; GFX10-NEXT:    v_pk_max_i16 v2, v2, v17
1388; GFX10-NEXT:    v_pk_max_i16 v3, v3, v18
1389; GFX10-NEXT:    v_pk_max_i16 v4, v4, v19
1390; GFX10-NEXT:    v_pk_max_i16 v5, v5, v20
1391; GFX10-NEXT:    v_pk_max_i16 v1, v1, v16
1392; GFX10-NEXT:    v_pk_sub_i16 v16, 0, v6
1393; GFX10-NEXT:    v_pk_sub_i16 v17, 0, v7
1394; GFX10-NEXT:    v_pk_sub_i16 v18, 0, v8
1395; GFX10-NEXT:    v_pk_sub_i16 v19, 0, v9
1396; GFX10-NEXT:    v_pk_sub_i16 v20, 0, v10
1397; GFX10-NEXT:    v_pk_max_i16 v6, v6, v16
1398; GFX10-NEXT:    v_pk_max_i16 v7, v7, v17
1399; GFX10-NEXT:    v_pk_max_i16 v8, v8, v18
1400; GFX10-NEXT:    v_pk_max_i16 v9, v9, v19
1401; GFX10-NEXT:    v_pk_max_i16 v10, v10, v20
1402; GFX10-NEXT:    v_pk_sub_i16 v16, 0, v11
1403; GFX10-NEXT:    v_pk_sub_i16 v17, 0, v12
1404; GFX10-NEXT:    v_pk_sub_i16 v18, 0, v13
1405; GFX10-NEXT:    v_pk_sub_i16 v19, 0, v14
1406; GFX10-NEXT:    v_pk_sub_i16 v20, 0, v15
1407; GFX10-NEXT:    v_pk_max_i16 v11, v11, v16
1408; GFX10-NEXT:    v_pk_max_i16 v12, v12, v17
1409; GFX10-NEXT:    v_pk_max_i16 v13, v13, v18
1410; GFX10-NEXT:    v_pk_max_i16 v14, v14, v19
1411; GFX10-NEXT:    v_pk_max_i16 v15, v15, v20
1412; GFX10-NEXT:    s_setpc_b64 s[30:31]
1413;
1414; GFX11-LABEL: v_abs_v32i16:
1415; GFX11:       ; %bb.0:
1416; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417; GFX11-NEXT:    v_pk_sub_i16 v16, 0, v0
1418; GFX11-NEXT:    v_pk_sub_i16 v17, 0, v2
1419; GFX11-NEXT:    v_pk_sub_i16 v18, 0, v3
1420; GFX11-NEXT:    v_pk_sub_i16 v19, 0, v4
1421; GFX11-NEXT:    v_pk_sub_i16 v20, 0, v5
1422; GFX11-NEXT:    v_pk_max_i16 v0, v0, v16
1423; GFX11-NEXT:    v_pk_sub_i16 v16, 0, v1
1424; GFX11-NEXT:    v_pk_max_i16 v2, v2, v17
1425; GFX11-NEXT:    v_pk_max_i16 v3, v3, v18
1426; GFX11-NEXT:    v_pk_max_i16 v4, v4, v19
1427; GFX11-NEXT:    v_pk_max_i16 v5, v5, v20
1428; GFX11-NEXT:    v_pk_max_i16 v1, v1, v16
1429; GFX11-NEXT:    v_pk_sub_i16 v16, 0, v6
1430; GFX11-NEXT:    v_pk_sub_i16 v17, 0, v7
1431; GFX11-NEXT:    v_pk_sub_i16 v18, 0, v8
1432; GFX11-NEXT:    v_pk_sub_i16 v19, 0, v9
1433; GFX11-NEXT:    v_pk_sub_i16 v20, 0, v10
1434; GFX11-NEXT:    v_pk_max_i16 v6, v6, v16
1435; GFX11-NEXT:    v_pk_max_i16 v7, v7, v17
1436; GFX11-NEXT:    v_pk_max_i16 v8, v8, v18
1437; GFX11-NEXT:    v_pk_max_i16 v9, v9, v19
1438; GFX11-NEXT:    v_pk_max_i16 v10, v10, v20
1439; GFX11-NEXT:    v_pk_sub_i16 v16, 0, v11
1440; GFX11-NEXT:    v_pk_sub_i16 v17, 0, v12
1441; GFX11-NEXT:    v_pk_sub_i16 v18, 0, v13
1442; GFX11-NEXT:    v_pk_sub_i16 v19, 0, v14
1443; GFX11-NEXT:    v_pk_sub_i16 v20, 0, v15
1444; GFX11-NEXT:    v_pk_max_i16 v11, v11, v16
1445; GFX11-NEXT:    v_pk_max_i16 v12, v12, v17
1446; GFX11-NEXT:    v_pk_max_i16 v13, v13, v18
1447; GFX11-NEXT:    v_pk_max_i16 v14, v14, v19
1448; GFX11-NEXT:    v_pk_max_i16 v15, v15, v20
1449; GFX11-NEXT:    s_setpc_b64 s[30:31]
1450;
1451; GFX12-LABEL: v_abs_v32i16:
1452; GFX12:       ; %bb.0:
1453; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1454; GFX12-NEXT:    s_wait_expcnt 0x0
1455; GFX12-NEXT:    s_wait_samplecnt 0x0
1456; GFX12-NEXT:    s_wait_bvhcnt 0x0
1457; GFX12-NEXT:    s_wait_kmcnt 0x0
1458; GFX12-NEXT:    v_pk_sub_i16 v16, 0, v0
1459; GFX12-NEXT:    v_pk_sub_i16 v17, 0, v2
1460; GFX12-NEXT:    v_pk_sub_i16 v18, 0, v3
1461; GFX12-NEXT:    v_pk_sub_i16 v19, 0, v4
1462; GFX12-NEXT:    v_pk_sub_i16 v20, 0, v5
1463; GFX12-NEXT:    v_pk_max_i16 v0, v0, v16
1464; GFX12-NEXT:    v_pk_sub_i16 v16, 0, v1
1465; GFX12-NEXT:    v_pk_max_i16 v2, v2, v17
1466; GFX12-NEXT:    v_pk_max_i16 v3, v3, v18
1467; GFX12-NEXT:    v_pk_max_i16 v4, v4, v19
1468; GFX12-NEXT:    v_pk_max_i16 v5, v5, v20
1469; GFX12-NEXT:    v_pk_max_i16 v1, v1, v16
1470; GFX12-NEXT:    v_pk_sub_i16 v16, 0, v6
1471; GFX12-NEXT:    v_pk_sub_i16 v17, 0, v7
1472; GFX12-NEXT:    v_pk_sub_i16 v18, 0, v8
1473; GFX12-NEXT:    v_pk_sub_i16 v19, 0, v9
1474; GFX12-NEXT:    v_pk_sub_i16 v20, 0, v10
1475; GFX12-NEXT:    v_pk_max_i16 v6, v6, v16
1476; GFX12-NEXT:    v_pk_max_i16 v7, v7, v17
1477; GFX12-NEXT:    v_pk_max_i16 v8, v8, v18
1478; GFX12-NEXT:    v_pk_max_i16 v9, v9, v19
1479; GFX12-NEXT:    v_pk_max_i16 v10, v10, v20
1480; GFX12-NEXT:    v_pk_sub_i16 v16, 0, v11
1481; GFX12-NEXT:    v_pk_sub_i16 v17, 0, v12
1482; GFX12-NEXT:    v_pk_sub_i16 v18, 0, v13
1483; GFX12-NEXT:    v_pk_sub_i16 v19, 0, v14
1484; GFX12-NEXT:    v_pk_sub_i16 v20, 0, v15
1485; GFX12-NEXT:    v_pk_max_i16 v11, v11, v16
1486; GFX12-NEXT:    v_pk_max_i16 v12, v12, v17
1487; GFX12-NEXT:    v_pk_max_i16 v13, v13, v18
1488; GFX12-NEXT:    v_pk_max_i16 v14, v14, v19
1489; GFX12-NEXT:    v_pk_max_i16 v15, v15, v20
1490; GFX12-NEXT:    s_setpc_b64 s[30:31]
1491  %res = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %arg, i1 false)
1492  ret <32 x i16> %res
1493}
1494