; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
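; Note on the pattern below: most tests take a leading %unused argument so the
; live operands arrive in z1, z2, ... rather than z0. SME2 multi-vector
; operands must sit in consecutive, aligned register tuples (pairs start at an
; even register, quads at a multiple of four), so codegen has to gather the
; inputs into a tuple such as { z4, z5 } and copy the results back into the
; return registers, which is exactly what the mov instructions in the CHECK
; lines exercise.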
; SMAX (Single, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; UMAX (Single, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; BFMAX (Single, x2)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}
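; The bf16 tests reuse the generic fmax intrinsics at nxv8bf16; with
; +sve-b16b16 enabled (see the RUN line) they select bfmax. They also omit the
; %unused argument, so the inputs already form the aligned pair { z0, z1 } and
; only register-liveness (kill) annotations appear instead of movs.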

; FMAX (Single, x2)

define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_single_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmax { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_single_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmax { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_single_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmax { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; SMAX (Single, x4)

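; In the x4 forms the same copy dance widens to a quad: the four inputs are
; gathered into { z24 - z27 }, a tuple starting at a multiple of four, and the
; four results are moved back to z0-z3, the registers in which the returned
; four-element struct lives.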
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.smax.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.smax.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.smax.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.smax.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; UMAX (Single, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_single_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.umax.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_single_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.umax.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_single_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.umax.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.umax.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; BFMAX (Single, x4)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } %res
}

; FMAX (Single, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_single_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmax { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
              @llvm.aarch64.sve.fmax.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_single_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmax { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
              @llvm.aarch64.sve.fmax.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_single_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmax { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
              @llvm.aarch64.sve.fmax.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

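; The "multi" forms take a second vector tuple in place of the single zm
; operand, so two aligned pairs ({ z4, z5 } and { z6, z7 }) are assembled
; before the instruction issues.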
; SMAX (Multi, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    smax { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; UMAX (Multi, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    umax { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; BFMAX (Multi, x2)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
; CHECK-LABEL: multi_vec_max_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMAX (Multi, x2)

define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmax { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmax { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
; CHECK-LABEL: multi_vec_max_multi_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmax { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; SMAX (Multi, x4)

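; The multi x4 tests need nine vector arguments, but the AArch64 PCS only
; passes SVE vectors in z0-z7, so the final operand (%zm4) is passed
; indirectly in memory; the ptrue plus ld1b/ld1h/ld1w/ld1d sequences below
; reload it from [x0] into z31 to complete the second tuple { z28 - z31 }.
; Note that update_llc_test_checks.py interleaves the CHECK lines between the
; two halves of each multi-line define.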
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                           <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.smax.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                <vscale x 16 x i8> %zm1,  <vscale x 16 x i8> %zm2,  <vscale x 16 x i8> %zm3,  <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.smax.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                <vscale x 8 x i16> %zm1,  <vscale x 8 x i16> %zm2,  <vscale x 8 x i16> %zm3,  <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.smax.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                <vscale x 4 x i32> %zm1,  <vscale x 4 x i32> %zm2,  <vscale x 4 x i32> %zm3,  <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.smax.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
                                                <vscale x 2 x i64> %zm1,  <vscale x 2 x i64> %zm2,  <vscale x 2 x i64> %zm3,  <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; UMAX (Multi, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                           <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.umax.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                <vscale x 16 x i8> %zm1,  <vscale x 16 x i8> %zm2,  <vscale x 16 x i8> %zm3,  <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.umax.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                <vscale x 8 x i16> %zm1,  <vscale x 8 x i16> %zm2,  <vscale x 8 x i16> %zm3,  <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.umax.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                <vscale x 4 x i32> %zm1,  <vscale x 4 x i32> %zm2,  <vscale x 4 x i32> %zm3,  <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.umax.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
                                                <vscale x 2 x i64> %zm1,  <vscale x 2 x i64> %zm2,  <vscale x 2 x i64> %zm3,  <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; BFMAX (Multi, x4)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_max_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } %res
}

; FMAX (Multi, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
              @llvm.aarch64.sve.fmax.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
                                                <vscale x 8 x half> %zm1,  <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
              @llvm.aarch64.sve.fmax.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
                                                <vscale x 4 x float> %zm1,  <vscale x 4 x float> %zm2,  <vscale x 4 x float> %zm3,  <vscale x 4 x float> %zm4)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
                            <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
              @llvm.aarch64.sve.fmax.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
                                                <vscale x 2 x double> %zm1,  <vscale x 2 x double> %zm2,  <vscale x 2 x double> %zm3,  <vscale x 2 x double> %zm4)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; BFMAXNM (Single, x2)

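; The maxnm variants lower to bfmaxnm/fmaxnm, the IEEE 754 maxNum forms that
; return the numeric operand when the other input is a quiet NaN; otherwise
; their lowering matches the fmax tests above.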
873define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
874; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16:
875; CHECK:       // %bb.0:
876; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
877; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
878; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
879; CHECK-NEXT:    ret
880  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
881  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
882}
883
884; FMAXNM (Single, x2)
885
886define { <vscale x 8 x half>, <vscale x 8 x half> }  @multi_vec_maxnm_single_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
887; CHECK-LABEL: multi_vec_maxnm_single_x2_f16:
888; CHECK:       // %bb.0:
889; CHECK-NEXT:    mov z5.d, z2.d
890; CHECK-NEXT:    mov z4.d, z1.d
891; CHECK-NEXT:    fmaxnm { z4.h, z5.h }, { z4.h, z5.h }, z3.h
892; CHECK-NEXT:    mov z0.d, z4.d
893; CHECK-NEXT:    mov z1.d, z5.d
894; CHECK-NEXT:    ret
895  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
896  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
897}
898
899define { <vscale x 4 x float>, <vscale x 4 x float> }  @multi_vec_maxnm_single_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
900; CHECK-LABEL: multi_vec_maxnm_single_x2_f32:
901; CHECK:       // %bb.0:
902; CHECK-NEXT:    mov z5.d, z2.d
903; CHECK-NEXT:    mov z4.d, z1.d
904; CHECK-NEXT:    fmaxnm { z4.s, z5.s }, { z4.s, z5.s }, z3.s
905; CHECK-NEXT:    mov z0.d, z4.d
906; CHECK-NEXT:    mov z1.d, z5.d
907; CHECK-NEXT:    ret
908  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
909  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
910}
911
912define { <vscale x 2 x double>, <vscale x 2 x double> }  @multi_vec_maxnm_single_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
913; CHECK-LABEL: multi_vec_maxnm_single_x2_f64:
914; CHECK:       // %bb.0:
915; CHECK-NEXT:    mov z5.d, z2.d
916; CHECK-NEXT:    mov z4.d, z1.d
917; CHECK-NEXT:    fmaxnm { z4.d, z5.d }, { z4.d, z5.d }, z3.d
918; CHECK-NEXT:    mov z0.d, z4.d
919; CHECK-NEXT:    mov z1.d, z5.d
920; CHECK-NEXT:    ret
921  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
922  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
923}
924
925; BFMAXNM (Single, x4)
926
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMAXNM (Single, x4)

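; Note: four-vector register lists must start at a register number that is a
; multiple of four, so the f16/f32/f64 operands below are staged through
; z24-z27 on either side of the instruction, with results returned in z0-z3.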
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_single_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vec_maxnm_single_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
              @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_single_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vec_maxnm_single_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
              @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_single_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vec_maxnm_single_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
              @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; BFMAXNM (Multi, x2)

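; Note: in the "multi" forms the second source is also a register tuple rather
; than a single zm vector ({ z2.h, z3.h } here).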
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
; CHECK-LABEL: multi_vec_maxnm_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMAXNM (Multi, x2)

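; Note: with %dummy occupying z0, both source tuples are misaligned on entry, so
; the zdn and zm operands are each staged into an even-aligned pair ({ z4, z5 }
; and { z6, z7 }) before the instruction.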
define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
; CHECK-LABEL: multi_vec_maxnm_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmaxnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
; CHECK-LABEL: multi_vec_maxnm_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmaxnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
; CHECK-LABEL: multi_vec_maxnm_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fmaxnm { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; BFMAXNM (Multi, x4)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMAXNM (Multi, x4)

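; Note: these tests take nine scalable-vector arguments, but only z0-z7 are used
; to pass SVE vectors under AAPCS64, so %zm4 is passed indirectly in memory and
; reloaded via the predicated ld1h/ld1w/ld1d from [x0].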
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
              @llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
                                                  <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
              @llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
                                                  <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
              @llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
                                                  <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

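; Note: the bf16 fmax/fmaxnm intrinsics above are called without explicit
; declarations; the IR parser accepts intrinsic calls without a declare, so only
; the pre-existing declaration list follows.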
declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fmax.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fmax.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fmax.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smax.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umax.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umax.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umax.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
 @llvm.aarch64.sve.smax.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
 @llvm.aarch64.sve.smax.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
 @llvm.aarch64.sve.smax.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
 @llvm.aarch64.sve.smax.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
 @llvm.aarch64.sve.umax.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
 @llvm.aarch64.sve.umax.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
 @llvm.aarch64.sve.umax.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
 @llvm.aarch64.sve.umax.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fmax.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fmax.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fmax.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)