1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s 3 4; SMIN (Single, x2) 5 6define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) { 7; CHECK-LABEL: multi_vec_min_single_x2_s8: 8; CHECK: // %bb.0: 9; CHECK-NEXT: mov z5.d, z2.d 10; CHECK-NEXT: mov z4.d, z1.d 11; CHECK-NEXT: smin { z4.b, z5.b }, { z4.b, z5.b }, z3.b 12; CHECK-NEXT: mov z0.d, z4.d 13; CHECK-NEXT: mov z1.d, z5.d 14; CHECK-NEXT: ret 15 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) 16 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res 17} 18 19define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) { 20; CHECK-LABEL: multi_vec_min_single_x2_s16: 21; CHECK: // %bb.0: 22; CHECK-NEXT: mov z5.d, z2.d 23; CHECK-NEXT: mov z4.d, z1.d 24; CHECK-NEXT: smin { z4.h, z5.h }, { z4.h, z5.h }, z3.h 25; CHECK-NEXT: mov z0.d, z4.d 26; CHECK-NEXT: mov z1.d, z5.d 27; CHECK-NEXT: ret 28 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) 29 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res 30} 31 32define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) { 33; CHECK-LABEL: multi_vec_min_single_x2_s32: 34; CHECK: // %bb.0: 35; CHECK-NEXT: mov z5.d, z2.d 36; CHECK-NEXT: mov z4.d, z1.d 37; CHECK-NEXT: smin { z4.s, z5.s }, { z4.s, z5.s }, z3.s 38; CHECK-NEXT: mov z0.d, z4.d 39; CHECK-NEXT: mov z1.d, z5.d 40; CHECK-NEXT: ret 41 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) 42 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res 43} 44 45define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) { 46; CHECK-LABEL: multi_vec_min_single_x2_s64: 47; CHECK: // %bb.0: 48; CHECK-NEXT: mov z5.d, z2.d 49; CHECK-NEXT: mov z4.d, z1.d 50; CHECK-NEXT: smin { z4.d, z5.d }, { z4.d, z5.d }, z3.d 51; CHECK-NEXT: mov z0.d, z4.d 52; CHECK-NEXT: mov z1.d, z5.d 53; CHECK-NEXT: ret 54 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) 55 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res 56} 57 58; UMIN (Single, x2) 59 60define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) { 61; CHECK-LABEL: multi_vec_min_single_x2_u8: 62; CHECK: // %bb.0: 63; CHECK-NEXT: mov z5.d, z2.d 64; CHECK-NEXT: mov z4.d, z1.d 65; CHECK-NEXT: umin { z4.b, z5.b }, { z4.b, z5.b }, z3.b 66; CHECK-NEXT: mov z0.d, z4.d 67; CHECK-NEXT: mov z1.d, z5.d 68; CHECK-NEXT: ret 69 %res = call { <vscale x 16 x i8>, <vscale x 16 x 
i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) 70 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res 71} 72 73define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) { 74; CHECK-LABEL: multi_vec_min_single_x2_u16: 75; CHECK: // %bb.0: 76; CHECK-NEXT: mov z5.d, z2.d 77; CHECK-NEXT: mov z4.d, z1.d 78; CHECK-NEXT: umin { z4.h, z5.h }, { z4.h, z5.h }, z3.h 79; CHECK-NEXT: mov z0.d, z4.d 80; CHECK-NEXT: mov z1.d, z5.d 81; CHECK-NEXT: ret 82 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) 83 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res 84} 85 86define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) { 87; CHECK-LABEL: multi_vec_min_single_x2_u32: 88; CHECK: // %bb.0: 89; CHECK-NEXT: mov z5.d, z2.d 90; CHECK-NEXT: mov z4.d, z1.d 91; CHECK-NEXT: umin { z4.s, z5.s }, { z4.s, z5.s }, z3.s 92; CHECK-NEXT: mov z0.d, z4.d 93; CHECK-NEXT: mov z1.d, z5.d 94; CHECK-NEXT: ret 95 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) 96 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res 97} 98 99define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) { 100; CHECK-LABEL: multi_vec_min_single_x2_u64: 101; CHECK: // %bb.0: 102; CHECK-NEXT: mov z5.d, z2.d 103; CHECK-NEXT: mov z4.d, z1.d 104; CHECK-NEXT: umin { z4.d, z5.d }, { z4.d, z5.d }, z3.d 105; CHECK-NEXT: mov z0.d, z4.d 106; CHECK-NEXT: mov z1.d, z5.d 107; CHECK-NEXT: ret 108 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) 109 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res 110} 111 112; BFMIN (Single, x2) 113 114define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) { 115; CHECK-LABEL: multi_vec_min_single_x2_bf16: 116; CHECK: // %bb.0: 117; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 118; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 119; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h 120; CHECK-NEXT: ret 121 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) 122 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res 123} 124 125; FMIN (Single, x2) 126 127define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_single_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) { 128; CHECK-LABEL: multi_vec_min_single_x2_f16: 129; CHECK: // %bb.0: 130; CHECK-NEXT: mov z5.d, z2.d 131; CHECK-NEXT: mov z4.d, z1.d 132; CHECK-NEXT: fmin { z4.h, z5.h }, { z4.h, z5.h }, z3.h 133; CHECK-NEXT: mov z0.d, z4.d 134; CHECK-NEXT: mov z1.d, z5.d 135; CHECK-NEXT: ret 136 %res = call { 
<vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) 137 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res 138} 139 140define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_single_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) { 141; CHECK-LABEL: multi_vec_min_single_x2_f32: 142; CHECK: // %bb.0: 143; CHECK-NEXT: mov z5.d, z2.d 144; CHECK-NEXT: mov z4.d, z1.d 145; CHECK-NEXT: fmin { z4.s, z5.s }, { z4.s, z5.s }, z3.s 146; CHECK-NEXT: mov z0.d, z4.d 147; CHECK-NEXT: mov z1.d, z5.d 148; CHECK-NEXT: ret 149 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) 150 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res 151} 152 153define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_single_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) { 154; CHECK-LABEL: multi_vec_min_single_x2_f64: 155; CHECK: // %bb.0: 156; CHECK-NEXT: mov z5.d, z2.d 157; CHECK-NEXT: mov z4.d, z1.d 158; CHECK-NEXT: fmin { z4.d, z5.d }, { z4.d, z5.d }, z3.d 159; CHECK-NEXT: mov z0.d, z4.d 160; CHECK-NEXT: mov z1.d, z5.d 161; CHECK-NEXT: ret 162 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) 163 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res 164} 165 166; SMIN (Single, x4) 167 168define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) { 169; CHECK-LABEL: multi_vec_min_single_x4_s8: 170; CHECK: // %bb.0: 171; CHECK-NEXT: mov z27.d, z4.d 172; CHECK-NEXT: mov z26.d, z3.d 173; CHECK-NEXT: mov z25.d, z2.d 174; CHECK-NEXT: mov z24.d, z1.d 175; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, z5.b 176; CHECK-NEXT: mov z0.d, z24.d 177; CHECK-NEXT: mov z1.d, z25.d 178; CHECK-NEXT: mov z2.d, z26.d 179; CHECK-NEXT: mov z3.d, z27.d 180; CHECK-NEXT: ret 181 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 182 @llvm.aarch64.sve.smin.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) 183 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res 184} 185 186define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) { 187; CHECK-LABEL: multi_vec_min_single_x4_s16: 188; CHECK: // %bb.0: 189; CHECK-NEXT: mov z27.d, z4.d 190; CHECK-NEXT: mov z26.d, z3.d 191; CHECK-NEXT: mov z25.d, z2.d 192; CHECK-NEXT: mov z24.d, z1.d 193; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, z5.h 194; CHECK-NEXT: mov z0.d, z24.d 195; CHECK-NEXT: mov z1.d, z25.d 196; CHECK-NEXT: mov z2.d, z26.d 197; CHECK-NEXT: mov z3.d, z27.d 198; CHECK-NEXT: ret 199 %res = call { <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 200 @llvm.aarch64.sve.smin.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) 201 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res 202} 203 204define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) { 205; CHECK-LABEL: multi_vec_min_single_x4_s32: 206; CHECK: // %bb.0: 207; CHECK-NEXT: mov z27.d, z4.d 208; CHECK-NEXT: mov z26.d, z3.d 209; CHECK-NEXT: mov z25.d, z2.d 210; CHECK-NEXT: mov z24.d, z1.d 211; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, z5.s 212; CHECK-NEXT: mov z0.d, z24.d 213; CHECK-NEXT: mov z1.d, z25.d 214; CHECK-NEXT: mov z2.d, z26.d 215; CHECK-NEXT: mov z3.d, z27.d 216; CHECK-NEXT: ret 217 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } 218 @llvm.aarch64.sve.smin.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) 219 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res 220} 221 222define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) { 223; CHECK-LABEL: multi_vec_min_single_x4_s64: 224; CHECK: // %bb.0: 225; CHECK-NEXT: mov z27.d, z4.d 226; CHECK-NEXT: mov z26.d, z3.d 227; CHECK-NEXT: mov z25.d, z2.d 228; CHECK-NEXT: mov z24.d, z1.d 229; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, z5.d 230; CHECK-NEXT: mov z0.d, z24.d 231; CHECK-NEXT: mov z1.d, z25.d 232; CHECK-NEXT: mov z2.d, z26.d 233; CHECK-NEXT: mov z3.d, z27.d 234; CHECK-NEXT: ret 235 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } 236 @llvm.aarch64.sve.smin.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) 237 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res 238} 239 240; UMIN (Single, x4) 241 242define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) { 243; CHECK-LABEL: multi_vec_min_single_x4_u8: 244; CHECK: // %bb.0: 245; CHECK-NEXT: mov z27.d, z4.d 246; CHECK-NEXT: mov z26.d, z3.d 247; CHECK-NEXT: mov z25.d, z2.d 248; CHECK-NEXT: mov z24.d, z1.d 249; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, z5.b 250; CHECK-NEXT: mov z0.d, z24.d 251; CHECK-NEXT: mov z1.d, z25.d 252; CHECK-NEXT: mov z2.d, z26.d 253; CHECK-NEXT: mov z3.d, z27.d 254; CHECK-NEXT: ret 255 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 256 @llvm.aarch64.sve.umin.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) 257 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale 
x 16 x i8> } %res 258} 259 260define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) { 261; CHECK-LABEL: multi_vec_min_single_x4_u16: 262; CHECK: // %bb.0: 263; CHECK-NEXT: mov z27.d, z4.d 264; CHECK-NEXT: mov z26.d, z3.d 265; CHECK-NEXT: mov z25.d, z2.d 266; CHECK-NEXT: mov z24.d, z1.d 267; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, z5.h 268; CHECK-NEXT: mov z0.d, z24.d 269; CHECK-NEXT: mov z1.d, z25.d 270; CHECK-NEXT: mov z2.d, z26.d 271; CHECK-NEXT: mov z3.d, z27.d 272; CHECK-NEXT: ret 273 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 274 @llvm.aarch64.sve.umin.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) 275 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res 276} 277 278define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) { 279; CHECK-LABEL: multi_vec_min_single_x4_u32: 280; CHECK: // %bb.0: 281; CHECK-NEXT: mov z27.d, z4.d 282; CHECK-NEXT: mov z26.d, z3.d 283; CHECK-NEXT: mov z25.d, z2.d 284; CHECK-NEXT: mov z24.d, z1.d 285; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, z5.s 286; CHECK-NEXT: mov z0.d, z24.d 287; CHECK-NEXT: mov z1.d, z25.d 288; CHECK-NEXT: mov z2.d, z26.d 289; CHECK-NEXT: mov z3.d, z27.d 290; CHECK-NEXT: ret 291 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } 292 @llvm.aarch64.sve.umin.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) 293 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res 294} 295 296define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) { 297; CHECK-LABEL: multi_vec_min_single_x4_u64: 298; CHECK: // %bb.0: 299; CHECK-NEXT: mov z27.d, z4.d 300; CHECK-NEXT: mov z26.d, z3.d 301; CHECK-NEXT: mov z25.d, z2.d 302; CHECK-NEXT: mov z24.d, z1.d 303; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, z5.d 304; CHECK-NEXT: mov z0.d, z24.d 305; CHECK-NEXT: mov z1.d, z25.d 306; CHECK-NEXT: mov z2.d, z26.d 307; CHECK-NEXT: mov z3.d, z27.d 308; CHECK-NEXT: ret 309 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } 310 @llvm.aarch64.sve.umin.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) 311 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res 312} 313 314; BFMIN (Single, x4) 315 316define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x 
bfloat> %zm) { 317; CHECK-LABEL: multi_vec_min_single_x4_bf16: 318; CHECK: // %bb.0: 319; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 320; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 321; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 322; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 323; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h 324; CHECK-NEXT: ret 325 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) 326 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res 327} 328 329; FMIN (SINGLE, x4) 330 331define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_single_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) { 332; CHECK-LABEL: multi_vec_min_single_x4_f16: 333; CHECK: // %bb.0: 334; CHECK-NEXT: mov z27.d, z4.d 335; CHECK-NEXT: mov z26.d, z3.d 336; CHECK-NEXT: mov z25.d, z2.d 337; CHECK-NEXT: mov z24.d, z1.d 338; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, z5.h 339; CHECK-NEXT: mov z0.d, z24.d 340; CHECK-NEXT: mov z1.d, z25.d 341; CHECK-NEXT: mov z2.d, z26.d 342; CHECK-NEXT: mov z3.d, z27.d 343; CHECK-NEXT: ret 344 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } 345 @llvm.aarch64.sve.fmin.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) 346 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res 347} 348 349define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_single_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) { 350; CHECK-LABEL: multi_vec_min_single_x4_f32: 351; CHECK: // %bb.0: 352; CHECK-NEXT: mov z27.d, z4.d 353; CHECK-NEXT: mov z26.d, z3.d 354; CHECK-NEXT: mov z25.d, z2.d 355; CHECK-NEXT: mov z24.d, z1.d 356; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, z5.s 357; CHECK-NEXT: mov z0.d, z24.d 358; CHECK-NEXT: mov z1.d, z25.d 359; CHECK-NEXT: mov z2.d, z26.d 360; CHECK-NEXT: mov z3.d, z27.d 361; CHECK-NEXT: ret 362 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } 363 @llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) 364 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res 365} 366 367define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_single_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) { 368; CHECK-LABEL: multi_vec_min_single_x4_f64: 369; CHECK: // %bb.0: 370; CHECK-NEXT: 
mov z27.d, z4.d 371; CHECK-NEXT: mov z26.d, z3.d 372; CHECK-NEXT: mov z25.d, z2.d 373; CHECK-NEXT: mov z24.d, z1.d 374; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, z5.d 375; CHECK-NEXT: mov z0.d, z24.d 376; CHECK-NEXT: mov z1.d, z25.d 377; CHECK-NEXT: mov z2.d, z26.d 378; CHECK-NEXT: mov z3.d, z27.d 379; CHECK-NEXT: ret 380 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } 381 @llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) 382 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res 383} 384 385; SMIN (Multi, x2) 386 387define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) { 388; CHECK-LABEL: multi_vec_min_multi_x2_s8: 389; CHECK: // %bb.0: 390; CHECK-NEXT: mov z7.d, z4.d 391; CHECK-NEXT: mov z5.d, z2.d 392; CHECK-NEXT: mov z6.d, z3.d 393; CHECK-NEXT: mov z4.d, z1.d 394; CHECK-NEXT: smin { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b } 395; CHECK-NEXT: mov z0.d, z4.d 396; CHECK-NEXT: mov z1.d, z5.d 397; CHECK-NEXT: ret 398 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) 399 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res 400} 401 402define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) { 403; CHECK-LABEL: multi_vec_min_multi_x2_s16: 404; CHECK: // %bb.0: 405; CHECK-NEXT: mov z7.d, z4.d 406; CHECK-NEXT: mov z5.d, z2.d 407; CHECK-NEXT: mov z6.d, z3.d 408; CHECK-NEXT: mov z4.d, z1.d 409; CHECK-NEXT: smin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h } 410; CHECK-NEXT: mov z0.d, z4.d 411; CHECK-NEXT: mov z1.d, z5.d 412; CHECK-NEXT: ret 413 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) 414 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res 415} 416 417define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) { 418; CHECK-LABEL: multi_vec_min_multi_x2_s32: 419; CHECK: // %bb.0: 420; CHECK-NEXT: mov z7.d, z4.d 421; CHECK-NEXT: mov z5.d, z2.d 422; CHECK-NEXT: mov z6.d, z3.d 423; CHECK-NEXT: mov z4.d, z1.d 424; CHECK-NEXT: smin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s } 425; CHECK-NEXT: mov z0.d, z4.d 426; CHECK-NEXT: mov z1.d, z5.d 427; CHECK-NEXT: ret 428 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) 429 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res 430} 431 432define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) { 433; CHECK-LABEL: multi_vec_min_multi_x2_s64: 434; CHECK: // %bb.0: 435; CHECK-NEXT: mov z7.d, z4.d 
436; CHECK-NEXT: mov z5.d, z2.d 437; CHECK-NEXT: mov z6.d, z3.d 438; CHECK-NEXT: mov z4.d, z1.d 439; CHECK-NEXT: smin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d } 440; CHECK-NEXT: mov z0.d, z4.d 441; CHECK-NEXT: mov z1.d, z5.d 442; CHECK-NEXT: ret 443 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) 444 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res 445} 446 447; UMIN (Multi, x2) 448 449define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) { 450; CHECK-LABEL: multi_vec_min_multi_x2_u8: 451; CHECK: // %bb.0: 452; CHECK-NEXT: mov z7.d, z4.d 453; CHECK-NEXT: mov z5.d, z2.d 454; CHECK-NEXT: mov z6.d, z3.d 455; CHECK-NEXT: mov z4.d, z1.d 456; CHECK-NEXT: umin { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b } 457; CHECK-NEXT: mov z0.d, z4.d 458; CHECK-NEXT: mov z1.d, z5.d 459; CHECK-NEXT: ret 460 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) 461 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res 462} 463 464define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) { 465; CHECK-LABEL: multi_vec_min_multi_x2_u16: 466; CHECK: // %bb.0: 467; CHECK-NEXT: mov z7.d, z4.d 468; CHECK-NEXT: mov z5.d, z2.d 469; CHECK-NEXT: mov z6.d, z3.d 470; CHECK-NEXT: mov z4.d, z1.d 471; CHECK-NEXT: umin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h } 472; CHECK-NEXT: mov z0.d, z4.d 473; CHECK-NEXT: mov z1.d, z5.d 474; CHECK-NEXT: ret 475 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) 476 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res 477} 478 479define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) { 480; CHECK-LABEL: multi_vec_min_multi_x2_u32: 481; CHECK: // %bb.0: 482; CHECK-NEXT: mov z7.d, z4.d 483; CHECK-NEXT: mov z5.d, z2.d 484; CHECK-NEXT: mov z6.d, z3.d 485; CHECK-NEXT: mov z4.d, z1.d 486; CHECK-NEXT: umin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s } 487; CHECK-NEXT: mov z0.d, z4.d 488; CHECK-NEXT: mov z1.d, z5.d 489; CHECK-NEXT: ret 490 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) 491 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res 492} 493 494define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) { 495; CHECK-LABEL: multi_vec_min_multi_x2_u64: 496; CHECK: // %bb.0: 497; CHECK-NEXT: mov z7.d, z4.d 498; CHECK-NEXT: mov z5.d, z2.d 499; CHECK-NEXT: mov z6.d, z3.d 500; CHECK-NEXT: mov z4.d, z1.d 501; CHECK-NEXT: umin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d } 502; CHECK-NEXT: mov z0.d, z4.d 503; CHECK-NEXT: mov z1.d, z5.d 504; 
CHECK-NEXT: ret 505 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) 506 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res 507} 508 509; BFMIN (Multi, x2) 510 511define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) { 512; CHECK-LABEL: multi_vec_min_x2_bf16: 513; CHECK: // %bb.0: 514; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 515; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 516; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 517; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 518; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } 519; CHECK-NEXT: ret 520 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) 521 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res 522} 523 524; FMIN (Multi, x2) 525 526define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) { 527; CHECK-LABEL: multi_vec_min_multi_x2_f16: 528; CHECK: // %bb.0: 529; CHECK-NEXT: mov z7.d, z4.d 530; CHECK-NEXT: mov z5.d, z2.d 531; CHECK-NEXT: mov z6.d, z3.d 532; CHECK-NEXT: mov z4.d, z1.d 533; CHECK-NEXT: fmin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h } 534; CHECK-NEXT: mov z0.d, z4.d 535; CHECK-NEXT: mov z1.d, z5.d 536; CHECK-NEXT: ret 537 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) 538 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res 539} 540 541define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) { 542; CHECK-LABEL: multi_vec_min_multi_x2_f32: 543; CHECK: // %bb.0: 544; CHECK-NEXT: mov z7.d, z4.d 545; CHECK-NEXT: mov z5.d, z2.d 546; CHECK-NEXT: mov z6.d, z3.d 547; CHECK-NEXT: mov z4.d, z1.d 548; CHECK-NEXT: fmin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s } 549; CHECK-NEXT: mov z0.d, z4.d 550; CHECK-NEXT: mov z1.d, z5.d 551; CHECK-NEXT: ret 552 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) 553 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res 554} 555 556define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) { 557; CHECK-LABEL: multi_vec_min_multi_x2_f64: 558; CHECK: // %bb.0: 559; CHECK-NEXT: mov z7.d, z4.d 560; CHECK-NEXT: mov z5.d, z2.d 561; CHECK-NEXT: mov z6.d, z3.d 562; CHECK-NEXT: mov z4.d, z1.d 563; CHECK-NEXT: fmin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d } 564; CHECK-NEXT: mov z0.d, z4.d 565; CHECK-NEXT: mov z1.d, z5.d 566; CHECK-NEXT: ret 567 %res = call { <vscale x 2 x 
double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) 568 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res 569} 570 571; SMIN (Multi, x4) 572 573define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, 574; CHECK-LABEL: multi_vec_min_multi_x4_s8: 575; CHECK: // %bb.0: 576; CHECK-NEXT: mov z30.d, z7.d 577; CHECK-NEXT: mov z27.d, z4.d 578; CHECK-NEXT: ptrue p0.b 579; CHECK-NEXT: mov z29.d, z6.d 580; CHECK-NEXT: mov z26.d, z3.d 581; CHECK-NEXT: mov z28.d, z5.d 582; CHECK-NEXT: mov z25.d, z2.d 583; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] 584; CHECK-NEXT: mov z24.d, z1.d 585; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } 586; CHECK-NEXT: mov z0.d, z24.d 587; CHECK-NEXT: mov z1.d, z25.d 588; CHECK-NEXT: mov z2.d, z26.d 589; CHECK-NEXT: mov z3.d, z27.d 590; CHECK-NEXT: ret 591 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) { 592 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 593 @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, 594 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) 595 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res 596} 597 598define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, 599; CHECK-LABEL: multi_vec_min_multi_x4_s16: 600; CHECK: // %bb.0: 601; CHECK-NEXT: mov z30.d, z7.d 602; CHECK-NEXT: mov z27.d, z4.d 603; CHECK-NEXT: ptrue p0.h 604; CHECK-NEXT: mov z29.d, z6.d 605; CHECK-NEXT: mov z26.d, z3.d 606; CHECK-NEXT: mov z28.d, z5.d 607; CHECK-NEXT: mov z25.d, z2.d 608; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] 609; CHECK-NEXT: mov z24.d, z1.d 610; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } 611; CHECK-NEXT: mov z0.d, z24.d 612; CHECK-NEXT: mov z1.d, z25.d 613; CHECK-NEXT: mov z2.d, z26.d 614; CHECK-NEXT: mov z3.d, z27.d 615; CHECK-NEXT: ret 616 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) { 617 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 618 @llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, 619 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) 620 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res 621} 622 623define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, 624; CHECK-LABEL: multi_vec_min_multi_x4_s32: 625; CHECK: // %bb.0: 626; CHECK-NEXT: mov z30.d, z7.d 627; CHECK-NEXT: mov z27.d, z4.d 628; CHECK-NEXT: ptrue p0.s 629; CHECK-NEXT: mov z29.d, z6.d 630; 
CHECK-NEXT: mov z26.d, z3.d 631; CHECK-NEXT: mov z28.d, z5.d 632; CHECK-NEXT: mov z25.d, z2.d 633; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] 634; CHECK-NEXT: mov z24.d, z1.d 635; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } 636; CHECK-NEXT: mov z0.d, z24.d 637; CHECK-NEXT: mov z1.d, z25.d 638; CHECK-NEXT: mov z2.d, z26.d 639; CHECK-NEXT: mov z3.d, z27.d 640; CHECK-NEXT: ret 641 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) { 642 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } 643 @llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, 644 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) 645 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res 646} 647 648define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, 649; CHECK-LABEL: multi_vec_min_multi_x4_s64: 650; CHECK: // %bb.0: 651; CHECK-NEXT: mov z30.d, z7.d 652; CHECK-NEXT: mov z27.d, z4.d 653; CHECK-NEXT: ptrue p0.d 654; CHECK-NEXT: mov z29.d, z6.d 655; CHECK-NEXT: mov z26.d, z3.d 656; CHECK-NEXT: mov z28.d, z5.d 657; CHECK-NEXT: mov z25.d, z2.d 658; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] 659; CHECK-NEXT: mov z24.d, z1.d 660; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } 661; CHECK-NEXT: mov z0.d, z24.d 662; CHECK-NEXT: mov z1.d, z25.d 663; CHECK-NEXT: mov z2.d, z26.d 664; CHECK-NEXT: mov z3.d, z27.d 665; CHECK-NEXT: ret 666 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) { 667 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } 668 @llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, 669 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) 670 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res 671} 672 673; UMIN (Multi, x4) 674 675define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, 676; CHECK-LABEL: multi_vec_min_multi_x4_u8: 677; CHECK: // %bb.0: 678; CHECK-NEXT: mov z30.d, z7.d 679; CHECK-NEXT: mov z27.d, z4.d 680; CHECK-NEXT: ptrue p0.b 681; CHECK-NEXT: mov z29.d, z6.d 682; CHECK-NEXT: mov z26.d, z3.d 683; CHECK-NEXT: mov z28.d, z5.d 684; CHECK-NEXT: mov z25.d, z2.d 685; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] 686; CHECK-NEXT: mov z24.d, z1.d 687; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } 688; CHECK-NEXT: mov z0.d, z24.d 689; CHECK-NEXT: mov z1.d, z25.d 690; CHECK-NEXT: mov z2.d, z26.d 691; CHECK-NEXT: mov z3.d, z27.d 692; CHECK-NEXT: ret 693 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) { 694 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 695 @llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 
16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, 696 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) 697 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res 698} 699 700define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, 701; CHECK-LABEL: multi_vec_min_multi_x4_u16: 702; CHECK: // %bb.0: 703; CHECK-NEXT: mov z30.d, z7.d 704; CHECK-NEXT: mov z27.d, z4.d 705; CHECK-NEXT: ptrue p0.h 706; CHECK-NEXT: mov z29.d, z6.d 707; CHECK-NEXT: mov z26.d, z3.d 708; CHECK-NEXT: mov z28.d, z5.d 709; CHECK-NEXT: mov z25.d, z2.d 710; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] 711; CHECK-NEXT: mov z24.d, z1.d 712; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } 713; CHECK-NEXT: mov z0.d, z24.d 714; CHECK-NEXT: mov z1.d, z25.d 715; CHECK-NEXT: mov z2.d, z26.d 716; CHECK-NEXT: mov z3.d, z27.d 717; CHECK-NEXT: ret 718 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) { 719 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 720 @llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, 721 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) 722 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res 723} 724 725define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, 726; CHECK-LABEL: multi_vec_min_multi_x4_u32: 727; CHECK: // %bb.0: 728; CHECK-NEXT: mov z30.d, z7.d 729; CHECK-NEXT: mov z27.d, z4.d 730; CHECK-NEXT: ptrue p0.s 731; CHECK-NEXT: mov z29.d, z6.d 732; CHECK-NEXT: mov z26.d, z3.d 733; CHECK-NEXT: mov z28.d, z5.d 734; CHECK-NEXT: mov z25.d, z2.d 735; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] 736; CHECK-NEXT: mov z24.d, z1.d 737; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } 738; CHECK-NEXT: mov z0.d, z24.d 739; CHECK-NEXT: mov z1.d, z25.d 740; CHECK-NEXT: mov z2.d, z26.d 741; CHECK-NEXT: mov z3.d, z27.d 742; CHECK-NEXT: ret 743 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) { 744 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } 745 @llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, 746 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) 747 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res 748} 749 750define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, 751; CHECK-LABEL: multi_vec_min_multi_x4_u64: 752; CHECK: // %bb.0: 753; CHECK-NEXT: mov z30.d, z7.d 754; CHECK-NEXT: mov z27.d, z4.d 755; CHECK-NEXT: ptrue p0.d 756; CHECK-NEXT: mov z29.d, z6.d 757; CHECK-NEXT: mov z26.d, z3.d 
758; CHECK-NEXT: mov z28.d, z5.d 759; CHECK-NEXT: mov z25.d, z2.d 760; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] 761; CHECK-NEXT: mov z24.d, z1.d 762; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } 763; CHECK-NEXT: mov z0.d, z24.d 764; CHECK-NEXT: mov z1.d, z25.d 765; CHECK-NEXT: mov z2.d, z26.d 766; CHECK-NEXT: mov z3.d, z27.d 767; CHECK-NEXT: ret 768 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) { 769 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } 770 @llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, 771 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) 772 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res 773} 774 775 776; BFMIN (Multi, x4) 777 778define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) { 779; CHECK-LABEL: multi_vec_min_x4_bf16: 780; CHECK: // %bb.0: 781; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 782; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 783; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 784; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 785; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 786; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 787; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 788; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 789; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } 790; CHECK-NEXT: ret 791 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) 792 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res 793} 794 795; FMIN (Multi, x4) 796 797define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, 798; CHECK-LABEL: multi_vec_min_multi_x4_f16: 799; CHECK: // %bb.0: 800; CHECK-NEXT: mov z30.d, z7.d 801; CHECK-NEXT: mov z27.d, z4.d 802; CHECK-NEXT: ptrue p0.h 803; CHECK-NEXT: mov z29.d, z6.d 804; CHECK-NEXT: mov z26.d, z3.d 805; CHECK-NEXT: mov z28.d, z5.d 806; CHECK-NEXT: mov z25.d, z2.d 807; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] 808; CHECK-NEXT: mov z24.d, z1.d 809; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } 810; CHECK-NEXT: mov z0.d, z24.d 811; CHECK-NEXT: mov z1.d, z25.d 812; CHECK-NEXT: mov z2.d, z26.d 813; CHECK-NEXT: mov z3.d, z27.d 814; CHECK-NEXT: ret 815 <vscale x 8 x half> %zm1, <vscale x 8 
x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) { 816 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } 817 @llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, 818 <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) 819 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res 820} 821 822define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, 823; CHECK-LABEL: multi_vec_min_multi_x4_f32: 824; CHECK: // %bb.0: 825; CHECK-NEXT: mov z30.d, z7.d 826; CHECK-NEXT: mov z27.d, z4.d 827; CHECK-NEXT: ptrue p0.s 828; CHECK-NEXT: mov z29.d, z6.d 829; CHECK-NEXT: mov z26.d, z3.d 830; CHECK-NEXT: mov z28.d, z5.d 831; CHECK-NEXT: mov z25.d, z2.d 832; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] 833; CHECK-NEXT: mov z24.d, z1.d 834; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } 835; CHECK-NEXT: mov z0.d, z24.d 836; CHECK-NEXT: mov z1.d, z25.d 837; CHECK-NEXT: mov z2.d, z26.d 838; CHECK-NEXT: mov z3.d, z27.d 839; CHECK-NEXT: ret 840 <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) { 841 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } 842 @llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, 843 <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) 844 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res 845} 846 847define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, 848; CHECK-LABEL: multi_vec_min_multi_x4_f64: 849; CHECK: // %bb.0: 850; CHECK-NEXT: mov z30.d, z7.d 851; CHECK-NEXT: mov z27.d, z4.d 852; CHECK-NEXT: ptrue p0.d 853; CHECK-NEXT: mov z29.d, z6.d 854; CHECK-NEXT: mov z26.d, z3.d 855; CHECK-NEXT: mov z28.d, z5.d 856; CHECK-NEXT: mov z25.d, z2.d 857; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] 858; CHECK-NEXT: mov z24.d, z1.d 859; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } 860; CHECK-NEXT: mov z0.d, z24.d 861; CHECK-NEXT: mov z1.d, z25.d 862; CHECK-NEXT: mov z2.d, z26.d 863; CHECK-NEXT: mov z3.d, z27.d 864; CHECK-NEXT: ret 865 <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) { 866 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } 867 @llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, 868 <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) 869 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res 870} 871 872; BFMINNM (Single, x2) 873 874define { <vscale x 8 
x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) { 875; CHECK-LABEL: multi_vec_minnm_single_x2_bf16: 876; CHECK: // %bb.0: 877; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 878; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 879; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h 880; CHECK-NEXT: ret 881 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) 882 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res 883} 884 885; FMINNM (Single, x2) 886 887define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_single_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) { 888; CHECK-LABEL: multi_vec_minnm_single_x2_f16: 889; CHECK: // %bb.0: 890; CHECK-NEXT: mov z5.d, z2.d 891; CHECK-NEXT: mov z4.d, z1.d 892; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, z3.h 893; CHECK-NEXT: mov z0.d, z4.d 894; CHECK-NEXT: mov z1.d, z5.d 895; CHECK-NEXT: ret 896 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) 897 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res 898} 899 900define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_single_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) { 901; CHECK-LABEL: multi_vec_minnm_single_x2_f32: 902; CHECK: // %bb.0: 903; CHECK-NEXT: mov z5.d, z2.d 904; CHECK-NEXT: mov z4.d, z1.d 905; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, z3.s 906; CHECK-NEXT: mov z0.d, z4.d 907; CHECK-NEXT: mov z1.d, z5.d 908; CHECK-NEXT: ret 909 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) 910 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res 911} 912 913define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_single_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) { 914; CHECK-LABEL: multi_vec_minnm_single_x2_f64: 915; CHECK: // %bb.0: 916; CHECK-NEXT: mov z5.d, z2.d 917; CHECK-NEXT: mov z4.d, z1.d 918; CHECK-NEXT: fminnm { z4.d, z5.d }, { z4.d, z5.d }, z3.d 919; CHECK-NEXT: mov z0.d, z4.d 920; CHECK-NEXT: mov z1.d, z5.d 921; CHECK-NEXT: ret 922 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) 923 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res 924} 925 926; BFMINNM (Single, x4) 927 928define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) { 929; CHECK-LABEL: multi_vec_minnm_single_x4_bf16: 930; CHECK: // %bb.0: 931; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 932; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 933; CHECK-NEXT: // kill: def $z1 killed $z1 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 934; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 935; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h 936; CHECK-NEXT: ret 937 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) 938 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res 939} 940 941; FMINNM (Single, x4) 942 943define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_single_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) { 944; CHECK-LABEL: multi_vec_minnm_single_x4_f16: 945; CHECK: // %bb.0: 946; CHECK-NEXT: mov z27.d, z4.d 947; CHECK-NEXT: mov z26.d, z3.d 948; CHECK-NEXT: mov z25.d, z2.d 949; CHECK-NEXT: mov z24.d, z1.d 950; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h 951; CHECK-NEXT: mov z0.d, z24.d 952; CHECK-NEXT: mov z1.d, z25.d 953; CHECK-NEXT: mov z2.d, z26.d 954; CHECK-NEXT: mov z3.d, z27.d 955; CHECK-NEXT: ret 956 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } 957 @llvm.aarch64.sve.fminnm.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) 958 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res 959} 960 961define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_single_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) { 962; CHECK-LABEL: multi_vec_minnm_single_x4_f32: 963; CHECK: // %bb.0: 964; CHECK-NEXT: mov z27.d, z4.d 965; CHECK-NEXT: mov z26.d, z3.d 966; CHECK-NEXT: mov z25.d, z2.d 967; CHECK-NEXT: mov z24.d, z1.d 968; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s 969; CHECK-NEXT: mov z0.d, z24.d 970; CHECK-NEXT: mov z1.d, z25.d 971; CHECK-NEXT: mov z2.d, z26.d 972; CHECK-NEXT: mov z3.d, z27.d 973; CHECK-NEXT: ret 974 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } 975 @llvm.aarch64.sve.fminnm.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) 976 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res 977} 978 979define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_single_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) { 980; CHECK-LABEL: multi_vec_minnm_single_x4_f64: 981; CHECK: // %bb.0: 982; CHECK-NEXT: mov z27.d, z4.d 983; CHECK-NEXT: mov z26.d, z3.d 984; CHECK-NEXT: mov z25.d, z2.d 985; CHECK-NEXT: mov z24.d, z1.d 986; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d 987; CHECK-NEXT: mov z0.d, z24.d 988; CHECK-NEXT: mov z1.d, z25.d 989; CHECK-NEXT: mov z2.d, 
z26.d 990; CHECK-NEXT: mov z3.d, z27.d 991; CHECK-NEXT: ret 992 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } 993 @llvm.aarch64.sve.fminnm.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) 994 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res 995} 996 997; BFMINNM (Multi, x2) 998 999define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) { 1000; CHECK-LABEL: multi_vec_minnm_x2_bf16: 1001; CHECK: // %bb.0: 1002; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 1003; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 1004; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 1005; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 1006; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } 1007; CHECK-NEXT: ret 1008 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) 1009 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res 1010} 1011 1012; FMINNM (Multi, x2) 1013 1014define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) { 1015; CHECK-LABEL: multi_vec_minnm_x2_f16: 1016; CHECK: // %bb.0: 1017; CHECK-NEXT: mov z7.d, z4.d 1018; CHECK-NEXT: mov z5.d, z2.d 1019; CHECK-NEXT: mov z6.d, z3.d 1020; CHECK-NEXT: mov z4.d, z1.d 1021; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h } 1022; CHECK-NEXT: mov z0.d, z4.d 1023; CHECK-NEXT: mov z1.d, z5.d 1024; CHECK-NEXT: ret 1025 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) 1026 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res 1027} 1028 1029define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) { 1030; CHECK-LABEL: multi_vec_minnm_x2_f32: 1031; CHECK: // %bb.0: 1032; CHECK-NEXT: mov z7.d, z4.d 1033; CHECK-NEXT: mov z5.d, z2.d 1034; CHECK-NEXT: mov z6.d, z3.d 1035; CHECK-NEXT: mov z4.d, z1.d 1036; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s } 1037; CHECK-NEXT: mov z0.d, z4.d 1038; CHECK-NEXT: mov z1.d, z5.d 1039; CHECK-NEXT: ret 1040 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) 1041 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res 1042} 1043 1044define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) { 1045; CHECK-LABEL: multi_vec_minnm_x2_f64: 1046; CHECK: // %bb.0: 1047; CHECK-NEXT: mov z7.d, z4.d 1048; CHECK-NEXT: mov z5.d, z2.d 1049; 

; BFMINNM (Multi, x2)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMINNM (Multi, x2)

define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fminnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fminnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fminnm { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}
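
; Note: the x2 "multi" forms need both the destination/source pair and the
; second source pair to start at an even-numbered register, hence the copies
; into { z4, z5 } and { z6, z7 } when %dummy displaces the operands.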

; BFMINNM (Multi, x4)

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; FMINNM (Multi, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
              @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
                                                  <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}
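
; Note: the x4 "multi" tests take nine scalable-vector arguments but only
; z0-z7 are available for passing them, so the ninth (%zm4) is passed
; indirectly per the AAPCS: its address arrives in x0 and the value is
; reloaded with a predicated load (ld1h/ld1w/ld1d) before the fminnm.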

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
              @llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
                                                  <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
              @llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
                                                  <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
  @llvm.aarch64.sve.fmin.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
  @llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
  @llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
  @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
  @llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
  @llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
  @llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
  @llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
  @llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
  @llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
  @llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
  @llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
  @llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
  @llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
  @llvm.aarch64.sve.fminnm.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
  @llvm.aarch64.sve.fminnm.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
  @llvm.aarch64.sve.fminnm.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
  @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
  @llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
  @llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)