; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; Code generation tests for floating-point vector reductions over scalable
; vector types when targeting AArch64 with SVE.
;
; Pattern shared by every test below, as recorded in the assertions: the
; governing predicate comes from a PTRUE whose element size follows the element
; count of the IR type (.d for nxv2, .s for nxv4, .h for nxv8), while the
; reduction instruction uses the element size of the data (.h/.s/.d).
;
; Lines of the form "// kill: ..." are register-liveness annotations printed by
; llc in the assembly output; they are not instructions.

; FADDV
;
; llvm.vector.reduce.fadd called with the 'fast' flag and a scalar start value.
; The assertions expect a predicated FADDV of the vector operand followed by a
; scalar FADD that folds %init into the result.

define half @faddv_nxv2f16(half %init, <vscale x 2 x half> %a) {
; CHECK-LABEL: faddv_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    faddv h1, p0, z1.h
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
  %red = call fast half @llvm.vector.reduce.fadd.nxv2f16(half %init, <vscale x 2 x half> %a)
  ret half %red
}

define half @faddv_nxv4f16(half %init, <vscale x 4 x half> %a) {
; CHECK-LABEL: faddv_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    faddv h1, p0, z1.h
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
  %red = call fast half @llvm.vector.reduce.fadd.nxv4f16(half %init, <vscale x 4 x half> %a)
  ret half %red
}

define half @faddv_nxv8f16(half %init, <vscale x 8 x half> %a) {
; CHECK-LABEL: faddv_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    faddv h1, p0, z1.h
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
  %red = call fast half @llvm.vector.reduce.fadd.nxv8f16(half %init, <vscale x 8 x half> %a)
  ret half %red
}

define float @faddv_nxv2f32(float %init, <vscale x 2 x float> %a) {
; CHECK-LABEL: faddv_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    faddv s1, p0, z1.s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %red = call fast float @llvm.vector.reduce.fadd.nxv2f32(float %init, <vscale x 2 x float> %a)
  ret float %red
}

define float @faddv_nxv4f32(float %init, <vscale x 4 x float> %a) {
; CHECK-LABEL: faddv_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    faddv s1, p0, z1.s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %red = call fast float @llvm.vector.reduce.fadd.nxv4f32(float %init, <vscale x 4 x float> %a)
  ret float %red
}

define double @faddv_nxv2f64(double %init, <vscale x 2 x double> %a) {
; CHECK-LABEL: faddv_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
  %red = call fast double @llvm.vector.reduce.fadd.nxv2f64(double %init, <vscale x 2 x double> %a)
  ret double %red
}

; FMAXNMV
;
; llvm.vector.reduce.fmax, called without fast-math flags. The assertions
; expect a single predicated FMAXNMV that writes the scalar result in place.

define half @fmaxv_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fmaxv_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %a)
  ret half %red
}

define half @fmaxv_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fmaxv_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %a)
  ret half %red
}

define half @fmaxv_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fmaxv_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> %a)
  ret half %red
}

define float @fmaxv_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fmaxv_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %a)
  ret float %red
}

define float @fmaxv_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fmaxv_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %a)
  ret float %red
}

define double @fmaxv_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fmaxv_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %a)
  ret double %red
}

; FMINNMV
;
; llvm.vector.reduce.fmin, called without fast-math flags. The assertions
; expect a single predicated FMINNMV that writes the scalar result in place.

define half @fminv_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fminv_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fminnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %a)
  ret half %red
}

define half @fminv_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fminv_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %a)
  ret half %red
}

define half @fminv_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fminv_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fminnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> %a)
  ret half %red
}

define float @fminv_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fminv_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fminnmv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %a)
  ret float %red
}

define float @fminv_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fminv_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminnmv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %a)
  ret float %red
}

define double @fminv_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fminv_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fminnmv d0, p0, z0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %a)
  ret double %red
}

; FMAXV
;
; llvm.vector.reduce.fmaximum. The assertions expect a single predicated FMAXV
; (as opposed to the FMAXNMV selected for llvm.vector.reduce.fmax above).

define half @fmaximumv_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fmaximumv_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmaximum.nxv2f16(<vscale x 2 x half> %a)
  ret half %red
}

define half @fmaximumv_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fmaximumv_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %a)
  ret half %red
}

define half @fmaximumv_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fmaximumv_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmaxv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmaximum.nxv8f16(<vscale x 8 x half> %a)
  ret half %red
}

define float @fmaximumv_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fmaximumv_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmaximum.nxv2f32(<vscale x 2 x float> %a)
  ret float %red
}

define float @fmaximumv_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fmaximumv_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmaximum.nxv4f32(<vscale x 4 x float> %a)
  ret float %red
}

define double @fmaximumv_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fmaximumv_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxv d0, p0, z0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fmaximum.nxv2f64(<vscale x 2 x double> %a)
  ret double %red
}

; FMINV
;
; llvm.vector.reduce.fminimum. The assertions expect a single predicated FMINV
; (as opposed to the FMINNMV selected for llvm.vector.reduce.fmin above).

define half @fminimumv_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fminimumv_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fminv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fminimum.nxv2f16(<vscale x 2 x half> %a)
  ret half %red
}

define half @fminimumv_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fminimumv_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %a)
  ret half %red
}

define half @fminimumv_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fminimumv_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fminv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fminimum.nxv8f16(<vscale x 8 x half> %a)
  ret half %red
}

define float @fminimumv_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fminimumv_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fminv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fminimum.nxv2f32(<vscale x 2 x float> %a)
  ret float %red
}

define float @fminimumv_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fminimumv_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fminimum.nxv4f32(<vscale x 4 x float> %a)
  ret float %red
}

define double @fminimumv_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fminimumv_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fminv d0, p0, z0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fminimum.nxv2f64(<vscale x 2 x double> %a)
  ret double %red
}

; Reassociation of two 'fast' fadd reductions.
;
; An nxv4f32 reduction and an nxv8f32 reduction (both starting from -0.0) are
; summed with a 'fast' scalar fadd. The assertions expect the inputs to be
; combined with vector FADDs first, so that only one FADDV is emitted.

define float @fadd_reduct_reassoc_v4v8f32(<vscale x 4 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: fadd_reduct_reassoc_v4v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z1.s, z1.s, z2.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-NEXT:    faddv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %sum.a = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.0, <vscale x 4 x float> %a)
  %sum.b = call fast float @llvm.vector.reduce.fadd.nxv8f32(float -0.0, <vscale x 8 x float> %b)
  %total = fadd fast float %sum.a, %sum.b
  ret float %total
}

; Intrinsic declarations, grouped by reduction kind.
;
; NOTE(review): the nxv6f16, nxv10f16 and nxv12f16 fadd declarations are not
; referenced by any function in the portion of the file shown here; they are
; retained unchanged.

declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
declare half @llvm.vector.reduce.fadd.nxv8f16(half, <vscale x 8 x half>)
declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
declare float @llvm.vector.reduce.fadd.nxv8f32(float, <vscale x 8 x float>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)

declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
declare half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half>)
declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)

declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
declare half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half>)
declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)

declare half @llvm.vector.reduce.fmaximum.nxv2f16(<vscale x 2 x half>)
declare half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half>)
declare half @llvm.vector.reduce.fmaximum.nxv8f16(<vscale x 8 x half>)
declare float @llvm.vector.reduce.fmaximum.nxv2f32(<vscale x 2 x float>)
declare float @llvm.vector.reduce.fmaximum.nxv4f32(<vscale x 4 x float>)
declare double @llvm.vector.reduce.fmaximum.nxv2f64(<vscale x 2 x double>)

declare half @llvm.vector.reduce.fminimum.nxv2f16(<vscale x 2 x half>)
declare half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half>)
declare half @llvm.vector.reduce.fminimum.nxv8f16(<vscale x 8 x half>)
declare float @llvm.vector.reduce.fminimum.nxv2f32(<vscale x 2 x float>)
declare float @llvm.vector.reduce.fminimum.nxv4f32(<vscale x 4 x float>)
declare double @llvm.vector.reduce.fminimum.nxv2f64(<vscale x 2 x double>)