; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s

; Tests lowering of llvm.vector.reduce.fadd on fixed-length f16/f32/f64
; vectors: reassoc calls lower to vfredusum, ordered (no fast-math) calls to
; vfredosum, and fpext-ed operands to the widening vfwredusum/vfwredosum forms.

declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)

define half @vreduce_fadd_v1f16(<1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.h fa0, fa0, fa5
; CHECK-NEXT:    ret
  %red = call reassoc half @llvm.vector.reduce.fadd.v1f16(half %s, <1 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v1f16(<1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fadd.v1f16(half %s, <1 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)

define half @vreduce_fadd_v2f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v2f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)

define half @vreduce_fadd_v4f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v4f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v7f16(half, <7 x half>)

define half @vreduce_fadd_v7f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v7f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v7f16(half %s, <7 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)

define half @vreduce_fadd_v8f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v8f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)

define half @vreduce_fadd_v16f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v16f16(half %s, <16 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v16f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v16f16(half %s, <16 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)

define half @vreduce_fadd_v32f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v32f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)

define half @vreduce_fadd_v64f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v64f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>)

define half @vreduce_fadd_v128f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v16
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v128f16(half %s, <128 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v128f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    vfmv.s.f v24, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v24
; CHECK-NEXT:    vfredosum.vs v8, v16, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v128f16(half %s, <128 x half> %v)
  ret half %red
}

declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>)

define float @vreduce_fadd_v1f32(<1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.s fa0, fa0, fa5
; CHECK-NEXT:    ret
  %red = call reassoc float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v1f32(<1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v1f32(<1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v9
; CHECK-NEXT:    fadd.s fa0, fa0, fa5
; CHECK-NEXT:    ret
  %e = fpext <1 x half> %v to <1 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v1f32(<1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <1 x half> %v to <1 x float>
  %red = call float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)

define float @vreduce_fadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %e = fpext <2 x half> %v to <2 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %e = fpext <2 x half> %v to <2 x float>
  %red = call float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)

define float @vreduce_fadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %e = fpext <4 x half> %v to <4 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %e = fpext <4 x half> %v to <4 x float>
  %red = call float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v7f32(float, <7 x float>)

define float @vreduce_fadd_v7f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v7f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v7f32(float %s, <7 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v7f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v7f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v7f32(float %s, <7 x float> %v)
  ret float %red
}

define float @vreduce_fadd_v7f32_neutralstart(ptr %x) {
; CHECK-LABEL: vreduce_fadd_v7f32_neutralstart:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v7f32(float -0.0, <7 x float> %v)
  ret float %red
}

define float @vreduce_fadd_v7f32_neutralstart_nsz(ptr %x) {
; CHECK-LABEL: vreduce_fadd_v7f32_neutralstart_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call nsz float @llvm.vector.reduce.fadd.v7f32(float -0.0, <7 x float> %v)
  ret float %red
}

define float @vreduce_fadd_v7f32_neutralstart_fast(ptr %x) {
; CHECK-LABEL: vreduce_fadd_v7f32_neutralstart_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call fast float @llvm.vector.reduce.fadd.v7f32(float -0.0, <7 x float> %v)
  ret float %red
}


declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)

define float @vreduce_fadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %e = fpext <8 x half> %v to <8 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %e = fpext <8 x half> %v to <8 x float>
  %red = call float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)

define float @vreduce_fadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %e = fpext <16 x half> %v to <16 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %e = fpext <16 x half> %v to <16 x float>
  %red = call float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)

define float @vreduce_fadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %e = fpext <32 x half> %v to <32 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %e = fpext <32 x half> %v to <32 x float>
  %red = call float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)

define float @vreduce_fadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v16
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v16, (a1)
; CHECK-NEXT:    vfmv.s.f v24, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v24
; CHECK-NEXT:    vfredosum.vs v8, v16, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vfredusum.vs v8, v24, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %e = fpext <64 x half> %v to <64 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v24, v8, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
; CHECK-NEXT:    vfwredosum.vs v8, v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %e = fpext <64 x half> %v to <64 x float>
  %red = call float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %e)
  ret float %red
}

declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)

define double @vreduce_fadd_v1f64(<1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.d fa0, fa0, fa5
; CHECK-NEXT:    ret
  %red = call reassoc double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v1f64(<1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v1f64(<1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v9
; CHECK-NEXT:    fadd.d fa0, fa0, fa5
; CHECK-NEXT:    ret
  %e = fpext <1 x float> %v to <1 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v1f64(<1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <1 x float> %v to <1 x double>
  %red = call double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)

define double @vreduce_fadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %e = fpext <2 x float> %v to <2 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %e = fpext <2 x float> %v to <2 x double>
  %red = call double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)

define double @vreduce_fadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %e = fpext <4 x float> %v to <4 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %e = fpext <4 x float> %v to <4 x double>
  %red = call double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)

define double @vreduce_fadd_v8f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v8f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
;
CHECK-NEXT: vfredosum.vs v8, v8, v12 1009; CHECK-NEXT: vfmv.f.s fa0, v8 1010; CHECK-NEXT: ret 1011 %v = load <8 x double>, ptr %x 1012 %red = call double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %v) 1013 ret double %red 1014} 1015 1016define double @vreduce_fwadd_v8f64(ptr %x, double %s) { 1017; CHECK-LABEL: vreduce_fwadd_v8f64: 1018; CHECK: # %bb.0: 1019; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1020; CHECK-NEXT: vle32.v v8, (a0) 1021; CHECK-NEXT: vfmv.s.f v10, fa0 1022; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1023; CHECK-NEXT: vfwredusum.vs v8, v8, v10 1024; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma 1025; CHECK-NEXT: vfmv.f.s fa0, v8 1026; CHECK-NEXT: ret 1027 %v = load <8 x float>, ptr %x 1028 %e = fpext <8 x float> %v to <8 x double> 1029 %red = call reassoc double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %e) 1030 ret double %red 1031} 1032 1033define double @vreduce_ord_fwadd_v8f64(ptr %x, double %s) { 1034; CHECK-LABEL: vreduce_ord_fwadd_v8f64: 1035; CHECK: # %bb.0: 1036; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1037; CHECK-NEXT: vle32.v v8, (a0) 1038; CHECK-NEXT: vfmv.s.f v10, fa0 1039; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1040; CHECK-NEXT: vfwredosum.vs v8, v8, v10 1041; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma 1042; CHECK-NEXT: vfmv.f.s fa0, v8 1043; CHECK-NEXT: ret 1044 %v = load <8 x float>, ptr %x 1045 %e = fpext <8 x float> %v to <8 x double> 1046 %red = call double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %e) 1047 ret double %red 1048} 1049 1050declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) 1051 1052define double @vreduce_fadd_v16f64(ptr %x, double %s) { 1053; CHECK-LABEL: vreduce_fadd_v16f64: 1054; CHECK: # %bb.0: 1055; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1056; CHECK-NEXT: vle64.v v8, (a0) 1057; CHECK-NEXT: vfmv.s.f v16, fa0 1058; CHECK-NEXT: vfredusum.vs v8, v8, v16 1059; CHECK-NEXT: vfmv.f.s fa0, v8 1060; CHECK-NEXT: ret 1061 %v = 
load <16 x double>, ptr %x 1062 %red = call reassoc double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %v) 1063 ret double %red 1064} 1065 1066define double @vreduce_ord_fadd_v16f64(ptr %x, double %s) { 1067; CHECK-LABEL: vreduce_ord_fadd_v16f64: 1068; CHECK: # %bb.0: 1069; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1070; CHECK-NEXT: vle64.v v8, (a0) 1071; CHECK-NEXT: vfmv.s.f v16, fa0 1072; CHECK-NEXT: vfredosum.vs v8, v8, v16 1073; CHECK-NEXT: vfmv.f.s fa0, v8 1074; CHECK-NEXT: ret 1075 %v = load <16 x double>, ptr %x 1076 %red = call double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %v) 1077 ret double %red 1078} 1079 1080define double @vreduce_fwadd_v16f64(ptr %x, double %s) { 1081; CHECK-LABEL: vreduce_fwadd_v16f64: 1082; CHECK: # %bb.0: 1083; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1084; CHECK-NEXT: vle32.v v8, (a0) 1085; CHECK-NEXT: vfmv.s.f v12, fa0 1086; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1087; CHECK-NEXT: vfwredusum.vs v8, v8, v12 1088; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1089; CHECK-NEXT: vfmv.f.s fa0, v8 1090; CHECK-NEXT: ret 1091 %v = load <16 x float>, ptr %x 1092 %e = fpext <16 x float> %v to <16 x double> 1093 %red = call reassoc double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %e) 1094 ret double %red 1095} 1096 1097define double @vreduce_ord_fwadd_v16f64(ptr %x, double %s) { 1098; CHECK-LABEL: vreduce_ord_fwadd_v16f64: 1099; CHECK: # %bb.0: 1100; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1101; CHECK-NEXT: vle32.v v8, (a0) 1102; CHECK-NEXT: vfmv.s.f v12, fa0 1103; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1104; CHECK-NEXT: vfwredosum.vs v8, v8, v12 1105; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1106; CHECK-NEXT: vfmv.f.s fa0, v8 1107; CHECK-NEXT: ret 1108 %v = load <16 x float>, ptr %x 1109 %e = fpext <16 x float> %v to <16 x double> 1110 %red = call double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %e) 1111 ret double %red 1112} 1113 
; NOTE(review): this span was reconstructed from a newline-mangled rendering of
; the file (the original line numbers had been fused onto the text). Tokens are
; unchanged; only line structure and comments were restored.
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py
; (see file header) -- regenerate them with that script rather than hand-edit.

declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)

; <32 x double> is split into two 16-element halves (two vle64 loads, 128 bytes
; apart); the reassoc form pre-adds the halves and does one vfredusum.
define double @vreduce_fadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v16
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v)
  ret double %red
}

; Ordered (no reassoc) form: the two halves must be reduced sequentially, so
; two chained vfredosum ops are emitted.
define double @vreduce_ord_fadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    vfmv.s.f v24, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v24
; CHECK-NEXT:    vfredosum.vs v8, v16, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v)
  ret double %red
}

; Widening variant: f32 input fpext'ed to f64 before the fadd reduction.
define double @vreduce_fwadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vfredusum.vs v8, v24, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %e = fpext <32 x float> %v to <32 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v8, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
; CHECK-NEXT:    vfwredosum.vs v8, v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %e = fpext <32 x float> %v to <32 x double>
  %red = call double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %e)
  ret double %red
}

; ---- llvm.vector.reduce.fmin tests (lowered to vfredmin.vs) ----

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)

define half @vreduce_fmin_v2f16(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)

define half @vreduce_fmin_v4f16(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
  ret half %red
}

define half @vreduce_fmin_v4f16_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f16_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
  ret half %red
}

define half @vreduce_fmin_v4f16_nonans_noinfs(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmin.v128f16(<128 x half>)

define half @vreduce_fmin_v128f16(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfmin.vv v8, v8, v16
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %v)
  ret half %red
}

declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)

define float @vreduce_fmin_v2f32(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)

define float @vreduce_fmin_v4f32(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  ret float %red
}

define float @vreduce_fmin_v4f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f32_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  ret float %red
}

define float @vreduce_fmin_v4f32_nonans_noinfs(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.v7f32(<7 x float>)

; Non-power-of-two length: a scalar start value is splatted into v10.
; lui a0, 523264 puts 0x7f800000 (+inf) in a0 -- the fmin identity.
define float @vreduce_fmin_v7f32(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v7f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 523264
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vfredmin.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmin.v7f32(<7 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.v128f32(<128 x float>)

define float @vreduce_fmin_v128f32(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v128f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    addi a2, a0, 384
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    addi a1, a0, 256
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vfmin.vv v8, v24, v8
; CHECK-NEXT:    vfmin.vv v16, v16, v0
; CHECK-NEXT:    vfmin.vv v8, v16, v8
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmin.v128f32(<128 x float> %v)
  ret float %red
}

declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)

define double @vreduce_fmin_v2f64(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v)
  ret double %red
}

declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)

define double @vreduce_fmin_v4f64(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
  ret double %red
}

define double @vreduce_fmin_v4f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f64_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %x
  %red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
  ret double %red
}

define double @vreduce_fmin_v4f64_nonans_noinfs(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %x
  %red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
  ret double %red
}

declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)

define double @vreduce_fmin_v32f64(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    vfmin.vv v8, v8, v16
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v)
  ret double %red
}

; ---- llvm.vector.reduce.fmax tests (lowered to vfredmax.vs) ----

declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)

define half @vreduce_fmax_v2f16(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)

define half @vreduce_fmax_v4f16(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
  ret half %red
}

define half @vreduce_fmax_v4f16_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v4f16_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
  ret half %red
}

define half @vreduce_fmax_v4f16_nonans_noinfs(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmax.v128f16(<128 x half>)

define half @vreduce_fmax_v128f16(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %v)
  ret half %red
}

declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)

define float @vreduce_fmax_v2f32(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)

define float @vreduce_fmax_v4f32(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  ret float %red
}

define float @vreduce_fmax_v4f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v4f32_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  ret float %red
}

define float @vreduce_fmax_v4f32_nonans_noinfs(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmax.v7f32(<7 x float>)

; lui a0, 1047552 puts 0xff800000 (-inf) in a0 -- the fmax identity for the
; non-power-of-two length.
define float @vreduce_fmax_v7f32(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v7f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 1047552
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vfredmax.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmax.v7f32(<7 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmax.v128f32(<128 x float>)

define float @vreduce_fmax_v128f32(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v128f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    addi a2, a0, 384
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    addi a1, a0, 256
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vfmax.vv v8, v24, v8
; CHECK-NEXT:    vfmax.vv v16, v16, v0
; CHECK-NEXT:    vfmax.vv v8, v16, v8
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmax.v128f32(<128 x float> %v)
  ret float %red
}

declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)

define double @vreduce_fmax_v2f64(ptr %x) {
; CHECK-LABEL: vreduce_fmax_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v)
  ret double %red
}

declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)

1630define double @vreduce_fmax_v4f64(ptr %x) { 1631; CHECK-LABEL: vreduce_fmax_v4f64: 1632; CHECK: # %bb.0: 1633; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1634; CHECK-NEXT: vle64.v v8, (a0) 1635; CHECK-NEXT: vfredmax.vs v8, v8, v8 1636; CHECK-NEXT: vfmv.f.s fa0, v8 1637; CHECK-NEXT: ret 1638 %v = load <4 x double>, ptr %x 1639 %red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) 1640 ret double %red 1641} 1642 1643define double @vreduce_fmax_v4f64_nonans(ptr %x) { 1644; CHECK-LABEL: vreduce_fmax_v4f64_nonans: 1645; CHECK: # %bb.0: 1646; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1647; CHECK-NEXT: vle64.v v8, (a0) 1648; CHECK-NEXT: vfredmax.vs v8, v8, v8 1649; CHECK-NEXT: vfmv.f.s fa0, v8 1650; CHECK-NEXT: ret 1651 %v = load <4 x double>, ptr %x 1652 %red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) 1653 ret double %red 1654} 1655 1656define double @vreduce_fmax_v4f64_nonans_noinfs(ptr %x) { 1657; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs: 1658; CHECK: # %bb.0: 1659; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1660; CHECK-NEXT: vle64.v v8, (a0) 1661; CHECK-NEXT: vfredmax.vs v8, v8, v8 1662; CHECK-NEXT: vfmv.f.s fa0, v8 1663; CHECK-NEXT: ret 1664 %v = load <4 x double>, ptr %x 1665 %red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) 1666 ret double %red 1667} 1668 1669declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>) 1670 1671define double @vreduce_fmax_v32f64(ptr %x) { 1672; CHECK-LABEL: vreduce_fmax_v32f64: 1673; CHECK: # %bb.0: 1674; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1675; CHECK-NEXT: vle64.v v8, (a0) 1676; CHECK-NEXT: addi a0, a0, 128 1677; CHECK-NEXT: vle64.v v16, (a0) 1678; CHECK-NEXT: vfmax.vv v8, v8, v16 1679; CHECK-NEXT: vfredmax.vs v8, v8, v8 1680; CHECK-NEXT: vfmv.f.s fa0, v8 1681; CHECK-NEXT: ret 1682 %v = load <32 x double>, ptr %x 1683 %red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v) 1684 ret double %red 1685} 1686 1687define float 
@vreduce_nsz_fadd_v4f32(ptr %x, float %s) { 1688; CHECK-LABEL: vreduce_nsz_fadd_v4f32: 1689; CHECK: # %bb.0: 1690; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1691; CHECK-NEXT: vle32.v v8, (a0) 1692; CHECK-NEXT: vfmv.s.f v9, fa0 1693; CHECK-NEXT: vfredusum.vs v8, v8, v9 1694; CHECK-NEXT: vfmv.f.s fa0, v8 1695; CHECK-NEXT: ret 1696 %v = load <4 x float>, ptr %x 1697 %red = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) 1698 ret float %red 1699} 1700 1701declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>) 1702 1703define float @vreduce_fminimum_v2f32(ptr %x) { 1704; CHECK-LABEL: vreduce_fminimum_v2f32: 1705; CHECK: # %bb.0: 1706; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 1707; CHECK-NEXT: vle32.v v8, (a0) 1708; CHECK-NEXT: vmfne.vv v9, v8, v8 1709; CHECK-NEXT: vcpop.m a0, v9 1710; CHECK-NEXT: beqz a0, .LBB107_2 1711; CHECK-NEXT: # %bb.1: 1712; CHECK-NEXT: lui a0, 523264 1713; CHECK-NEXT: fmv.w.x fa0, a0 1714; CHECK-NEXT: ret 1715; CHECK-NEXT: .LBB107_2: 1716; CHECK-NEXT: vfredmin.vs v8, v8, v8 1717; CHECK-NEXT: vfmv.f.s fa0, v8 1718; CHECK-NEXT: ret 1719 %v = load <2 x float>, ptr %x 1720 %red = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %v) 1721 ret float %red 1722} 1723 1724define float @vreduce_fminimum_v2f32_nonans(ptr %x) { 1725; CHECK-LABEL: vreduce_fminimum_v2f32_nonans: 1726; CHECK: # %bb.0: 1727; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 1728; CHECK-NEXT: vle32.v v8, (a0) 1729; CHECK-NEXT: vfredmin.vs v8, v8, v8 1730; CHECK-NEXT: vfmv.f.s fa0, v8 1731; CHECK-NEXT: ret 1732 %v = load <2 x float>, ptr %x 1733 %red = call nnan float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %v) 1734 ret float %red 1735} 1736 1737declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>) 1738 1739define float @vreduce_fminimum_v4f32(ptr %x) { 1740; CHECK-LABEL: vreduce_fminimum_v4f32: 1741; CHECK: # %bb.0: 1742; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1743; CHECK-NEXT: vle32.v v8, (a0) 1744; 
CHECK-NEXT: vmfne.vv v9, v8, v8 1745; CHECK-NEXT: vcpop.m a0, v9 1746; CHECK-NEXT: beqz a0, .LBB109_2 1747; CHECK-NEXT: # %bb.1: 1748; CHECK-NEXT: lui a0, 523264 1749; CHECK-NEXT: fmv.w.x fa0, a0 1750; CHECK-NEXT: ret 1751; CHECK-NEXT: .LBB109_2: 1752; CHECK-NEXT: vfredmin.vs v8, v8, v8 1753; CHECK-NEXT: vfmv.f.s fa0, v8 1754; CHECK-NEXT: ret 1755 %v = load <4 x float>, ptr %x 1756 %red = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %v) 1757 ret float %red 1758} 1759 1760define float @vreduce_fminimum_v4f32_nonans(ptr %x) { 1761; CHECK-LABEL: vreduce_fminimum_v4f32_nonans: 1762; CHECK: # %bb.0: 1763; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1764; CHECK-NEXT: vle32.v v8, (a0) 1765; CHECK-NEXT: vfredmin.vs v8, v8, v8 1766; CHECK-NEXT: vfmv.f.s fa0, v8 1767; CHECK-NEXT: ret 1768 %v = load <4 x float>, ptr %x 1769 %red = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %v) 1770 ret float %red 1771} 1772 1773declare float @llvm.vector.reduce.fminimum.v7f32(<7 x float>) 1774 1775define float @vreduce_fminimum_v7f32(ptr %x) { 1776; CHECK-LABEL: vreduce_fminimum_v7f32: 1777; CHECK: # %bb.0: 1778; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma 1779; CHECK-NEXT: vle32.v v8, (a0) 1780; CHECK-NEXT: vmfne.vv v10, v8, v8 1781; CHECK-NEXT: vcpop.m a0, v10 1782; CHECK-NEXT: beqz a0, .LBB111_2 1783; CHECK-NEXT: # %bb.1: 1784; CHECK-NEXT: lui a0, 523264 1785; CHECK-NEXT: fmv.w.x fa0, a0 1786; CHECK-NEXT: ret 1787; CHECK-NEXT: .LBB111_2: 1788; CHECK-NEXT: lui a0, 522240 1789; CHECK-NEXT: vmv.s.x v10, a0 1790; CHECK-NEXT: vfredmin.vs v8, v8, v10 1791; CHECK-NEXT: vfmv.f.s fa0, v8 1792; CHECK-NEXT: ret 1793 %v = load <7 x float>, ptr %x 1794 %red = call float @llvm.vector.reduce.fminimum.v7f32(<7 x float> %v) 1795 ret float %red 1796} 1797 1798define float @vreduce_fminimum_v7f32_nonans(ptr %x) { 1799; CHECK-LABEL: vreduce_fminimum_v7f32_nonans: 1800; CHECK: # %bb.0: 1801; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma 1802; CHECK-NEXT: vle32.v v8, (a0) 
1803; CHECK-NEXT: lui a0, 522240 1804; CHECK-NEXT: vmv.s.x v10, a0 1805; CHECK-NEXT: vfredmin.vs v8, v8, v10 1806; CHECK-NEXT: vfmv.f.s fa0, v8 1807; CHECK-NEXT: ret 1808 %v = load <7 x float>, ptr %x 1809 %red = call nnan float @llvm.vector.reduce.fminimum.v7f32(<7 x float> %v) 1810 ret float %red 1811} 1812 1813declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>) 1814 1815define float @vreduce_fminimum_v8f32(ptr %x) { 1816; CHECK-LABEL: vreduce_fminimum_v8f32: 1817; CHECK: # %bb.0: 1818; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1819; CHECK-NEXT: vle32.v v8, (a0) 1820; CHECK-NEXT: vmfne.vv v10, v8, v8 1821; CHECK-NEXT: vcpop.m a0, v10 1822; CHECK-NEXT: beqz a0, .LBB113_2 1823; CHECK-NEXT: # %bb.1: 1824; CHECK-NEXT: lui a0, 523264 1825; CHECK-NEXT: fmv.w.x fa0, a0 1826; CHECK-NEXT: ret 1827; CHECK-NEXT: .LBB113_2: 1828; CHECK-NEXT: vfredmin.vs v8, v8, v8 1829; CHECK-NEXT: vfmv.f.s fa0, v8 1830; CHECK-NEXT: ret 1831 %v = load <8 x float>, ptr %x 1832 %red = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %v) 1833 ret float %red 1834} 1835 1836define float @vreduce_fminimum_v8f32_nonans(ptr %x) { 1837; CHECK-LABEL: vreduce_fminimum_v8f32_nonans: 1838; CHECK: # %bb.0: 1839; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1840; CHECK-NEXT: vle32.v v8, (a0) 1841; CHECK-NEXT: vfredmin.vs v8, v8, v8 1842; CHECK-NEXT: vfmv.f.s fa0, v8 1843; CHECK-NEXT: ret 1844 %v = load <8 x float>, ptr %x 1845 %red = call nnan float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %v) 1846 ret float %red 1847} 1848 1849declare float @llvm.vector.reduce.fminimum.v16f32(<16 x float>) 1850 1851define float @vreduce_fminimum_v16f32(ptr %x) { 1852; CHECK-LABEL: vreduce_fminimum_v16f32: 1853; CHECK: # %bb.0: 1854; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1855; CHECK-NEXT: vle32.v v8, (a0) 1856; CHECK-NEXT: vmfne.vv v12, v8, v8 1857; CHECK-NEXT: vcpop.m a0, v12 1858; CHECK-NEXT: beqz a0, .LBB115_2 1859; CHECK-NEXT: # %bb.1: 1860; CHECK-NEXT: lui a0, 523264 1861; 
CHECK-NEXT: fmv.w.x fa0, a0 1862; CHECK-NEXT: ret 1863; CHECK-NEXT: .LBB115_2: 1864; CHECK-NEXT: vfredmin.vs v8, v8, v8 1865; CHECK-NEXT: vfmv.f.s fa0, v8 1866; CHECK-NEXT: ret 1867 %v = load <16 x float>, ptr %x 1868 %red = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %v) 1869 ret float %red 1870} 1871 1872define float @vreduce_fminimum_v16f32_nonans(ptr %x) { 1873; CHECK-LABEL: vreduce_fminimum_v16f32_nonans: 1874; CHECK: # %bb.0: 1875; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1876; CHECK-NEXT: vle32.v v8, (a0) 1877; CHECK-NEXT: vfredmin.vs v8, v8, v8 1878; CHECK-NEXT: vfmv.f.s fa0, v8 1879; CHECK-NEXT: ret 1880 %v = load <16 x float>, ptr %x 1881 %red = call nnan float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %v) 1882 ret float %red 1883} 1884 1885declare float @llvm.vector.reduce.fminimum.v32f32(<32 x float>) 1886 1887define float @vreduce_fminimum_v32f32(ptr %x) { 1888; CHECK-LABEL: vreduce_fminimum_v32f32: 1889; CHECK: # %bb.0: 1890; CHECK-NEXT: li a1, 32 1891; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 1892; CHECK-NEXT: vle32.v v8, (a0) 1893; CHECK-NEXT: vmfne.vv v16, v8, v8 1894; CHECK-NEXT: vcpop.m a0, v16 1895; CHECK-NEXT: beqz a0, .LBB117_2 1896; CHECK-NEXT: # %bb.1: 1897; CHECK-NEXT: lui a0, 523264 1898; CHECK-NEXT: fmv.w.x fa0, a0 1899; CHECK-NEXT: ret 1900; CHECK-NEXT: .LBB117_2: 1901; CHECK-NEXT: vfredmin.vs v8, v8, v8 1902; CHECK-NEXT: vfmv.f.s fa0, v8 1903; CHECK-NEXT: ret 1904 %v = load <32 x float>, ptr %x 1905 %red = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> %v) 1906 ret float %red 1907} 1908 1909define float @vreduce_fminimum_v32f32_nonans(ptr %x) { 1910; CHECK-LABEL: vreduce_fminimum_v32f32_nonans: 1911; CHECK: # %bb.0: 1912; CHECK-NEXT: li a1, 32 1913; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 1914; CHECK-NEXT: vle32.v v8, (a0) 1915; CHECK-NEXT: vfredmin.vs v8, v8, v8 1916; CHECK-NEXT: vfmv.f.s fa0, v8 1917; CHECK-NEXT: ret 1918 %v = load <32 x float>, ptr %x 1919 %red = call nnan float 
@llvm.vector.reduce.fminimum.v32f32(<32 x float> %v) 1920 ret float %red 1921} 1922 1923declare float @llvm.vector.reduce.fminimum.v64f32(<64 x float>) 1924 1925define float @vreduce_fminimum_v64f32(ptr %x) { 1926; CHECK-LABEL: vreduce_fminimum_v64f32: 1927; CHECK: # %bb.0: 1928; CHECK-NEXT: addi sp, sp, -16 1929; CHECK-NEXT: .cfi_def_cfa_offset 16 1930; CHECK-NEXT: csrr a1, vlenb 1931; CHECK-NEXT: slli a1, a1, 3 1932; CHECK-NEXT: sub sp, sp, a1 1933; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1934; CHECK-NEXT: addi a1, a0, 128 1935; CHECK-NEXT: li a2, 32 1936; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma 1937; CHECK-NEXT: vle32.v v16, (a0) 1938; CHECK-NEXT: vle32.v v24, (a1) 1939; CHECK-NEXT: vmfeq.vv v0, v16, v16 1940; CHECK-NEXT: vmfeq.vv v7, v24, v24 1941; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 1942; CHECK-NEXT: addi a0, sp, 16 1943; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 1944; CHECK-NEXT: vmv1r.v v0, v7 1945; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 1946; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1947; CHECK-NEXT: vfmin.vv v8, v8, v16 1948; CHECK-NEXT: vmfne.vv v16, v8, v8 1949; CHECK-NEXT: vcpop.m a0, v16 1950; CHECK-NEXT: beqz a0, .LBB119_2 1951; CHECK-NEXT: # %bb.1: 1952; CHECK-NEXT: lui a0, 523264 1953; CHECK-NEXT: fmv.w.x fa0, a0 1954; CHECK-NEXT: j .LBB119_3 1955; CHECK-NEXT: .LBB119_2: 1956; CHECK-NEXT: vfredmin.vs v8, v8, v8 1957; CHECK-NEXT: vfmv.f.s fa0, v8 1958; CHECK-NEXT: .LBB119_3: 1959; CHECK-NEXT: csrr a0, vlenb 1960; CHECK-NEXT: slli a0, a0, 3 1961; CHECK-NEXT: add sp, sp, a0 1962; CHECK-NEXT: .cfi_def_cfa sp, 16 1963; CHECK-NEXT: addi sp, sp, 16 1964; CHECK-NEXT: .cfi_def_cfa_offset 0 1965; CHECK-NEXT: ret 1966 %v = load <64 x float>, ptr %x 1967 %red = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %v) 1968 ret float %red 1969} 1970 1971define float @vreduce_fminimum_v64f32_nonans(ptr %x) { 1972; 
CHECK-LABEL: vreduce_fminimum_v64f32_nonans: 1973; CHECK: # %bb.0: 1974; CHECK-NEXT: li a1, 32 1975; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 1976; CHECK-NEXT: vle32.v v8, (a0) 1977; CHECK-NEXT: addi a0, a0, 128 1978; CHECK-NEXT: vle32.v v16, (a0) 1979; CHECK-NEXT: vfmin.vv v8, v8, v16 1980; CHECK-NEXT: vfredmin.vs v8, v8, v8 1981; CHECK-NEXT: vfmv.f.s fa0, v8 1982; CHECK-NEXT: ret 1983 %v = load <64 x float>, ptr %x 1984 %red = call nnan float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %v) 1985 ret float %red 1986} 1987 1988declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>) 1989 1990define float @vreduce_fminimum_v128f32(ptr %x) { 1991; CHECK-LABEL: vreduce_fminimum_v128f32: 1992; CHECK: # %bb.0: 1993; CHECK-NEXT: addi sp, sp, -16 1994; CHECK-NEXT: .cfi_def_cfa_offset 16 1995; CHECK-NEXT: csrr a1, vlenb 1996; CHECK-NEXT: slli a1, a1, 4 1997; CHECK-NEXT: sub sp, sp, a1 1998; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 1999; CHECK-NEXT: li a1, 32 2000; CHECK-NEXT: addi a2, a0, 128 2001; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 2002; CHECK-NEXT: vle32.v v8, (a2) 2003; CHECK-NEXT: addi a1, a0, 384 2004; CHECK-NEXT: vle32.v v16, (a1) 2005; CHECK-NEXT: addi a1, a0, 256 2006; CHECK-NEXT: vle32.v v24, (a0) 2007; CHECK-NEXT: addi a0, sp, 16 2008; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 2009; CHECK-NEXT: vmfeq.vv v0, v8, v8 2010; CHECK-NEXT: vmfeq.vv v7, v16, v16 2011; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 2012; CHECK-NEXT: csrr a0, vlenb 2013; CHECK-NEXT: slli a0, a0, 3 2014; CHECK-NEXT: add a0, sp, a0 2015; CHECK-NEXT: addi a0, a0, 16 2016; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 2017; CHECK-NEXT: vle32.v v24, (a1) 2018; CHECK-NEXT: vmv1r.v v0, v7 2019; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 2020; CHECK-NEXT: csrr a0, vlenb 2021; CHECK-NEXT: slli a0, a0, 3 2022; CHECK-NEXT: add a0, sp, a0 2023; CHECK-NEXT: addi a0, 
a0, 16 2024; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2025; CHECK-NEXT: vfmin.vv v8, v16, v8 2026; CHECK-NEXT: csrr a0, vlenb 2027; CHECK-NEXT: slli a0, a0, 3 2028; CHECK-NEXT: add a0, sp, a0 2029; CHECK-NEXT: addi a0, a0, 16 2030; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2031; CHECK-NEXT: addi a0, sp, 16 2032; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2033; CHECK-NEXT: vmfeq.vv v0, v8, v8 2034; CHECK-NEXT: vmfeq.vv v7, v24, v24 2035; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 2036; CHECK-NEXT: vmv1r.v v0, v7 2037; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 2038; CHECK-NEXT: vfmin.vv v16, v8, v16 2039; CHECK-NEXT: vmfeq.vv v0, v16, v16 2040; CHECK-NEXT: csrr a0, vlenb 2041; CHECK-NEXT: slli a0, a0, 3 2042; CHECK-NEXT: add a0, sp, a0 2043; CHECK-NEXT: addi a0, a0, 16 2044; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 2045; CHECK-NEXT: vmfeq.vv v7, v24, v24 2046; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 2047; CHECK-NEXT: addi a0, sp, 16 2048; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2049; CHECK-NEXT: vmv1r.v v0, v7 2050; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 2051; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2052; CHECK-NEXT: vfmin.vv v8, v8, v16 2053; CHECK-NEXT: vmfne.vv v16, v8, v8 2054; CHECK-NEXT: vcpop.m a0, v16 2055; CHECK-NEXT: beqz a0, .LBB121_2 2056; CHECK-NEXT: # %bb.1: 2057; CHECK-NEXT: lui a0, 523264 2058; CHECK-NEXT: fmv.w.x fa0, a0 2059; CHECK-NEXT: j .LBB121_3 2060; CHECK-NEXT: .LBB121_2: 2061; CHECK-NEXT: vfredmin.vs v8, v8, v8 2062; CHECK-NEXT: vfmv.f.s fa0, v8 2063; CHECK-NEXT: .LBB121_3: 2064; CHECK-NEXT: csrr a0, vlenb 2065; CHECK-NEXT: slli a0, a0, 4 2066; CHECK-NEXT: add sp, sp, a0 2067; CHECK-NEXT: .cfi_def_cfa sp, 16 2068; CHECK-NEXT: addi sp, sp, 16 2069; CHECK-NEXT: .cfi_def_cfa_offset 0 2070; CHECK-NEXT: ret 2071 %v = load <128 x float>, ptr %x 2072 %red = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> %v) 2073 ret float %red 2074} 2075 
2076define float @vreduce_fminimum_v128f32_nonans(ptr %x) { 2077; CHECK-LABEL: vreduce_fminimum_v128f32_nonans: 2078; CHECK: # %bb.0: 2079; CHECK-NEXT: li a1, 32 2080; CHECK-NEXT: addi a2, a0, 384 2081; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 2082; CHECK-NEXT: vle32.v v8, (a2) 2083; CHECK-NEXT: addi a1, a0, 256 2084; CHECK-NEXT: vle32.v v16, (a0) 2085; CHECK-NEXT: addi a0, a0, 128 2086; CHECK-NEXT: vle32.v v24, (a0) 2087; CHECK-NEXT: vle32.v v0, (a1) 2088; CHECK-NEXT: vfmin.vv v8, v24, v8 2089; CHECK-NEXT: vfmin.vv v16, v16, v0 2090; CHECK-NEXT: vfmin.vv v8, v16, v8 2091; CHECK-NEXT: vfredmin.vs v8, v8, v8 2092; CHECK-NEXT: vfmv.f.s fa0, v8 2093; CHECK-NEXT: ret 2094 %v = load <128 x float>, ptr %x 2095 %red = call nnan float @llvm.vector.reduce.fminimum.v128f32(<128 x float> %v) 2096 ret float %red 2097} 2098 2099declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>) 2100 2101define double @vreduce_fminimum_v2f64(ptr %x) { 2102; CHECK-LABEL: vreduce_fminimum_v2f64: 2103; CHECK: # %bb.0: 2104; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2105; CHECK-NEXT: vle64.v v8, (a0) 2106; CHECK-NEXT: vmfne.vv v9, v8, v8 2107; CHECK-NEXT: vcpop.m a0, v9 2108; CHECK-NEXT: beqz a0, .LBB123_2 2109; CHECK-NEXT: # %bb.1: 2110; CHECK-NEXT: lui a0, %hi(.LCPI123_0) 2111; CHECK-NEXT: fld fa0, %lo(.LCPI123_0)(a0) 2112; CHECK-NEXT: ret 2113; CHECK-NEXT: .LBB123_2: 2114; CHECK-NEXT: vfredmin.vs v8, v8, v8 2115; CHECK-NEXT: vfmv.f.s fa0, v8 2116; CHECK-NEXT: ret 2117 %v = load <2 x double>, ptr %x 2118 %red = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %v) 2119 ret double %red 2120} 2121 2122define double @vreduce_fminimum_v2f64_nonans(ptr %x) { 2123; CHECK-LABEL: vreduce_fminimum_v2f64_nonans: 2124; CHECK: # %bb.0: 2125; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2126; CHECK-NEXT: vle64.v v8, (a0) 2127; CHECK-NEXT: vfredmin.vs v8, v8, v8 2128; CHECK-NEXT: vfmv.f.s fa0, v8 2129; CHECK-NEXT: ret 2130 %v = load <2 x double>, ptr %x 2131 %red = call nnan 
double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %v) 2132 ret double %red 2133} 2134 2135declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>) 2136 2137define double @vreduce_fminimum_v4f64(ptr %x) { 2138; CHECK-LABEL: vreduce_fminimum_v4f64: 2139; CHECK: # %bb.0: 2140; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2141; CHECK-NEXT: vle64.v v8, (a0) 2142; CHECK-NEXT: vmfne.vv v10, v8, v8 2143; CHECK-NEXT: vcpop.m a0, v10 2144; CHECK-NEXT: beqz a0, .LBB125_2 2145; CHECK-NEXT: # %bb.1: 2146; CHECK-NEXT: lui a0, %hi(.LCPI125_0) 2147; CHECK-NEXT: fld fa0, %lo(.LCPI125_0)(a0) 2148; CHECK-NEXT: ret 2149; CHECK-NEXT: .LBB125_2: 2150; CHECK-NEXT: vfredmin.vs v8, v8, v8 2151; CHECK-NEXT: vfmv.f.s fa0, v8 2152; CHECK-NEXT: ret 2153 %v = load <4 x double>, ptr %x 2154 %red = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %v) 2155 ret double %red 2156} 2157 2158define double @vreduce_fminimum_v4f64_nonans(ptr %x) { 2159; CHECK-LABEL: vreduce_fminimum_v4f64_nonans: 2160; CHECK: # %bb.0: 2161; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2162; CHECK-NEXT: vle64.v v8, (a0) 2163; CHECK-NEXT: vfredmin.vs v8, v8, v8 2164; CHECK-NEXT: vfmv.f.s fa0, v8 2165; CHECK-NEXT: ret 2166 %v = load <4 x double>, ptr %x 2167 %red = call nnan double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %v) 2168 ret double %red 2169} 2170 2171declare double @llvm.vector.reduce.fminimum.v8f64(<8 x double>) 2172 2173define double @vreduce_fminimum_v8f64(ptr %x) { 2174; CHECK-LABEL: vreduce_fminimum_v8f64: 2175; CHECK: # %bb.0: 2176; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2177; CHECK-NEXT: vle64.v v8, (a0) 2178; CHECK-NEXT: vmfne.vv v12, v8, v8 2179; CHECK-NEXT: vcpop.m a0, v12 2180; CHECK-NEXT: beqz a0, .LBB127_2 2181; CHECK-NEXT: # %bb.1: 2182; CHECK-NEXT: lui a0, %hi(.LCPI127_0) 2183; CHECK-NEXT: fld fa0, %lo(.LCPI127_0)(a0) 2184; CHECK-NEXT: ret 2185; CHECK-NEXT: .LBB127_2: 2186; CHECK-NEXT: vfredmin.vs v8, v8, v8 2187; CHECK-NEXT: vfmv.f.s fa0, v8 2188; CHECK-NEXT: 
ret 2189 %v = load <8 x double>, ptr %x 2190 %red = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %v) 2191 ret double %red 2192} 2193 2194define double @vreduce_fminimum_v8f64_nonans(ptr %x) { 2195; CHECK-LABEL: vreduce_fminimum_v8f64_nonans: 2196; CHECK: # %bb.0: 2197; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2198; CHECK-NEXT: vle64.v v8, (a0) 2199; CHECK-NEXT: vfredmin.vs v8, v8, v8 2200; CHECK-NEXT: vfmv.f.s fa0, v8 2201; CHECK-NEXT: ret 2202 %v = load <8 x double>, ptr %x 2203 %red = call nnan double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %v) 2204 ret double %red 2205} 2206 2207declare double @llvm.vector.reduce.fminimum.v16f64(<16 x double>) 2208 2209define double @vreduce_fminimum_v16f64(ptr %x) { 2210; CHECK-LABEL: vreduce_fminimum_v16f64: 2211; CHECK: # %bb.0: 2212; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2213; CHECK-NEXT: vle64.v v8, (a0) 2214; CHECK-NEXT: vmfne.vv v16, v8, v8 2215; CHECK-NEXT: vcpop.m a0, v16 2216; CHECK-NEXT: beqz a0, .LBB129_2 2217; CHECK-NEXT: # %bb.1: 2218; CHECK-NEXT: lui a0, %hi(.LCPI129_0) 2219; CHECK-NEXT: fld fa0, %lo(.LCPI129_0)(a0) 2220; CHECK-NEXT: ret 2221; CHECK-NEXT: .LBB129_2: 2222; CHECK-NEXT: vfredmin.vs v8, v8, v8 2223; CHECK-NEXT: vfmv.f.s fa0, v8 2224; CHECK-NEXT: ret 2225 %v = load <16 x double>, ptr %x 2226 %red = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %v) 2227 ret double %red 2228} 2229 2230define double @vreduce_fminimum_v16f64_nonans(ptr %x) { 2231; CHECK-LABEL: vreduce_fminimum_v16f64_nonans: 2232; CHECK: # %bb.0: 2233; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2234; CHECK-NEXT: vle64.v v8, (a0) 2235; CHECK-NEXT: vfredmin.vs v8, v8, v8 2236; CHECK-NEXT: vfmv.f.s fa0, v8 2237; CHECK-NEXT: ret 2238 %v = load <16 x double>, ptr %x 2239 %red = call nnan double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %v) 2240 ret double %red 2241} 2242 2243declare double @llvm.vector.reduce.fminimum.v32f64(<32 x double>) 2244 2245define double 
@vreduce_fminimum_v32f64(ptr %x) { 2246; CHECK-LABEL: vreduce_fminimum_v32f64: 2247; CHECK: # %bb.0: 2248; CHECK-NEXT: addi sp, sp, -16 2249; CHECK-NEXT: .cfi_def_cfa_offset 16 2250; CHECK-NEXT: csrr a1, vlenb 2251; CHECK-NEXT: slli a1, a1, 3 2252; CHECK-NEXT: sub sp, sp, a1 2253; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2254; CHECK-NEXT: addi a1, a0, 128 2255; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2256; CHECK-NEXT: vle64.v v16, (a0) 2257; CHECK-NEXT: vle64.v v24, (a1) 2258; CHECK-NEXT: vmfeq.vv v0, v16, v16 2259; CHECK-NEXT: vmfeq.vv v7, v24, v24 2260; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 2261; CHECK-NEXT: addi a0, sp, 16 2262; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2263; CHECK-NEXT: vmv1r.v v0, v7 2264; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 2265; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2266; CHECK-NEXT: vfmin.vv v8, v8, v16 2267; CHECK-NEXT: vmfne.vv v16, v8, v8 2268; CHECK-NEXT: vcpop.m a0, v16 2269; CHECK-NEXT: beqz a0, .LBB131_2 2270; CHECK-NEXT: # %bb.1: 2271; CHECK-NEXT: lui a0, %hi(.LCPI131_0) 2272; CHECK-NEXT: fld fa0, %lo(.LCPI131_0)(a0) 2273; CHECK-NEXT: j .LBB131_3 2274; CHECK-NEXT: .LBB131_2: 2275; CHECK-NEXT: vfredmin.vs v8, v8, v8 2276; CHECK-NEXT: vfmv.f.s fa0, v8 2277; CHECK-NEXT: .LBB131_3: 2278; CHECK-NEXT: csrr a0, vlenb 2279; CHECK-NEXT: slli a0, a0, 3 2280; CHECK-NEXT: add sp, sp, a0 2281; CHECK-NEXT: .cfi_def_cfa sp, 16 2282; CHECK-NEXT: addi sp, sp, 16 2283; CHECK-NEXT: .cfi_def_cfa_offset 0 2284; CHECK-NEXT: ret 2285 %v = load <32 x double>, ptr %x 2286 %red = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %v) 2287 ret double %red 2288} 2289 2290define double @vreduce_fminimum_v32f64_nonans(ptr %x) { 2291; CHECK-LABEL: vreduce_fminimum_v32f64_nonans: 2292; CHECK: # %bb.0: 2293; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2294; CHECK-NEXT: vle64.v v8, (a0) 2295; CHECK-NEXT: addi a0, 
a0, 128 2296; CHECK-NEXT: vle64.v v16, (a0) 2297; CHECK-NEXT: vfmin.vv v8, v8, v16 2298; CHECK-NEXT: vfredmin.vs v8, v8, v8 2299; CHECK-NEXT: vfmv.f.s fa0, v8 2300; CHECK-NEXT: ret 2301 %v = load <32 x double>, ptr %x 2302 %red = call nnan double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %v) 2303 ret double %red 2304} 2305 2306declare double @llvm.vector.reduce.fminimum.v64f64(<64 x double>) 2307 2308define double @vreduce_fminimum_v64f64(ptr %x) { 2309; CHECK-LABEL: vreduce_fminimum_v64f64: 2310; CHECK: # %bb.0: 2311; CHECK-NEXT: addi sp, sp, -16 2312; CHECK-NEXT: .cfi_def_cfa_offset 16 2313; CHECK-NEXT: csrr a1, vlenb 2314; CHECK-NEXT: slli a1, a1, 4 2315; CHECK-NEXT: sub sp, sp, a1 2316; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 2317; CHECK-NEXT: addi a1, a0, 128 2318; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2319; CHECK-NEXT: vle64.v v8, (a1) 2320; CHECK-NEXT: addi a1, a0, 384 2321; CHECK-NEXT: vle64.v v16, (a1) 2322; CHECK-NEXT: addi a1, a0, 256 2323; CHECK-NEXT: vle64.v v24, (a0) 2324; CHECK-NEXT: addi a0, sp, 16 2325; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 2326; CHECK-NEXT: vmfeq.vv v0, v8, v8 2327; CHECK-NEXT: vmfeq.vv v7, v16, v16 2328; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 2329; CHECK-NEXT: csrr a0, vlenb 2330; CHECK-NEXT: slli a0, a0, 3 2331; CHECK-NEXT: add a0, sp, a0 2332; CHECK-NEXT: addi a0, a0, 16 2333; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 2334; CHECK-NEXT: vle64.v v24, (a1) 2335; CHECK-NEXT: vmv1r.v v0, v7 2336; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 2337; CHECK-NEXT: csrr a0, vlenb 2338; CHECK-NEXT: slli a0, a0, 3 2339; CHECK-NEXT: add a0, sp, a0 2340; CHECK-NEXT: addi a0, a0, 16 2341; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2342; CHECK-NEXT: vfmin.vv v8, v16, v8 2343; CHECK-NEXT: csrr a0, vlenb 2344; CHECK-NEXT: slli a0, a0, 3 2345; CHECK-NEXT: add a0, sp, a0 2346; CHECK-NEXT: addi 
a0, a0, 16 2347; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2348; CHECK-NEXT: addi a0, sp, 16 2349; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2350; CHECK-NEXT: vmfeq.vv v0, v8, v8 2351; CHECK-NEXT: vmfeq.vv v7, v24, v24 2352; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 2353; CHECK-NEXT: vmv1r.v v0, v7 2354; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 2355; CHECK-NEXT: vfmin.vv v16, v8, v16 2356; CHECK-NEXT: vmfeq.vv v0, v16, v16 2357; CHECK-NEXT: csrr a0, vlenb 2358; CHECK-NEXT: slli a0, a0, 3 2359; CHECK-NEXT: add a0, sp, a0 2360; CHECK-NEXT: addi a0, a0, 16 2361; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 2362; CHECK-NEXT: vmfeq.vv v7, v24, v24 2363; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 2364; CHECK-NEXT: addi a0, sp, 16 2365; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2366; CHECK-NEXT: vmv1r.v v0, v7 2367; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 2368; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2369; CHECK-NEXT: vfmin.vv v8, v8, v16 2370; CHECK-NEXT: vmfne.vv v16, v8, v8 2371; CHECK-NEXT: vcpop.m a0, v16 2372; CHECK-NEXT: beqz a0, .LBB133_2 2373; CHECK-NEXT: # %bb.1: 2374; CHECK-NEXT: lui a0, %hi(.LCPI133_0) 2375; CHECK-NEXT: fld fa0, %lo(.LCPI133_0)(a0) 2376; CHECK-NEXT: j .LBB133_3 2377; CHECK-NEXT: .LBB133_2: 2378; CHECK-NEXT: vfredmin.vs v8, v8, v8 2379; CHECK-NEXT: vfmv.f.s fa0, v8 2380; CHECK-NEXT: .LBB133_3: 2381; CHECK-NEXT: csrr a0, vlenb 2382; CHECK-NEXT: slli a0, a0, 4 2383; CHECK-NEXT: add sp, sp, a0 2384; CHECK-NEXT: .cfi_def_cfa sp, 16 2385; CHECK-NEXT: addi sp, sp, 16 2386; CHECK-NEXT: .cfi_def_cfa_offset 0 2387; CHECK-NEXT: ret 2388 %v = load <64 x double>, ptr %x 2389 %red = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> %v) 2390 ret double %red 2391} 2392 2393define double @vreduce_fminimum_v64f64_nonans(ptr %x) { 2394; CHECK-LABEL: vreduce_fminimum_v64f64_nonans: 2395; CHECK: # %bb.0: 2396; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2397; CHECK-NEXT: vle64.v v8, 
(a0) 2398; CHECK-NEXT: addi a1, a0, 384 2399; CHECK-NEXT: vle64.v v16, (a1) 2400; CHECK-NEXT: addi a1, a0, 256 2401; CHECK-NEXT: addi a0, a0, 128 2402; CHECK-NEXT: vle64.v v24, (a0) 2403; CHECK-NEXT: vle64.v v0, (a1) 2404; CHECK-NEXT: vfmin.vv v16, v24, v16 2405; CHECK-NEXT: vfmin.vv v8, v8, v0 2406; CHECK-NEXT: vfmin.vv v8, v8, v16 2407; CHECK-NEXT: vfredmin.vs v8, v8, v8 2408; CHECK-NEXT: vfmv.f.s fa0, v8 2409; CHECK-NEXT: ret 2410 %v = load <64 x double>, ptr %x 2411 %red = call nnan double @llvm.vector.reduce.fminimum.v64f64(<64 x double> %v) 2412 ret double %red 2413} 2414 2415declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>) 2416 2417define float @vreduce_fmaximum_v2f32(ptr %x) { 2418; CHECK-LABEL: vreduce_fmaximum_v2f32: 2419; CHECK: # %bb.0: 2420; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 2421; CHECK-NEXT: vle32.v v8, (a0) 2422; CHECK-NEXT: vmfne.vv v9, v8, v8 2423; CHECK-NEXT: vcpop.m a0, v9 2424; CHECK-NEXT: beqz a0, .LBB135_2 2425; CHECK-NEXT: # %bb.1: 2426; CHECK-NEXT: lui a0, 523264 2427; CHECK-NEXT: fmv.w.x fa0, a0 2428; CHECK-NEXT: ret 2429; CHECK-NEXT: .LBB135_2: 2430; CHECK-NEXT: vfredmax.vs v8, v8, v8 2431; CHECK-NEXT: vfmv.f.s fa0, v8 2432; CHECK-NEXT: ret 2433 %v = load <2 x float>, ptr %x 2434 %red = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %v) 2435 ret float %red 2436} 2437 2438define float @vreduce_fmaximum_v2f32_nonans(ptr %x) { 2439; CHECK-LABEL: vreduce_fmaximum_v2f32_nonans: 2440; CHECK: # %bb.0: 2441; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 2442; CHECK-NEXT: vle32.v v8, (a0) 2443; CHECK-NEXT: vfredmax.vs v8, v8, v8 2444; CHECK-NEXT: vfmv.f.s fa0, v8 2445; CHECK-NEXT: ret 2446 %v = load <2 x float>, ptr %x 2447 %red = call nnan float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %v) 2448 ret float %red 2449} 2450 2451declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>) 2452 2453define float @vreduce_fmaximum_v4f32(ptr %x) { 2454; CHECK-LABEL: vreduce_fmaximum_v4f32: 2455; CHECK: # %bb.0: 
2456; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2457; CHECK-NEXT: vle32.v v8, (a0) 2458; CHECK-NEXT: vmfne.vv v9, v8, v8 2459; CHECK-NEXT: vcpop.m a0, v9 2460; CHECK-NEXT: beqz a0, .LBB137_2 2461; CHECK-NEXT: # %bb.1: 2462; CHECK-NEXT: lui a0, 523264 2463; CHECK-NEXT: fmv.w.x fa0, a0 2464; CHECK-NEXT: ret 2465; CHECK-NEXT: .LBB137_2: 2466; CHECK-NEXT: vfredmax.vs v8, v8, v8 2467; CHECK-NEXT: vfmv.f.s fa0, v8 2468; CHECK-NEXT: ret 2469 %v = load <4 x float>, ptr %x 2470 %red = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %v) 2471 ret float %red 2472} 2473 2474define float @vreduce_fmaximum_v4f32_nonans(ptr %x) { 2475; CHECK-LABEL: vreduce_fmaximum_v4f32_nonans: 2476; CHECK: # %bb.0: 2477; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2478; CHECK-NEXT: vle32.v v8, (a0) 2479; CHECK-NEXT: vfredmax.vs v8, v8, v8 2480; CHECK-NEXT: vfmv.f.s fa0, v8 2481; CHECK-NEXT: ret 2482 %v = load <4 x float>, ptr %x 2483 %red = call nnan float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %v) 2484 ret float %red 2485} 2486 2487declare float @llvm.vector.reduce.fmaximum.v7f32(<7 x float>) 2488 2489define float @vreduce_fmaximum_v7f32(ptr %x) { 2490; CHECK-LABEL: vreduce_fmaximum_v7f32: 2491; CHECK: # %bb.0: 2492; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma 2493; CHECK-NEXT: vle32.v v8, (a0) 2494; CHECK-NEXT: vmfne.vv v10, v8, v8 2495; CHECK-NEXT: vcpop.m a0, v10 2496; CHECK-NEXT: beqz a0, .LBB139_2 2497; CHECK-NEXT: # %bb.1: 2498; CHECK-NEXT: lui a0, 523264 2499; CHECK-NEXT: fmv.w.x fa0, a0 2500; CHECK-NEXT: ret 2501; CHECK-NEXT: .LBB139_2: 2502; CHECK-NEXT: lui a0, 1046528 2503; CHECK-NEXT: vmv.s.x v10, a0 2504; CHECK-NEXT: vfredmax.vs v8, v8, v10 2505; CHECK-NEXT: vfmv.f.s fa0, v8 2506; CHECK-NEXT: ret 2507 %v = load <7 x float>, ptr %x 2508 %red = call float @llvm.vector.reduce.fmaximum.v7f32(<7 x float> %v) 2509 ret float %red 2510} 2511 2512define float @vreduce_fmaximum_v7f32_nonans(ptr %x) { 2513; CHECK-LABEL: vreduce_fmaximum_v7f32_nonans: 2514; CHECK: # 
%bb.0: 2515; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma 2516; CHECK-NEXT: vle32.v v8, (a0) 2517; CHECK-NEXT: lui a0, 1046528 2518; CHECK-NEXT: vmv.s.x v10, a0 2519; CHECK-NEXT: vfredmax.vs v8, v8, v10 2520; CHECK-NEXT: vfmv.f.s fa0, v8 2521; CHECK-NEXT: ret 2522 %v = load <7 x float>, ptr %x 2523 %red = call nnan float @llvm.vector.reduce.fmaximum.v7f32(<7 x float> %v) 2524 ret float %red 2525} 2526 2527declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>) 2528 2529define float @vreduce_fmaximum_v8f32(ptr %x) { 2530; CHECK-LABEL: vreduce_fmaximum_v8f32: 2531; CHECK: # %bb.0: 2532; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2533; CHECK-NEXT: vle32.v v8, (a0) 2534; CHECK-NEXT: vmfne.vv v10, v8, v8 2535; CHECK-NEXT: vcpop.m a0, v10 2536; CHECK-NEXT: beqz a0, .LBB141_2 2537; CHECK-NEXT: # %bb.1: 2538; CHECK-NEXT: lui a0, 523264 2539; CHECK-NEXT: fmv.w.x fa0, a0 2540; CHECK-NEXT: ret 2541; CHECK-NEXT: .LBB141_2: 2542; CHECK-NEXT: vfredmax.vs v8, v8, v8 2543; CHECK-NEXT: vfmv.f.s fa0, v8 2544; CHECK-NEXT: ret 2545 %v = load <8 x float>, ptr %x 2546 %red = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %v) 2547 ret float %red 2548} 2549 2550define float @vreduce_fmaximum_v8f32_nonans(ptr %x) { 2551; CHECK-LABEL: vreduce_fmaximum_v8f32_nonans: 2552; CHECK: # %bb.0: 2553; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2554; CHECK-NEXT: vle32.v v8, (a0) 2555; CHECK-NEXT: vfredmax.vs v8, v8, v8 2556; CHECK-NEXT: vfmv.f.s fa0, v8 2557; CHECK-NEXT: ret 2558 %v = load <8 x float>, ptr %x 2559 %red = call nnan float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %v) 2560 ret float %red 2561} 2562 2563declare float @llvm.vector.reduce.fmaximum.v16f32(<16 x float>) 2564 2565define float @vreduce_fmaximum_v16f32(ptr %x) { 2566; CHECK-LABEL: vreduce_fmaximum_v16f32: 2567; CHECK: # %bb.0: 2568; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma 2569; CHECK-NEXT: vle32.v v8, (a0) 2570; CHECK-NEXT: vmfne.vv v12, v8, v8 2571; CHECK-NEXT: vcpop.m a0, v12 2572; 
CHECK-NEXT: beqz a0, .LBB143_2 2573; CHECK-NEXT: # %bb.1: 2574; CHECK-NEXT: lui a0, 523264 2575; CHECK-NEXT: fmv.w.x fa0, a0 2576; CHECK-NEXT: ret 2577; CHECK-NEXT: .LBB143_2: 2578; CHECK-NEXT: vfredmax.vs v8, v8, v8 2579; CHECK-NEXT: vfmv.f.s fa0, v8 2580; CHECK-NEXT: ret 2581 %v = load <16 x float>, ptr %x 2582 %red = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %v) 2583 ret float %red 2584} 2585 2586define float @vreduce_fmaximum_v16f32_nonans(ptr %x) { 2587; CHECK-LABEL: vreduce_fmaximum_v16f32_nonans: 2588; CHECK: # %bb.0: 2589; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma 2590; CHECK-NEXT: vle32.v v8, (a0) 2591; CHECK-NEXT: vfredmax.vs v8, v8, v8 2592; CHECK-NEXT: vfmv.f.s fa0, v8 2593; CHECK-NEXT: ret 2594 %v = load <16 x float>, ptr %x 2595 %red = call nnan float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %v) 2596 ret float %red 2597} 2598 2599declare float @llvm.vector.reduce.fmaximum.v32f32(<32 x float>) 2600 2601define float @vreduce_fmaximum_v32f32(ptr %x) { 2602; CHECK-LABEL: vreduce_fmaximum_v32f32: 2603; CHECK: # %bb.0: 2604; CHECK-NEXT: li a1, 32 2605; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 2606; CHECK-NEXT: vle32.v v8, (a0) 2607; CHECK-NEXT: vmfne.vv v16, v8, v8 2608; CHECK-NEXT: vcpop.m a0, v16 2609; CHECK-NEXT: beqz a0, .LBB145_2 2610; CHECK-NEXT: # %bb.1: 2611; CHECK-NEXT: lui a0, 523264 2612; CHECK-NEXT: fmv.w.x fa0, a0 2613; CHECK-NEXT: ret 2614; CHECK-NEXT: .LBB145_2: 2615; CHECK-NEXT: vfredmax.vs v8, v8, v8 2616; CHECK-NEXT: vfmv.f.s fa0, v8 2617; CHECK-NEXT: ret 2618 %v = load <32 x float>, ptr %x 2619 %red = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %v) 2620 ret float %red 2621} 2622 2623define float @vreduce_fmaximum_v32f32_nonans(ptr %x) { 2624; CHECK-LABEL: vreduce_fmaximum_v32f32_nonans: 2625; CHECK: # %bb.0: 2626; CHECK-NEXT: li a1, 32 2627; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 2628; CHECK-NEXT: vle32.v v8, (a0) 2629; CHECK-NEXT: vfredmax.vs v8, v8, v8 2630; CHECK-NEXT: vfmv.f.s 
fa0, v8 2631; CHECK-NEXT: ret 2632 %v = load <32 x float>, ptr %x 2633 %red = call nnan float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %v) 2634 ret float %red 2635} 2636 2637declare float @llvm.vector.reduce.fmaximum.v64f32(<64 x float>) 2638 2639define float @vreduce_fmaximum_v64f32(ptr %x) { 2640; CHECK-LABEL: vreduce_fmaximum_v64f32: 2641; CHECK: # %bb.0: 2642; CHECK-NEXT: addi sp, sp, -16 2643; CHECK-NEXT: .cfi_def_cfa_offset 16 2644; CHECK-NEXT: csrr a1, vlenb 2645; CHECK-NEXT: slli a1, a1, 3 2646; CHECK-NEXT: sub sp, sp, a1 2647; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2648; CHECK-NEXT: addi a1, a0, 128 2649; CHECK-NEXT: li a2, 32 2650; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2651; CHECK-NEXT: vle32.v v16, (a0) 2652; CHECK-NEXT: vle32.v v24, (a1) 2653; CHECK-NEXT: vmfeq.vv v0, v16, v16 2654; CHECK-NEXT: vmfeq.vv v7, v24, v24 2655; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 2656; CHECK-NEXT: addi a0, sp, 16 2657; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2658; CHECK-NEXT: vmv1r.v v0, v7 2659; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 2660; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2661; CHECK-NEXT: vfmax.vv v8, v8, v16 2662; CHECK-NEXT: vmfne.vv v16, v8, v8 2663; CHECK-NEXT: vcpop.m a0, v16 2664; CHECK-NEXT: beqz a0, .LBB147_2 2665; CHECK-NEXT: # %bb.1: 2666; CHECK-NEXT: lui a0, 523264 2667; CHECK-NEXT: fmv.w.x fa0, a0 2668; CHECK-NEXT: j .LBB147_3 2669; CHECK-NEXT: .LBB147_2: 2670; CHECK-NEXT: vfredmax.vs v8, v8, v8 2671; CHECK-NEXT: vfmv.f.s fa0, v8 2672; CHECK-NEXT: .LBB147_3: 2673; CHECK-NEXT: csrr a0, vlenb 2674; CHECK-NEXT: slli a0, a0, 3 2675; CHECK-NEXT: add sp, sp, a0 2676; CHECK-NEXT: .cfi_def_cfa sp, 16 2677; CHECK-NEXT: addi sp, sp, 16 2678; CHECK-NEXT: .cfi_def_cfa_offset 0 2679; CHECK-NEXT: ret 2680 %v = load <64 x float>, ptr %x 2681 %red = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %v) 2682 ret 
float %red 2683} 2684 2685define float @vreduce_fmaximum_v64f32_nonans(ptr %x) { 2686; CHECK-LABEL: vreduce_fmaximum_v64f32_nonans: 2687; CHECK: # %bb.0: 2688; CHECK-NEXT: li a1, 32 2689; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 2690; CHECK-NEXT: vle32.v v8, (a0) 2691; CHECK-NEXT: addi a0, a0, 128 2692; CHECK-NEXT: vle32.v v16, (a0) 2693; CHECK-NEXT: vfmax.vv v8, v8, v16 2694; CHECK-NEXT: vfredmax.vs v8, v8, v8 2695; CHECK-NEXT: vfmv.f.s fa0, v8 2696; CHECK-NEXT: ret 2697 %v = load <64 x float>, ptr %x 2698 %red = call nnan float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %v) 2699 ret float %red 2700} 2701 2702declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>) 2703 2704define float @vreduce_fmaximum_v128f32(ptr %x) { 2705; CHECK-LABEL: vreduce_fmaximum_v128f32: 2706; CHECK: # %bb.0: 2707; CHECK-NEXT: addi sp, sp, -16 2708; CHECK-NEXT: .cfi_def_cfa_offset 16 2709; CHECK-NEXT: csrr a1, vlenb 2710; CHECK-NEXT: slli a1, a1, 4 2711; CHECK-NEXT: sub sp, sp, a1 2712; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 2713; CHECK-NEXT: li a1, 32 2714; CHECK-NEXT: addi a2, a0, 128 2715; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 2716; CHECK-NEXT: vle32.v v8, (a2) 2717; CHECK-NEXT: addi a1, a0, 384 2718; CHECK-NEXT: vle32.v v16, (a1) 2719; CHECK-NEXT: addi a1, a0, 256 2720; CHECK-NEXT: vle32.v v24, (a0) 2721; CHECK-NEXT: addi a0, sp, 16 2722; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 2723; CHECK-NEXT: vmfeq.vv v0, v8, v8 2724; CHECK-NEXT: vmfeq.vv v7, v16, v16 2725; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 2726; CHECK-NEXT: csrr a0, vlenb 2727; CHECK-NEXT: slli a0, a0, 3 2728; CHECK-NEXT: add a0, sp, a0 2729; CHECK-NEXT: addi a0, a0, 16 2730; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 2731; CHECK-NEXT: vle32.v v24, (a1) 2732; CHECK-NEXT: vmv1r.v v0, v7 2733; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 2734; CHECK-NEXT: csrr a0, vlenb 2735; 
CHECK-NEXT: slli a0, a0, 3 2736; CHECK-NEXT: add a0, sp, a0 2737; CHECK-NEXT: addi a0, a0, 16 2738; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2739; CHECK-NEXT: vfmax.vv v8, v16, v8 2740; CHECK-NEXT: csrr a0, vlenb 2741; CHECK-NEXT: slli a0, a0, 3 2742; CHECK-NEXT: add a0, sp, a0 2743; CHECK-NEXT: addi a0, a0, 16 2744; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2745; CHECK-NEXT: addi a0, sp, 16 2746; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2747; CHECK-NEXT: vmfeq.vv v0, v8, v8 2748; CHECK-NEXT: vmfeq.vv v7, v24, v24 2749; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 2750; CHECK-NEXT: vmv1r.v v0, v7 2751; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 2752; CHECK-NEXT: vfmax.vv v16, v8, v16 2753; CHECK-NEXT: vmfeq.vv v0, v16, v16 2754; CHECK-NEXT: csrr a0, vlenb 2755; CHECK-NEXT: slli a0, a0, 3 2756; CHECK-NEXT: add a0, sp, a0 2757; CHECK-NEXT: addi a0, a0, 16 2758; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 2759; CHECK-NEXT: vmfeq.vv v7, v24, v24 2760; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 2761; CHECK-NEXT: addi a0, sp, 16 2762; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2763; CHECK-NEXT: vmv1r.v v0, v7 2764; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 2765; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2766; CHECK-NEXT: vfmax.vv v8, v8, v16 2767; CHECK-NEXT: vmfne.vv v16, v8, v8 2768; CHECK-NEXT: vcpop.m a0, v16 2769; CHECK-NEXT: beqz a0, .LBB149_2 2770; CHECK-NEXT: # %bb.1: 2771; CHECK-NEXT: lui a0, 523264 2772; CHECK-NEXT: fmv.w.x fa0, a0 2773; CHECK-NEXT: j .LBB149_3 2774; CHECK-NEXT: .LBB149_2: 2775; CHECK-NEXT: vfredmax.vs v8, v8, v8 2776; CHECK-NEXT: vfmv.f.s fa0, v8 2777; CHECK-NEXT: .LBB149_3: 2778; CHECK-NEXT: csrr a0, vlenb 2779; CHECK-NEXT: slli a0, a0, 4 2780; CHECK-NEXT: add sp, sp, a0 2781; CHECK-NEXT: .cfi_def_cfa sp, 16 2782; CHECK-NEXT: addi sp, sp, 16 2783; CHECK-NEXT: .cfi_def_cfa_offset 0 2784; CHECK-NEXT: ret 2785 %v = load <128 x float>, ptr %x 2786 %red = call float 
@llvm.vector.reduce.fmaximum.v128f32(<128 x float> %v) 2787 ret float %red 2788} 2789 2790define float @vreduce_fmaximum_v128f32_nonans(ptr %x) { 2791; CHECK-LABEL: vreduce_fmaximum_v128f32_nonans: 2792; CHECK: # %bb.0: 2793; CHECK-NEXT: li a1, 32 2794; CHECK-NEXT: addi a2, a0, 384 2795; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma 2796; CHECK-NEXT: vle32.v v8, (a2) 2797; CHECK-NEXT: addi a1, a0, 256 2798; CHECK-NEXT: vle32.v v16, (a0) 2799; CHECK-NEXT: addi a0, a0, 128 2800; CHECK-NEXT: vle32.v v24, (a0) 2801; CHECK-NEXT: vle32.v v0, (a1) 2802; CHECK-NEXT: vfmax.vv v8, v24, v8 2803; CHECK-NEXT: vfmax.vv v16, v16, v0 2804; CHECK-NEXT: vfmax.vv v8, v16, v8 2805; CHECK-NEXT: vfredmax.vs v8, v8, v8 2806; CHECK-NEXT: vfmv.f.s fa0, v8 2807; CHECK-NEXT: ret 2808 %v = load <128 x float>, ptr %x 2809 %red = call nnan float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> %v) 2810 ret float %red 2811} 2812 2813declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>) 2814 2815define double @vreduce_fmaximum_v2f64(ptr %x) { 2816; CHECK-LABEL: vreduce_fmaximum_v2f64: 2817; CHECK: # %bb.0: 2818; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2819; CHECK-NEXT: vle64.v v8, (a0) 2820; CHECK-NEXT: vmfne.vv v9, v8, v8 2821; CHECK-NEXT: vcpop.m a0, v9 2822; CHECK-NEXT: beqz a0, .LBB151_2 2823; CHECK-NEXT: # %bb.1: 2824; CHECK-NEXT: lui a0, %hi(.LCPI151_0) 2825; CHECK-NEXT: fld fa0, %lo(.LCPI151_0)(a0) 2826; CHECK-NEXT: ret 2827; CHECK-NEXT: .LBB151_2: 2828; CHECK-NEXT: vfredmax.vs v8, v8, v8 2829; CHECK-NEXT: vfmv.f.s fa0, v8 2830; CHECK-NEXT: ret 2831 %v = load <2 x double>, ptr %x 2832 %red = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %v) 2833 ret double %red 2834} 2835 2836define double @vreduce_fmaximum_v2f64_nonans(ptr %x) { 2837; CHECK-LABEL: vreduce_fmaximum_v2f64_nonans: 2838; CHECK: # %bb.0: 2839; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2840; CHECK-NEXT: vle64.v v8, (a0) 2841; CHECK-NEXT: vfredmax.vs v8, v8, v8 2842; CHECK-NEXT: vfmv.f.s fa0, 
v8 2843; CHECK-NEXT: ret 2844 %v = load <2 x double>, ptr %x 2845 %red = call nnan double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %v) 2846 ret double %red 2847} 2848 2849declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>) 2850 2851define double @vreduce_fmaximum_v4f64(ptr %x) { 2852; CHECK-LABEL: vreduce_fmaximum_v4f64: 2853; CHECK: # %bb.0: 2854; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2855; CHECK-NEXT: vle64.v v8, (a0) 2856; CHECK-NEXT: vmfne.vv v10, v8, v8 2857; CHECK-NEXT: vcpop.m a0, v10 2858; CHECK-NEXT: beqz a0, .LBB153_2 2859; CHECK-NEXT: # %bb.1: 2860; CHECK-NEXT: lui a0, %hi(.LCPI153_0) 2861; CHECK-NEXT: fld fa0, %lo(.LCPI153_0)(a0) 2862; CHECK-NEXT: ret 2863; CHECK-NEXT: .LBB153_2: 2864; CHECK-NEXT: vfredmax.vs v8, v8, v8 2865; CHECK-NEXT: vfmv.f.s fa0, v8 2866; CHECK-NEXT: ret 2867 %v = load <4 x double>, ptr %x 2868 %red = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %v) 2869 ret double %red 2870} 2871 2872define double @vreduce_fmaximum_v4f64_nonans(ptr %x) { 2873; CHECK-LABEL: vreduce_fmaximum_v4f64_nonans: 2874; CHECK: # %bb.0: 2875; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2876; CHECK-NEXT: vle64.v v8, (a0) 2877; CHECK-NEXT: vfredmax.vs v8, v8, v8 2878; CHECK-NEXT: vfmv.f.s fa0, v8 2879; CHECK-NEXT: ret 2880 %v = load <4 x double>, ptr %x 2881 %red = call nnan double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %v) 2882 ret double %red 2883} 2884 2885declare double @llvm.vector.reduce.fmaximum.v8f64(<8 x double>) 2886 2887define double @vreduce_fmaximum_v8f64(ptr %x) { 2888; CHECK-LABEL: vreduce_fmaximum_v8f64: 2889; CHECK: # %bb.0: 2890; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2891; CHECK-NEXT: vle64.v v8, (a0) 2892; CHECK-NEXT: vmfne.vv v12, v8, v8 2893; CHECK-NEXT: vcpop.m a0, v12 2894; CHECK-NEXT: beqz a0, .LBB155_2 2895; CHECK-NEXT: # %bb.1: 2896; CHECK-NEXT: lui a0, %hi(.LCPI155_0) 2897; CHECK-NEXT: fld fa0, %lo(.LCPI155_0)(a0) 2898; CHECK-NEXT: ret 2899; CHECK-NEXT: .LBB155_2: 2900; 
CHECK-NEXT: vfredmax.vs v8, v8, v8 2901; CHECK-NEXT: vfmv.f.s fa0, v8 2902; CHECK-NEXT: ret 2903 %v = load <8 x double>, ptr %x 2904 %red = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %v) 2905 ret double %red 2906} 2907 2908define double @vreduce_fmaximum_v8f64_nonans(ptr %x) { 2909; CHECK-LABEL: vreduce_fmaximum_v8f64_nonans: 2910; CHECK: # %bb.0: 2911; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2912; CHECK-NEXT: vle64.v v8, (a0) 2913; CHECK-NEXT: vfredmax.vs v8, v8, v8 2914; CHECK-NEXT: vfmv.f.s fa0, v8 2915; CHECK-NEXT: ret 2916 %v = load <8 x double>, ptr %x 2917 %red = call nnan double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %v) 2918 ret double %red 2919} 2920 2921declare double @llvm.vector.reduce.fmaximum.v16f64(<16 x double>) 2922 2923define double @vreduce_fmaximum_v16f64(ptr %x) { 2924; CHECK-LABEL: vreduce_fmaximum_v16f64: 2925; CHECK: # %bb.0: 2926; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2927; CHECK-NEXT: vle64.v v8, (a0) 2928; CHECK-NEXT: vmfne.vv v16, v8, v8 2929; CHECK-NEXT: vcpop.m a0, v16 2930; CHECK-NEXT: beqz a0, .LBB157_2 2931; CHECK-NEXT: # %bb.1: 2932; CHECK-NEXT: lui a0, %hi(.LCPI157_0) 2933; CHECK-NEXT: fld fa0, %lo(.LCPI157_0)(a0) 2934; CHECK-NEXT: ret 2935; CHECK-NEXT: .LBB157_2: 2936; CHECK-NEXT: vfredmax.vs v8, v8, v8 2937; CHECK-NEXT: vfmv.f.s fa0, v8 2938; CHECK-NEXT: ret 2939 %v = load <16 x double>, ptr %x 2940 %red = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %v) 2941 ret double %red 2942} 2943 2944define double @vreduce_fmaximum_v16f64_nonans(ptr %x) { 2945; CHECK-LABEL: vreduce_fmaximum_v16f64_nonans: 2946; CHECK: # %bb.0: 2947; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2948; CHECK-NEXT: vle64.v v8, (a0) 2949; CHECK-NEXT: vfredmax.vs v8, v8, v8 2950; CHECK-NEXT: vfmv.f.s fa0, v8 2951; CHECK-NEXT: ret 2952 %v = load <16 x double>, ptr %x 2953 %red = call nnan double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %v) 2954 ret double %red 2955} 2956 2957declare double 
@llvm.vector.reduce.fmaximum.v32f64(<32 x double>) 2958 2959define double @vreduce_fmaximum_v32f64(ptr %x) { 2960; CHECK-LABEL: vreduce_fmaximum_v32f64: 2961; CHECK: # %bb.0: 2962; CHECK-NEXT: addi sp, sp, -16 2963; CHECK-NEXT: .cfi_def_cfa_offset 16 2964; CHECK-NEXT: csrr a1, vlenb 2965; CHECK-NEXT: slli a1, a1, 3 2966; CHECK-NEXT: sub sp, sp, a1 2967; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2968; CHECK-NEXT: addi a1, a0, 128 2969; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2970; CHECK-NEXT: vle64.v v16, (a0) 2971; CHECK-NEXT: vle64.v v24, (a1) 2972; CHECK-NEXT: vmfeq.vv v0, v16, v16 2973; CHECK-NEXT: vmfeq.vv v7, v24, v24 2974; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 2975; CHECK-NEXT: addi a0, sp, 16 2976; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2977; CHECK-NEXT: vmv1r.v v0, v7 2978; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 2979; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2980; CHECK-NEXT: vfmax.vv v8, v8, v16 2981; CHECK-NEXT: vmfne.vv v16, v8, v8 2982; CHECK-NEXT: vcpop.m a0, v16 2983; CHECK-NEXT: beqz a0, .LBB159_2 2984; CHECK-NEXT: # %bb.1: 2985; CHECK-NEXT: lui a0, %hi(.LCPI159_0) 2986; CHECK-NEXT: fld fa0, %lo(.LCPI159_0)(a0) 2987; CHECK-NEXT: j .LBB159_3 2988; CHECK-NEXT: .LBB159_2: 2989; CHECK-NEXT: vfredmax.vs v8, v8, v8 2990; CHECK-NEXT: vfmv.f.s fa0, v8 2991; CHECK-NEXT: .LBB159_3: 2992; CHECK-NEXT: csrr a0, vlenb 2993; CHECK-NEXT: slli a0, a0, 3 2994; CHECK-NEXT: add sp, sp, a0 2995; CHECK-NEXT: .cfi_def_cfa sp, 16 2996; CHECK-NEXT: addi sp, sp, 16 2997; CHECK-NEXT: .cfi_def_cfa_offset 0 2998; CHECK-NEXT: ret 2999 %v = load <32 x double>, ptr %x 3000 %red = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %v) 3001 ret double %red 3002} 3003 3004define double @vreduce_fmaximum_v32f64_nonans(ptr %x) { 3005; CHECK-LABEL: vreduce_fmaximum_v32f64_nonans: 3006; CHECK: # %bb.0: 3007; CHECK-NEXT: vsetivli zero, 16, e64, 
m8, ta, ma 3008; CHECK-NEXT: vle64.v v8, (a0) 3009; CHECK-NEXT: addi a0, a0, 128 3010; CHECK-NEXT: vle64.v v16, (a0) 3011; CHECK-NEXT: vfmax.vv v8, v8, v16 3012; CHECK-NEXT: vfredmax.vs v8, v8, v8 3013; CHECK-NEXT: vfmv.f.s fa0, v8 3014; CHECK-NEXT: ret 3015 %v = load <32 x double>, ptr %x 3016 %red = call nnan double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %v) 3017 ret double %red 3018} 3019 3020declare double @llvm.vector.reduce.fmaximum.v64f64(<64 x double>) 3021 3022define double @vreduce_fmaximum_v64f64(ptr %x) { 3023; CHECK-LABEL: vreduce_fmaximum_v64f64: 3024; CHECK: # %bb.0: 3025; CHECK-NEXT: addi sp, sp, -16 3026; CHECK-NEXT: .cfi_def_cfa_offset 16 3027; CHECK-NEXT: csrr a1, vlenb 3028; CHECK-NEXT: slli a1, a1, 4 3029; CHECK-NEXT: sub sp, sp, a1 3030; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 3031; CHECK-NEXT: addi a1, a0, 128 3032; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3033; CHECK-NEXT: vle64.v v8, (a1) 3034; CHECK-NEXT: addi a1, a0, 384 3035; CHECK-NEXT: vle64.v v16, (a1) 3036; CHECK-NEXT: addi a1, a0, 256 3037; CHECK-NEXT: vle64.v v24, (a0) 3038; CHECK-NEXT: addi a0, sp, 16 3039; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 3040; CHECK-NEXT: vmfeq.vv v0, v8, v8 3041; CHECK-NEXT: vmfeq.vv v7, v16, v16 3042; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 3043; CHECK-NEXT: csrr a0, vlenb 3044; CHECK-NEXT: slli a0, a0, 3 3045; CHECK-NEXT: add a0, sp, a0 3046; CHECK-NEXT: addi a0, a0, 16 3047; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 3048; CHECK-NEXT: vle64.v v24, (a1) 3049; CHECK-NEXT: vmv1r.v v0, v7 3050; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 3051; CHECK-NEXT: csrr a0, vlenb 3052; CHECK-NEXT: slli a0, a0, 3 3053; CHECK-NEXT: add a0, sp, a0 3054; CHECK-NEXT: addi a0, a0, 16 3055; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 3056; CHECK-NEXT: vfmax.vv v8, v16, v8 3057; CHECK-NEXT: csrr a0, vlenb 3058; CHECK-NEXT: 
slli a0, a0, 3 3059; CHECK-NEXT: add a0, sp, a0 3060; CHECK-NEXT: addi a0, a0, 16 3061; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 3062; CHECK-NEXT: addi a0, sp, 16 3063; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 3064; CHECK-NEXT: vmfeq.vv v0, v8, v8 3065; CHECK-NEXT: vmfeq.vv v7, v24, v24 3066; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 3067; CHECK-NEXT: vmv1r.v v0, v7 3068; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 3069; CHECK-NEXT: vfmax.vv v16, v8, v16 3070; CHECK-NEXT: vmfeq.vv v0, v16, v16 3071; CHECK-NEXT: csrr a0, vlenb 3072; CHECK-NEXT: slli a0, a0, 3 3073; CHECK-NEXT: add a0, sp, a0 3074; CHECK-NEXT: addi a0, a0, 16 3075; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 3076; CHECK-NEXT: vmfeq.vv v7, v24, v24 3077; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 3078; CHECK-NEXT: addi a0, sp, 16 3079; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 3080; CHECK-NEXT: vmv1r.v v0, v7 3081; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 3082; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 3083; CHECK-NEXT: vfmax.vv v8, v8, v16 3084; CHECK-NEXT: vmfne.vv v16, v8, v8 3085; CHECK-NEXT: vcpop.m a0, v16 3086; CHECK-NEXT: beqz a0, .LBB161_2 3087; CHECK-NEXT: # %bb.1: 3088; CHECK-NEXT: lui a0, %hi(.LCPI161_0) 3089; CHECK-NEXT: fld fa0, %lo(.LCPI161_0)(a0) 3090; CHECK-NEXT: j .LBB161_3 3091; CHECK-NEXT: .LBB161_2: 3092; CHECK-NEXT: vfredmax.vs v8, v8, v8 3093; CHECK-NEXT: vfmv.f.s fa0, v8 3094; CHECK-NEXT: .LBB161_3: 3095; CHECK-NEXT: csrr a0, vlenb 3096; CHECK-NEXT: slli a0, a0, 4 3097; CHECK-NEXT: add sp, sp, a0 3098; CHECK-NEXT: .cfi_def_cfa sp, 16 3099; CHECK-NEXT: addi sp, sp, 16 3100; CHECK-NEXT: .cfi_def_cfa_offset 0 3101; CHECK-NEXT: ret 3102 %v = load <64 x double>, ptr %x 3103 %red = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> %v) 3104 ret double %red 3105} 3106 3107define double @vreduce_fmaximum_v64f64_nonans(ptr %x) { 3108; CHECK-LABEL: vreduce_fmaximum_v64f64_nonans: 3109; CHECK: # %bb.0: 3110; 
CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3111; CHECK-NEXT: vle64.v v8, (a0) 3112; CHECK-NEXT: addi a1, a0, 384 3113; CHECK-NEXT: vle64.v v16, (a1) 3114; CHECK-NEXT: addi a1, a0, 256 3115; CHECK-NEXT: addi a0, a0, 128 3116; CHECK-NEXT: vle64.v v24, (a0) 3117; CHECK-NEXT: vle64.v v0, (a1) 3118; CHECK-NEXT: vfmax.vv v16, v24, v16 3119; CHECK-NEXT: vfmax.vv v8, v8, v0 3120; CHECK-NEXT: vfmax.vv v8, v8, v16 3121; CHECK-NEXT: vfredmax.vs v8, v8, v8 3122; CHECK-NEXT: vfmv.f.s fa0, v8 3123; CHECK-NEXT: ret 3124 %v = load <64 x double>, ptr %x 3125 %red = call nnan double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> %v) 3126 ret double %red 3127} 3128