; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Floating-point vector reductions on fixed-length vectors, compiled twice:
; once with +sve and -force-streaming-compatible (default check prefix) and
; once with -force-streaming-compatible alone (second check prefix).
; Do not hand-edit the assertion lines; regenerate them with the script named
; in the NOTE line above.

target triple = "aarch64-unknown-linux-gnu"

;
; FADDA
;
; Ordered fadd reductions: the calls carry no fast-math flags, and the expected
; output adds the elements to %start one lane at a time, in lane order.
; The 256-bit cases take a pointer and load the vector in the function body.
;

define half @fadda_v4f16(half %start, <4 x half> %a) {
; CHECK-LABEL: fadda_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    mov z2.h, z1.h[1]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[2]
; CHECK-NEXT:    mov z1.h, z1.h[3]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v4f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
  ret half %res
}

define half @fadda_v8f16(half %start, <8 x half> %a) {
; CHECK-LABEL: fadda_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    mov z2.h, z1.h[1]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[2]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[3]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[4]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[5]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[6]
; CHECK-NEXT:    mov z1.h, z1.h[7]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v8f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr h1, [sp]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
  ret half %res
}

define half @fadda_v16f16(half %start, ptr %a) {
; CHECK-LABEL: fadda_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    mov z2.h, z1.h[1]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[2]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[3]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[4]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[5]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[6]
; CHECK-NEXT:    mov z1.h, z1.h[7]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ldr q1, [x0, #16]
; CHECK-NEXT:    mov z2.h, z1.h[1]
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[2]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[3]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[4]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[5]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    mov z2.h, z1.h[6]
; CHECK-NEXT:    mov z1.h, z1.h[7]
; CHECK-NEXT:    fadd h0, h0, h2
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v16f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr q1, [x0, #16]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    str q1, [sp]
; NONEON-NOSVE-NEXT:    ldr h1, [sp]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #16]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #18]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #20]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #22]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #26]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #30]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
  ret half %res
}

define float @fadda_v2f32(float %start, <2 x float> %a) {
; CHECK-LABEL: fadda_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov z1.s, z1.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
  ret float %res
}

define float @fadda_v4f32(float %start, <4 x float> %a) {
; CHECK-LABEL: fadda_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov z2.s, z1.s[1]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    mov z2.s, z1.s[2]
; CHECK-NEXT:    mov z1.s, z1.s[3]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp]
; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
  ret float %res
}

define float @fadda_v8f32(float %start, ptr %a) {
; CHECK-LABEL: fadda_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov z2.s, z1.s[1]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    mov z2.s, z1.s[2]
; CHECK-NEXT:    mov z1.s, z1.s[3]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ldr q1, [x0, #16]
; CHECK-NEXT:    mov z2.s, z1.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    mov z2.s, z1.s[2]
; CHECK-NEXT:    mov z1.s, z1.s[3]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr q1, [x0, #16]
; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    str q1, [sp]
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp]
; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #16]
; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #24]
; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
  ret float %res
}

define double @fadda_v1f64(double %start, <1 x double> %a) {
; CHECK-LABEL: fadda_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v1f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
; NONEON-NOSVE-NEXT:    ret
  %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
  ret double %res
}

define double @fadda_v2f64(double %start, <2 x double> %a) {
; CHECK-LABEL: fadda_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp], #16
; NONEON-NOSVE-NEXT:    fadd d0, d0, d2
; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
; NONEON-NOSVE-NEXT:    ret
  %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
  ret double %res
}

define double @fadda_v4f64(double %start, ptr %a) {
; CHECK-LABEL: fadda_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ldr q1, [x0, #16]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fadda_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr q1, [x0, #16]
; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    str q1, [sp]
; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp]
; NONEON-NOSVE-NEXT:    fadd d0, d0, d2
; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    fadd d0, d0, d2
; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
  ret double %res
}

;
; FADDV
;
; fadd reductions whose calls carry the `fast` flag. In the +sve run the
; expected output reduces the vector with a predicated faddv and then adds
; %start; 256-bit inputs are first combined into one 128-bit vector with a
; predicated fadd. The <1 x double> case is a single scalar fadd.
;

define half @faddv_v4f16(half %start, <4 x half> %a) {
; CHECK-LABEL: faddv_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    faddv h1, p0, z1.h
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v4f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
  ret half %res
}

define half @faddv_v8f16(half %start, <8 x half> %a) {
; CHECK-LABEL: faddv_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    faddv h1, p0, z1.h
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v8f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr h1, [sp]
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #4]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #6]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
  ret half %res
}

define half @faddv_v16f16(half %start, ptr %a) {
; CHECK-LABEL: faddv_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q2, q1, [x0]
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    fadd z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    faddv h1, p0, z1.h
; CHECK-NEXT:    fadd h0, h0, h1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v16f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #18]
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h4, [sp]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt s4, h4
; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
; NONEON-NOSVE-NEXT:    fadd s2, s4, s3
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr h4, [sp, #4]
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt s4, h4
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt h2, s2
; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
; NONEON-NOSVE-NEXT:    ldr h4, [sp, #6]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s4, h4
; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
; NONEON-NOSVE-NEXT:    fcvt h2, s3
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #22]
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
; NONEON-NOSVE-NEXT:    ldr h4, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s4, h4
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fcvt h2, s3
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
; NONEON-NOSVE-NEXT:    ldr h4, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s4, h4
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fcvt h2, s3
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #26]
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
; NONEON-NOSVE-NEXT:    ldr h4, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s4, h4
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fcvt h2, s3
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fadd s2, s4, s3
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #30]
; NONEON-NOSVE-NEXT:    ldr h4, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt s4, h4
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt h2, s2
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fadd s2, s4, s3
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt h2, s2
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
  ret half %res
}

define float @faddv_v2f32(float %start, <2 x float> %a) {
; CHECK-LABEL: faddv_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    faddv s1, p0, z1.s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
  ret float %res
}

define float @faddv_v4f32(float %start, <4 x float> %a) {
; CHECK-LABEL: faddv_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    faddv s1, p0, z1.s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp s4, s3, [sp], #16
; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
; NONEON-NOSVE-NEXT:    fadd s1, s3, s1
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    ret
  %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
  ret float %res
}

define float @faddv_v8f32(float %start, ptr %a) {
; CHECK-LABEL: faddv_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q2, q1, [x0]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    fadd z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    faddv s1, p0, z1.s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp s4, s3, [sp]
; NONEON-NOSVE-NEXT:    ldp s5, s6, [sp, #24]
; NONEON-NOSVE-NEXT:    ldp s7, s16, [sp, #8]
; NONEON-NOSVE-NEXT:    fadd s1, s3, s1
; NONEON-NOSVE-NEXT:    fadd s2, s4, s2
; NONEON-NOSVE-NEXT:    fadd s3, s7, s5
; NONEON-NOSVE-NEXT:    fadd s4, s16, s6
; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
; NONEON-NOSVE-NEXT:    fadd s2, s3, s4
; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
  ret float %res
}

define double @faddv_v1f64(double %start, <1 x double> %a) {
; CHECK-LABEL: faddv_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v1f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
; NONEON-NOSVE-NEXT:    ret
  %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
  ret double %res
}

define double @faddv_v2f64(double %start, <2 x double> %a) {
; CHECK-LABEL: faddv_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp], #16
; NONEON-NOSVE-NEXT:    fadd d1, d2, d1
; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
; NONEON-NOSVE-NEXT:    ret
  %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
  ret double %res
}

define double @faddv_v4f64(double %start, ptr %a) {
; CHECK-LABEL: faddv_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q2, q1, [x0]
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    fadd z1.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: faddv_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp d4, d3, [sp], #32
; NONEON-NOSVE-NEXT:    fadd d1, d3, d1
; NONEON-NOSVE-NEXT:    fadd d2, d4, d2
; NONEON-NOSVE-NEXT:    fadd d1, d2, d1
; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
; NONEON-NOSVE-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
  ret double %res
}

;
; FMAXNMV
;
; llvm.vector.reduce.fmax reductions (no start operand). For the f16 and f32
; cases in the +sve run the expected output is a predicated fmaxnmv, preceded
; by a predicated fmaxnm of the two 128-bit halves for 256-bit inputs.
;

define half @fmaxv_v4f16(<4 x half> %a) {
; CHECK-LABEL: fmaxv_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fmaxv_v4f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
  ret half %res
}

define half @fmaxv_v8f16(<8 x half> %a) {
; CHECK-LABEL: fmaxv_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fmaxv_v8f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr h0, [sp]
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
  ret half %res
}

define half @fmaxv_v16f16(ptr %a) {
; CHECK-LABEL: fmaxv_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fmaxv_v16f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h3, [sp]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
; NONEON-NOSVE-NEXT:    fmaxnm s1, s3, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #4]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #6]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
; NONEON-NOSVE-NEXT:    fcvt h1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #22]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #26]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h1, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fmaxnm s1, s3, s2
; NONEON-NOSVE-NEXT:    ldr h2, [sp, #30]
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s2, h2
; NONEON-NOSVE-NEXT:    fcvt s3, h3
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fmaxnm s1, s3, s2
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    fcvt h1, s1
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt s1, h1
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
  ret half %res
}

define float @fmaxv_v2f32(<2 x float> %a) {
; CHECK-LABEL: fmaxv_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fmaxv_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #8]
; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
  ret float %res
}

define float @fmaxv_v4f32(<4 x float> %a) {
; CHECK-LABEL: fmaxv_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fmaxv_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp]
; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s2
; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
  ret float %res
}

define float @fmaxv_v8f32(ptr %a) {
; CHECK-LABEL: fmaxv_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fmaxv_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1028; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1029; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] 1030; NONEON-NOSVE-NEXT: ldp s3, s2, [sp] 1031; NONEON-NOSVE-NEXT: fmaxnm s0, s2, s0 1032; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s1 1033; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] 1034; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 1035; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24] 1036; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3 1037; NONEON-NOSVE-NEXT: fmaxnm s1, s4, s1 1038; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s2 1039; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 1040; NONEON-NOSVE-NEXT: add sp, sp, #32 1041; NONEON-NOSVE-NEXT: ret 1042 %op = load <8 x float>, ptr %a 1043 %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op) 1044 ret float %res 1045} 1046 1047define double @fmaxv_v1f64(<1 x double> %a) { 1048; CHECK-LABEL: fmaxv_v1f64: 1049; CHECK: // %bb.0: 1050; CHECK-NEXT: ret 1051; 1052; NONEON-NOSVE-LABEL: fmaxv_v1f64: 1053; NONEON-NOSVE: // %bb.0: 1054; NONEON-NOSVE-NEXT: ret 1055 %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) 1056 ret double %res 1057} 1058 1059define double @fmaxv_v2f64(<2 x double> %a) { 1060; CHECK-LABEL: fmaxv_v2f64: 1061; CHECK: // %bb.0: 1062; CHECK-NEXT: ptrue p0.d, vl2 1063; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1064; CHECK-NEXT: fmaxnmv d0, p0, z0.d 1065; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1066; CHECK-NEXT: ret 1067; 1068; NONEON-NOSVE-LABEL: fmaxv_v2f64: 1069; NONEON-NOSVE: // %bb.0: 1070; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
1071; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1072; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16 1073; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0 1074; NONEON-NOSVE-NEXT: ret 1075 %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) 1076 ret double %res 1077} 1078 1079define double @fmaxv_v4f64(ptr %a) { 1080; CHECK-LABEL: fmaxv_v4f64: 1081; CHECK: // %bb.0: 1082; CHECK-NEXT: ldp q1, q0, [x0] 1083; CHECK-NEXT: ptrue p0.d, vl2 1084; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d 1085; CHECK-NEXT: fmaxnmv d0, p0, z0.d 1086; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1087; CHECK-NEXT: ret 1088; 1089; NONEON-NOSVE-LABEL: fmaxv_v4f64: 1090; NONEON-NOSVE: // %bb.0: 1091; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1092; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 1093; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1094; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] 1095; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32 1096; NONEON-NOSVE-NEXT: fmaxnm d0, d2, d0 1097; NONEON-NOSVE-NEXT: fmaxnm d1, d3, d1 1098; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0 1099; NONEON-NOSVE-NEXT: ret 1100 %op = load <4 x double>, ptr %a 1101 %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op) 1102 ret double %res 1103} 1104 1105; 1106; FMINNMV 1107; 1108 1109define half @fminv_v4f16(<4 x half> %a) { 1110; CHECK-LABEL: fminv_v4f16: 1111; CHECK: // %bb.0: 1112; CHECK-NEXT: ptrue p0.h, vl4 1113; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1114; CHECK-NEXT: fminnmv h0, p0, z0.h 1115; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 1116; CHECK-NEXT: ret 1117; 1118; NONEON-NOSVE-LABEL: fminv_v4f16: 1119; NONEON-NOSVE: // %bb.0: 1120; NONEON-NOSVE-NEXT: sub sp, sp, #16 1121; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1122; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1123; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] 1124; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] 1125; NONEON-NOSVE-NEXT: fcvt s1, h1 1126; NONEON-NOSVE-NEXT: fcvt s0, h0 1127; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1128; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] 
1129; NONEON-NOSVE-NEXT: fcvt s1, h1 1130; NONEON-NOSVE-NEXT: fcvt h0, s0 1131; NONEON-NOSVE-NEXT: fcvt s0, h0 1132; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1133; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] 1134; NONEON-NOSVE-NEXT: fcvt s1, h1 1135; NONEON-NOSVE-NEXT: fcvt h0, s0 1136; NONEON-NOSVE-NEXT: fcvt s0, h0 1137; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1138; NONEON-NOSVE-NEXT: fcvt h0, s0 1139; NONEON-NOSVE-NEXT: add sp, sp, #16 1140; NONEON-NOSVE-NEXT: ret 1141 %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) 1142 ret half %res 1143} 1144 1145define half @fminv_v8f16(<8 x half> %a) { 1146; CHECK-LABEL: fminv_v8f16: 1147; CHECK: // %bb.0: 1148; CHECK-NEXT: ptrue p0.h, vl8 1149; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1150; CHECK-NEXT: fminnmv h0, p0, z0.h 1151; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 1152; CHECK-NEXT: ret 1153; 1154; NONEON-NOSVE-LABEL: fminv_v8f16: 1155; NONEON-NOSVE: // %bb.0: 1156; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 1157; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1158; NONEON-NOSVE-NEXT: ldr h0, [sp] 1159; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] 1160; NONEON-NOSVE-NEXT: fcvt s1, h1 1161; NONEON-NOSVE-NEXT: fcvt s0, h0 1162; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1163; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] 1164; NONEON-NOSVE-NEXT: fcvt s1, h1 1165; NONEON-NOSVE-NEXT: fcvt h0, s0 1166; NONEON-NOSVE-NEXT: fcvt s0, h0 1167; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1168; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] 1169; NONEON-NOSVE-NEXT: fcvt s1, h1 1170; NONEON-NOSVE-NEXT: fcvt h0, s0 1171; NONEON-NOSVE-NEXT: fcvt s0, h0 1172; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1173; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] 1174; NONEON-NOSVE-NEXT: fcvt s1, h1 1175; NONEON-NOSVE-NEXT: fcvt h0, s0 1176; NONEON-NOSVE-NEXT: fcvt s0, h0 1177; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1178; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] 1179; NONEON-NOSVE-NEXT: fcvt s1, h1 1180; NONEON-NOSVE-NEXT: fcvt h0, s0 1181; NONEON-NOSVE-NEXT: fcvt s0, h0 1182; NONEON-NOSVE-NEXT: 
fminnm s0, s0, s1 1183; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] 1184; NONEON-NOSVE-NEXT: fcvt s1, h1 1185; NONEON-NOSVE-NEXT: fcvt h0, s0 1186; NONEON-NOSVE-NEXT: fcvt s0, h0 1187; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1188; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] 1189; NONEON-NOSVE-NEXT: fcvt s1, h1 1190; NONEON-NOSVE-NEXT: fcvt h0, s0 1191; NONEON-NOSVE-NEXT: fcvt s0, h0 1192; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1193; NONEON-NOSVE-NEXT: fcvt h0, s0 1194; NONEON-NOSVE-NEXT: add sp, sp, #16 1195; NONEON-NOSVE-NEXT: ret 1196 %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a) 1197 ret half %res 1198} 1199 1200define half @fminv_v16f16(ptr %a) { 1201; CHECK-LABEL: fminv_v16f16: 1202; CHECK: // %bb.0: 1203; CHECK-NEXT: ldp q1, q0, [x0] 1204; CHECK-NEXT: ptrue p0.h, vl8 1205; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h 1206; CHECK-NEXT: fminnmv h0, p0, z0.h 1207; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 1208; CHECK-NEXT: ret 1209; 1210; NONEON-NOSVE-LABEL: fminv_v16f16: 1211; NONEON-NOSVE: // %bb.0: 1212; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1213; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1214; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1215; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] 1216; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] 1217; NONEON-NOSVE-NEXT: ldr h2, [sp, #16] 1218; NONEON-NOSVE-NEXT: ldr h3, [sp] 1219; NONEON-NOSVE-NEXT: fcvt s0, h0 1220; NONEON-NOSVE-NEXT: fcvt s1, h1 1221; NONEON-NOSVE-NEXT: fcvt s2, h2 1222; NONEON-NOSVE-NEXT: fcvt s3, h3 1223; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 1224; NONEON-NOSVE-NEXT: fminnm s1, s3, s2 1225; NONEON-NOSVE-NEXT: ldr h2, [sp, #20] 1226; NONEON-NOSVE-NEXT: ldr h3, [sp, #4] 1227; NONEON-NOSVE-NEXT: fcvt s2, h2 1228; NONEON-NOSVE-NEXT: fcvt s3, h3 1229; NONEON-NOSVE-NEXT: fcvt h0, s0 1230; NONEON-NOSVE-NEXT: fcvt h1, s1 1231; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 1232; NONEON-NOSVE-NEXT: ldr h3, [sp, #6] 1233; NONEON-NOSVE-NEXT: fcvt s0, h0 1234; NONEON-NOSVE-NEXT: fcvt s1, h1 1235; NONEON-NOSVE-NEXT: fcvt s3, h3 1236; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 1237; NONEON-NOSVE-NEXT: fcvt h1, s2 1238; NONEON-NOSVE-NEXT: ldr h2, [sp, #22] 1239; NONEON-NOSVE-NEXT: fcvt s2, h2 1240; NONEON-NOSVE-NEXT: fcvt h0, s0 1241; NONEON-NOSVE-NEXT: fcvt s1, h1 1242; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 1243; NONEON-NOSVE-NEXT: ldr h3, [sp, #8] 1244; NONEON-NOSVE-NEXT: fcvt s0, h0 1245; NONEON-NOSVE-NEXT: fcvt s3, h3 1246; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1247; NONEON-NOSVE-NEXT: fcvt h1, s2 1248; NONEON-NOSVE-NEXT: ldr h2, [sp, #24] 1249; NONEON-NOSVE-NEXT: fcvt s2, h2 1250; NONEON-NOSVE-NEXT: fcvt h0, s0 1251; NONEON-NOSVE-NEXT: fcvt s1, h1 1252; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 1253; NONEON-NOSVE-NEXT: ldr h3, [sp, #10] 1254; NONEON-NOSVE-NEXT: fcvt s0, h0 1255; NONEON-NOSVE-NEXT: fcvt s3, h3 1256; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1257; NONEON-NOSVE-NEXT: fcvt h1, s2 1258; NONEON-NOSVE-NEXT: ldr h2, [sp, #26] 1259; NONEON-NOSVE-NEXT: fcvt s2, h2 1260; NONEON-NOSVE-NEXT: fcvt h0, s0 1261; NONEON-NOSVE-NEXT: fcvt s1, h1 1262; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 1263; NONEON-NOSVE-NEXT: ldr h3, [sp, #12] 1264; 
NONEON-NOSVE-NEXT: fcvt s0, h0 1265; NONEON-NOSVE-NEXT: fcvt s3, h3 1266; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1267; NONEON-NOSVE-NEXT: fcvt h1, s2 1268; NONEON-NOSVE-NEXT: ldr h2, [sp, #28] 1269; NONEON-NOSVE-NEXT: fcvt s2, h2 1270; NONEON-NOSVE-NEXT: fcvt h0, s0 1271; NONEON-NOSVE-NEXT: fcvt s1, h1 1272; NONEON-NOSVE-NEXT: fcvt s0, h0 1273; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1274; NONEON-NOSVE-NEXT: fminnm s1, s3, s2 1275; NONEON-NOSVE-NEXT: ldr h2, [sp, #30] 1276; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] 1277; NONEON-NOSVE-NEXT: fcvt s2, h2 1278; NONEON-NOSVE-NEXT: fcvt s3, h3 1279; NONEON-NOSVE-NEXT: fcvt h0, s0 1280; NONEON-NOSVE-NEXT: fcvt h1, s1 1281; NONEON-NOSVE-NEXT: fcvt s0, h0 1282; NONEON-NOSVE-NEXT: fcvt s1, h1 1283; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1284; NONEON-NOSVE-NEXT: fminnm s1, s3, s2 1285; NONEON-NOSVE-NEXT: fcvt h0, s0 1286; NONEON-NOSVE-NEXT: fcvt h1, s1 1287; NONEON-NOSVE-NEXT: fcvt s0, h0 1288; NONEON-NOSVE-NEXT: fcvt s1, h1 1289; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1290; NONEON-NOSVE-NEXT: fcvt h0, s0 1291; NONEON-NOSVE-NEXT: add sp, sp, #32 1292; NONEON-NOSVE-NEXT: ret 1293 %op = load <16 x half>, ptr %a 1294 %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op) 1295 ret half %res 1296} 1297 1298define float @fminv_v2f32(<2 x float> %a) { 1299; CHECK-LABEL: fminv_v2f32: 1300; CHECK: // %bb.0: 1301; CHECK-NEXT: ptrue p0.s, vl2 1302; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1303; CHECK-NEXT: fminnmv s0, p0, z0.s 1304; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 1305; CHECK-NEXT: ret 1306; 1307; NONEON-NOSVE-LABEL: fminv_v2f32: 1308; NONEON-NOSVE: // %bb.0: 1309; NONEON-NOSVE-NEXT: sub sp, sp, #16 1310; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1311; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1312; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] 1313; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 1314; NONEON-NOSVE-NEXT: add sp, sp, #16 1315; NONEON-NOSVE-NEXT: ret 1316 %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a) 
1317 ret float %res 1318} 1319 1320define float @fminv_v4f32(<4 x float> %a) { 1321; CHECK-LABEL: fminv_v4f32: 1322; CHECK: // %bb.0: 1323; CHECK-NEXT: ptrue p0.s, vl4 1324; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1325; CHECK-NEXT: fminnmv s0, p0, z0.s 1326; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 1327; CHECK-NEXT: ret 1328; 1329; NONEON-NOSVE-LABEL: fminv_v4f32: 1330; NONEON-NOSVE: // %bb.0: 1331; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 1332; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1333; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] 1334; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 1335; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] 1336; NONEON-NOSVE-NEXT: fminnm s0, s0, s2 1337; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1338; NONEON-NOSVE-NEXT: add sp, sp, #16 1339; NONEON-NOSVE-NEXT: ret 1340 %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) 1341 ret float %res 1342} 1343 1344define float @fminv_v8f32(ptr %a) { 1345; CHECK-LABEL: fminv_v8f32: 1346; CHECK: // %bb.0: 1347; CHECK-NEXT: ldp q1, q0, [x0] 1348; CHECK-NEXT: ptrue p0.s, vl4 1349; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s 1350; CHECK-NEXT: fminnmv s0, p0, z0.s 1351; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 1352; CHECK-NEXT: ret 1353; 1354; NONEON-NOSVE-LABEL: fminv_v8f32: 1355; NONEON-NOSVE: // %bb.0: 1356; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1357; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1358; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1359; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] 1360; NONEON-NOSVE-NEXT: ldp s3, s2, [sp] 1361; NONEON-NOSVE-NEXT: fminnm s0, s2, s0 1362; NONEON-NOSVE-NEXT: fminnm s1, s3, s1 1363; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] 1364; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 1365; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24] 1366; NONEON-NOSVE-NEXT: fminnm s2, s2, s3 1367; NONEON-NOSVE-NEXT: fminnm s1, s4, s1 1368; NONEON-NOSVE-NEXT: fminnm s0, s0, s2 1369; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 1370; NONEON-NOSVE-NEXT: add sp, sp, #32 1371; NONEON-NOSVE-NEXT: ret 1372 %op = load <8 x float>, ptr %a 1373 %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op) 1374 ret float %res 1375} 1376 1377define double @fminv_v1f64(<1 x double> %a) { 1378; CHECK-LABEL: fminv_v1f64: 1379; CHECK: // %bb.0: 1380; CHECK-NEXT: ret 1381; 1382; NONEON-NOSVE-LABEL: fminv_v1f64: 1383; NONEON-NOSVE: // %bb.0: 1384; NONEON-NOSVE-NEXT: ret 1385 %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) 1386 ret double %res 1387} 1388 1389define double @fminv_v2f64(<2 x double> %a) { 1390; CHECK-LABEL: fminv_v2f64: 1391; CHECK: // %bb.0: 1392; CHECK-NEXT: ptrue p0.d, vl2 1393; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1394; CHECK-NEXT: fminnmv d0, p0, z0.d 1395; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1396; CHECK-NEXT: ret 1397; 1398; NONEON-NOSVE-LABEL: fminv_v2f64: 1399; NONEON-NOSVE: // %bb.0: 1400; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
1401; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1402; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16 1403; NONEON-NOSVE-NEXT: fminnm d0, d1, d0 1404; NONEON-NOSVE-NEXT: ret 1405 %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) 1406 ret double %res 1407} 1408 1409define double @fminv_v4f64(ptr %a) { 1410; CHECK-LABEL: fminv_v4f64: 1411; CHECK: // %bb.0: 1412; CHECK-NEXT: ldp q1, q0, [x0] 1413; CHECK-NEXT: ptrue p0.d, vl2 1414; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d 1415; CHECK-NEXT: fminnmv d0, p0, z0.d 1416; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1417; CHECK-NEXT: ret 1418; 1419; NONEON-NOSVE-LABEL: fminv_v4f64: 1420; NONEON-NOSVE: // %bb.0: 1421; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1422; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 1423; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1424; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] 1425; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32 1426; NONEON-NOSVE-NEXT: fminnm d0, d2, d0 1427; NONEON-NOSVE-NEXT: fminnm d1, d3, d1 1428; NONEON-NOSVE-NEXT: fminnm d0, d1, d0 1429; NONEON-NOSVE-NEXT: ret 1430 %op = load <4 x double>, ptr %a 1431 %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op) 1432 ret double %res 1433} 1434 1435; 1436; FMAXV 1437; 1438 1439define half @fmaximumv_v4f16(<4 x half> %a) { 1440; CHECK-LABEL: fmaximumv_v4f16: 1441; CHECK: // %bb.0: 1442; CHECK-NEXT: ptrue p0.h, vl4 1443; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1444; CHECK-NEXT: fmaxv h0, p0, z0.h 1445; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 1446; CHECK-NEXT: ret 1447; 1448; NONEON-NOSVE-LABEL: fmaximumv_v4f16: 1449; NONEON-NOSVE: // %bb.0: 1450; NONEON-NOSVE-NEXT: sub sp, sp, #16 1451; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1452; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1453; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] 1454; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] 1455; NONEON-NOSVE-NEXT: fcvt s1, h1 1456; NONEON-NOSVE-NEXT: fcvt s0, h0 1457; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1458; NONEON-NOSVE-NEXT: ldr h1, [sp, 
#12] 1459; NONEON-NOSVE-NEXT: fcvt s1, h1 1460; NONEON-NOSVE-NEXT: fcvt h0, s0 1461; NONEON-NOSVE-NEXT: fcvt s0, h0 1462; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1463; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] 1464; NONEON-NOSVE-NEXT: fcvt s1, h1 1465; NONEON-NOSVE-NEXT: fcvt h0, s0 1466; NONEON-NOSVE-NEXT: fcvt s0, h0 1467; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1468; NONEON-NOSVE-NEXT: fcvt h0, s0 1469; NONEON-NOSVE-NEXT: add sp, sp, #16 1470; NONEON-NOSVE-NEXT: ret 1471 %res = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a) 1472 ret half %res 1473} 1474 1475define half @fmaximumv_v8f16(<8 x half> %a) { 1476; CHECK-LABEL: fmaximumv_v8f16: 1477; CHECK: // %bb.0: 1478; CHECK-NEXT: ptrue p0.h, vl8 1479; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1480; CHECK-NEXT: fmaxv h0, p0, z0.h 1481; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 1482; CHECK-NEXT: ret 1483; 1484; NONEON-NOSVE-LABEL: fmaximumv_v8f16: 1485; NONEON-NOSVE: // %bb.0: 1486; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 1487; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1488; NONEON-NOSVE-NEXT: ldr h0, [sp] 1489; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] 1490; NONEON-NOSVE-NEXT: fcvt s1, h1 1491; NONEON-NOSVE-NEXT: fcvt s0, h0 1492; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1493; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] 1494; NONEON-NOSVE-NEXT: fcvt s1, h1 1495; NONEON-NOSVE-NEXT: fcvt h0, s0 1496; NONEON-NOSVE-NEXT: fcvt s0, h0 1497; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1498; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] 1499; NONEON-NOSVE-NEXT: fcvt s1, h1 1500; NONEON-NOSVE-NEXT: fcvt h0, s0 1501; NONEON-NOSVE-NEXT: fcvt s0, h0 1502; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1503; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] 1504; NONEON-NOSVE-NEXT: fcvt s1, h1 1505; NONEON-NOSVE-NEXT: fcvt h0, s0 1506; NONEON-NOSVE-NEXT: fcvt s0, h0 1507; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1508; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] 1509; NONEON-NOSVE-NEXT: fcvt s1, h1 1510; NONEON-NOSVE-NEXT: fcvt h0, s0 1511; NONEON-NOSVE-NEXT: fcvt s0, h0 1512; 
NONEON-NOSVE-NEXT: fmax s0, s0, s1 1513; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] 1514; NONEON-NOSVE-NEXT: fcvt s1, h1 1515; NONEON-NOSVE-NEXT: fcvt h0, s0 1516; NONEON-NOSVE-NEXT: fcvt s0, h0 1517; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1518; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] 1519; NONEON-NOSVE-NEXT: fcvt s1, h1 1520; NONEON-NOSVE-NEXT: fcvt h0, s0 1521; NONEON-NOSVE-NEXT: fcvt s0, h0 1522; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1523; NONEON-NOSVE-NEXT: fcvt h0, s0 1524; NONEON-NOSVE-NEXT: add sp, sp, #16 1525; NONEON-NOSVE-NEXT: ret 1526 %res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a) 1527 ret half %res 1528} 1529 1530define half @fmaximumv_v16f16(ptr %a) { 1531; CHECK-LABEL: fmaximumv_v16f16: 1532; CHECK: // %bb.0: 1533; CHECK-NEXT: ldp q1, q0, [x0] 1534; CHECK-NEXT: ptrue p0.h, vl8 1535; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h 1536; CHECK-NEXT: fmaxv h0, p0, z0.h 1537; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 1538; CHECK-NEXT: ret 1539; 1540; NONEON-NOSVE-LABEL: fmaximumv_v16f16: 1541; NONEON-NOSVE: // %bb.0: 1542; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1543; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1544; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1545; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] 1546; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] 1547; NONEON-NOSVE-NEXT: ldr h2, [sp, #16] 1548; NONEON-NOSVE-NEXT: ldr h3, [sp] 1549; NONEON-NOSVE-NEXT: fcvt s0, h0 1550; NONEON-NOSVE-NEXT: fcvt s1, h1 1551; NONEON-NOSVE-NEXT: fcvt s2, h2 1552; NONEON-NOSVE-NEXT: fcvt s3, h3 1553; NONEON-NOSVE-NEXT: fmax s0, s1, s0 1554; NONEON-NOSVE-NEXT: fmax s1, s3, s2 1555; NONEON-NOSVE-NEXT: ldr h2, [sp, #20] 1556; NONEON-NOSVE-NEXT: ldr h3, [sp, #4] 1557; NONEON-NOSVE-NEXT: fcvt s2, h2 1558; NONEON-NOSVE-NEXT: fcvt s3, h3 1559; NONEON-NOSVE-NEXT: fcvt h0, s0 1560; NONEON-NOSVE-NEXT: fcvt h1, s1 1561; NONEON-NOSVE-NEXT: fmax s2, s3, s2 1562; NONEON-NOSVE-NEXT: ldr h3, [sp, #6] 1563; NONEON-NOSVE-NEXT: fcvt s0, h0 1564; NONEON-NOSVE-NEXT: fcvt s1, h1 1565; NONEON-NOSVE-NEXT: fcvt s3, h3 1566; NONEON-NOSVE-NEXT: fmax s0, s1, s0 1567; NONEON-NOSVE-NEXT: fcvt h1, s2 1568; NONEON-NOSVE-NEXT: ldr h2, [sp, #22] 1569; NONEON-NOSVE-NEXT: fcvt s2, h2 1570; NONEON-NOSVE-NEXT: fcvt h0, s0 1571; NONEON-NOSVE-NEXT: fcvt s1, h1 1572; NONEON-NOSVE-NEXT: fmax s2, s3, s2 1573; NONEON-NOSVE-NEXT: ldr h3, [sp, #8] 1574; NONEON-NOSVE-NEXT: fcvt s0, h0 1575; NONEON-NOSVE-NEXT: fcvt s3, h3 1576; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1577; NONEON-NOSVE-NEXT: fcvt h1, s2 1578; NONEON-NOSVE-NEXT: ldr h2, [sp, #24] 1579; NONEON-NOSVE-NEXT: fcvt s2, h2 1580; NONEON-NOSVE-NEXT: fcvt h0, s0 1581; NONEON-NOSVE-NEXT: fcvt s1, h1 1582; NONEON-NOSVE-NEXT: fmax s2, s3, s2 1583; NONEON-NOSVE-NEXT: ldr h3, [sp, #10] 1584; NONEON-NOSVE-NEXT: fcvt s0, h0 1585; NONEON-NOSVE-NEXT: fcvt s3, h3 1586; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1587; NONEON-NOSVE-NEXT: fcvt h1, s2 1588; NONEON-NOSVE-NEXT: ldr h2, [sp, #26] 1589; NONEON-NOSVE-NEXT: fcvt s2, h2 1590; NONEON-NOSVE-NEXT: fcvt h0, s0 1591; NONEON-NOSVE-NEXT: fcvt s1, h1 1592; NONEON-NOSVE-NEXT: fmax s2, s3, s2 1593; NONEON-NOSVE-NEXT: ldr h3, [sp, #12] 1594; NONEON-NOSVE-NEXT: fcvt s0, 
h0 1595; NONEON-NOSVE-NEXT: fcvt s3, h3 1596; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1597; NONEON-NOSVE-NEXT: fcvt h1, s2 1598; NONEON-NOSVE-NEXT: ldr h2, [sp, #28] 1599; NONEON-NOSVE-NEXT: fcvt s2, h2 1600; NONEON-NOSVE-NEXT: fcvt h0, s0 1601; NONEON-NOSVE-NEXT: fcvt s1, h1 1602; NONEON-NOSVE-NEXT: fcvt s0, h0 1603; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1604; NONEON-NOSVE-NEXT: fmax s1, s3, s2 1605; NONEON-NOSVE-NEXT: ldr h2, [sp, #30] 1606; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] 1607; NONEON-NOSVE-NEXT: fcvt s2, h2 1608; NONEON-NOSVE-NEXT: fcvt s3, h3 1609; NONEON-NOSVE-NEXT: fcvt h0, s0 1610; NONEON-NOSVE-NEXT: fcvt h1, s1 1611; NONEON-NOSVE-NEXT: fcvt s0, h0 1612; NONEON-NOSVE-NEXT: fcvt s1, h1 1613; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1614; NONEON-NOSVE-NEXT: fmax s1, s3, s2 1615; NONEON-NOSVE-NEXT: fcvt h0, s0 1616; NONEON-NOSVE-NEXT: fcvt h1, s1 1617; NONEON-NOSVE-NEXT: fcvt s0, h0 1618; NONEON-NOSVE-NEXT: fcvt s1, h1 1619; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1620; NONEON-NOSVE-NEXT: fcvt h0, s0 1621; NONEON-NOSVE-NEXT: add sp, sp, #32 1622; NONEON-NOSVE-NEXT: ret 1623 %op = load <16 x half>, ptr %a 1624 %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op) 1625 ret half %res 1626} 1627 1628define float @fmaximumv_v2f32(<2 x float> %a) { 1629; CHECK-LABEL: fmaximumv_v2f32: 1630; CHECK: // %bb.0: 1631; CHECK-NEXT: ptrue p0.s, vl2 1632; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1633; CHECK-NEXT: fmaxv s0, p0, z0.s 1634; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 1635; CHECK-NEXT: ret 1636; 1637; NONEON-NOSVE-LABEL: fmaximumv_v2f32: 1638; NONEON-NOSVE: // %bb.0: 1639; NONEON-NOSVE-NEXT: sub sp, sp, #16 1640; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1641; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1642; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] 1643; NONEON-NOSVE-NEXT: fmax s0, s1, s0 1644; NONEON-NOSVE-NEXT: add sp, sp, #16 1645; NONEON-NOSVE-NEXT: ret 1646 %res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a) 1647 ret float %res 1648} 
1649 1650define float @fmaximumv_v4f32(<4 x float> %a) { 1651; CHECK-LABEL: fmaximumv_v4f32: 1652; CHECK: // %bb.0: 1653; CHECK-NEXT: ptrue p0.s, vl4 1654; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1655; CHECK-NEXT: fmaxv s0, p0, z0.s 1656; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 1657; CHECK-NEXT: ret 1658; 1659; NONEON-NOSVE-LABEL: fmaximumv_v4f32: 1660; NONEON-NOSVE: // %bb.0: 1661; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 1662; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1663; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] 1664; NONEON-NOSVE-NEXT: fmax s0, s1, s0 1665; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] 1666; NONEON-NOSVE-NEXT: fmax s0, s0, s2 1667; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1668; NONEON-NOSVE-NEXT: add sp, sp, #16 1669; NONEON-NOSVE-NEXT: ret 1670 %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a) 1671 ret float %res 1672} 1673 1674define float @fmaximumv_v8f32(ptr %a) { 1675; CHECK-LABEL: fmaximumv_v8f32: 1676; CHECK: // %bb.0: 1677; CHECK-NEXT: ldp q1, q0, [x0] 1678; CHECK-NEXT: ptrue p0.s, vl4 1679; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s 1680; CHECK-NEXT: fmaxv s0, p0, z0.s 1681; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 1682; CHECK-NEXT: ret 1683; 1684; NONEON-NOSVE-LABEL: fmaximumv_v8f32: 1685; NONEON-NOSVE: // %bb.0: 1686; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1687; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1688; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1689; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] 1690; NONEON-NOSVE-NEXT: ldp s3, s2, [sp] 1691; NONEON-NOSVE-NEXT: fmax s0, s2, s0 1692; NONEON-NOSVE-NEXT: fmax s1, s3, s1 1693; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] 1694; NONEON-NOSVE-NEXT: fmax s0, s1, s0 1695; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24] 1696; NONEON-NOSVE-NEXT: fmax s2, s2, s3 1697; NONEON-NOSVE-NEXT: fmax s1, s4, s1 1698; NONEON-NOSVE-NEXT: fmax s0, s0, s2 1699; NONEON-NOSVE-NEXT: fmax s0, s0, s1 1700; NONEON-NOSVE-NEXT: add sp, sp, #32 1701; NONEON-NOSVE-NEXT: ret 1702 %op = load <8 x float>, ptr %a 1703 %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op) 1704 ret float %res 1705} 1706 1707define double @fmaximumv_v1f64(<1 x double> %a) { 1708; CHECK-LABEL: fmaximumv_v1f64: 1709; CHECK: // %bb.0: 1710; CHECK-NEXT: ret 1711; 1712; NONEON-NOSVE-LABEL: fmaximumv_v1f64: 1713; NONEON-NOSVE: // %bb.0: 1714; NONEON-NOSVE-NEXT: ret 1715 %res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a) 1716 ret double %res 1717} 1718 1719define double @fmaximumv_v2f64(<2 x double> %a) { 1720; CHECK-LABEL: fmaximumv_v2f64: 1721; CHECK: // %bb.0: 1722; CHECK-NEXT: ptrue p0.d, vl2 1723; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1724; CHECK-NEXT: fmaxv d0, p0, z0.d 1725; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1726; CHECK-NEXT: ret 1727; 1728; NONEON-NOSVE-LABEL: fmaximumv_v2f64: 1729; NONEON-NOSVE: // %bb.0: 1730; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
1731; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1732; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16 1733; NONEON-NOSVE-NEXT: fmax d0, d1, d0 1734; NONEON-NOSVE-NEXT: ret 1735 %res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a) 1736 ret double %res 1737} 1738 1739define double @fmaximumv_v4f64(ptr %a) { 1740; CHECK-LABEL: fmaximumv_v4f64: 1741; CHECK: // %bb.0: 1742; CHECK-NEXT: ldp q1, q0, [x0] 1743; CHECK-NEXT: ptrue p0.d, vl2 1744; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d 1745; CHECK-NEXT: fmaxv d0, p0, z0.d 1746; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1747; CHECK-NEXT: ret 1748; 1749; NONEON-NOSVE-LABEL: fmaximumv_v4f64: 1750; NONEON-NOSVE: // %bb.0: 1751; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1752; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 1753; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1754; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] 1755; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32 1756; NONEON-NOSVE-NEXT: fmax d0, d2, d0 1757; NONEON-NOSVE-NEXT: fmax d1, d3, d1 1758; NONEON-NOSVE-NEXT: fmax d0, d1, d0 1759; NONEON-NOSVE-NEXT: ret 1760 %op = load <4 x double>, ptr %a 1761 %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op) 1762 ret double %res 1763} 1764 1765; 1766; FMINV 1767; 1768 1769define half @fminimumv_v4f16(<4 x half> %a) { 1770; CHECK-LABEL: fminimumv_v4f16: 1771; CHECK: // %bb.0: 1772; CHECK-NEXT: ptrue p0.h, vl4 1773; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1774; CHECK-NEXT: fminv h0, p0, z0.h 1775; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 1776; CHECK-NEXT: ret 1777; 1778; NONEON-NOSVE-LABEL: fminimumv_v4f16: 1779; NONEON-NOSVE: // %bb.0: 1780; NONEON-NOSVE-NEXT: sub sp, sp, #16 1781; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1782; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1783; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] 1784; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] 1785; NONEON-NOSVE-NEXT: fcvt s1, h1 1786; NONEON-NOSVE-NEXT: fcvt s0, h0 1787; NONEON-NOSVE-NEXT: fmin s0, s0, s1 1788; NONEON-NOSVE-NEXT: ldr 
h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a)
  ret half %res
}

; Reduces an <8 x half> argument with llvm.vector.reduce.fminimum and returns
; the half result. Expected code for the +sve run is a vl8 predicate followed
; by a single fminv. Expected code for the run without +sve stores the vector
; to the stack and folds the elements one at a time with scalar fmin, widening
; each half operand to single precision and narrowing the running result back
; to half after every step.
define half @fminimumv_v8f16(<8 x half> %a) {
; CHECK-LABEL: fminimumv_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a)
  ret half %res
}

; Same reduction on a <16 x half> vector loaded from %a. For the +sve run the
; two q-register halves are first combined with a predicated vector fmin and
; the result is reduced by fminv. For the run without +sve both halves are
; stored to a 32-byte stack area and reduced with scalar fcvt/fmin sequences.
define half @fminimumv_v16f16(ptr %a) {
; CHECK-LABEL: fminimumv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #16]
; NONEON-NOSVE-NEXT: ldr h3, [sp]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt s3, h3
; NONEON-NOSVE-NEXT: fmin s0, s1, s0
; NONEON-NOSVE-NEXT: fmin s1, s3, s2
; NONEON-NOSVE-NEXT: ldr h2, [sp, #20]
; NONEON-NOSVE-NEXT: ldr h3, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt s3, h3
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt h1, s1
; NONEON-NOSVE-NEXT: fmin s2, s3, s2
; NONEON-NOSVE-NEXT: ldr h3, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s3, h3
; NONEON-NOSVE-NEXT: fmin s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h1, s2
; NONEON-NOSVE-NEXT: ldr h2, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmin s2, s3, s2
; NONEON-NOSVE-NEXT: ldr h3, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s3, h3
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fcvt h1, s2
; NONEON-NOSVE-NEXT: ldr h2, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmin s2, s3, s2
; NONEON-NOSVE-NEXT: ldr h3, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s3, h3
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fcvt h1, s2
; NONEON-NOSVE-NEXT: ldr h2, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmin s2, s3, s2
; NONEON-NOSVE-NEXT: ldr h3, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s3, h3
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fcvt h1, s2
; NONEON-NOSVE-NEXT: ldr h2, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fmin s1, s3, s2
; NONEON-NOSVE-NEXT: ldr h2, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h3, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt s3, h3
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt h1, s1
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fmin s1, s3, s2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: fcvt h1, s1
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
  ret half %res
}

; Reduces a <2 x float> argument with llvm.vector.reduce.fminimum. The +sve
; run expects a vl2 predicate and one fminv; the run without +sve expects the
; vector to be stored to the stack, both lanes reloaded as scalars, and a
; single scalar fmin.
define float @fminimumv_v2f32(<2 x float> %a) {
; CHECK-LABEL: fminimumv_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8]
; NONEON-NOSVE-NEXT: fmin s0, s1, s0
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a)
  ret float %res
}

; Reduces a <4 x float> argument with llvm.vector.reduce.fminimum. The +sve
; run expects a vl4 predicate and one fminv; the run without +sve expects the
; lanes to be reloaded from the stack in pairs and folded with scalar fmin.
define float @fminimumv_v4f32(<4 x float> %a) {
; CHECK-LABEL: fminimumv_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldp s1, s0, [sp]
; NONEON-NOSVE-NEXT: fmin s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
; NONEON-NOSVE-NEXT: fmin s0, s0, s2
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
  ret float %res
}

; Same reduction on an <8 x float> vector loaded from %a. The +sve run expects
; the two q-register halves to be combined with a predicated vector fmin
; before the fminv; the run without +sve expects both halves to be stored to
; the stack and reduced with scalar fmin only.
define float @fminimumv_v8f32(ptr %a) {
; CHECK-LABEL: fminimumv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16]
; NONEON-NOSVE-NEXT: ldp s3, s2, [sp]
; NONEON-NOSVE-NEXT: fmin s0, s2, s0
; NONEON-NOSVE-NEXT: fmin s1, s3, s1
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
; NONEON-NOSVE-NEXT: fmin s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24]
; NONEON-NOSVE-NEXT: fmin s2, s2, s3
; NONEON-NOSVE-NEXT: fmin s1, s4, s1
; NONEON-NOSVE-NEXT: fmin s0, s0, s2
; NONEON-NOSVE-NEXT: fmin s0, s0, s1
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
  ret float %res
}

; Single-element case: reducing a <1 x double> is expected to emit nothing but
; a ret in both run configurations.
define double @fminimumv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fminimumv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v1f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ret
  %res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
  ret double %res
}

; Reduces a <2 x double> argument with llvm.vector.reduce.fminimum. The +sve
; run expects a vl2 predicate and one fminv; the run without +sve expects a
; pre-indexed store of q0, a post-indexed ldp that reloads both lanes and
; restores sp, and a single scalar fmin.
define double @fminimumv_v2f64(<2 x double> %a) {
; CHECK-LABEL: fminimumv_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fminv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16
; NONEON-NOSVE-NEXT: fmin d0, d1, d0
; NONEON-NOSVE-NEXT: ret
  %res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
  ret double %res
}

; Same reduction on a <4 x double> vector loaded from %a. The +sve run expects
; a predicated vector fmin of the two q-register halves followed by fminv; the
; run without +sve expects both halves to be stored to the stack and reduced
; with three scalar fmin instructions.
define double @fminimumv_v4f64(ptr %a) {
; CHECK-LABEL: fminimumv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: fminv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fminimumv_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32
; NONEON-NOSVE-NEXT: fmin d0, d2, d0
; NONEON-NOSVE-NEXT: fmin d1, d3, d1
; NONEON-NOSVE-NEXT: fmin d0, d1, d0
; NONEON-NOSVE-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
  ret double %res
}

; Declarations of the llvm.vector.reduce.* intrinsics (fadd, fmax, fmin,
; fmaximum, fminimum), grouped by operation and then by element type (half,
; float, double). Only the fadd reductions take a scalar start operand in
; addition to the vector.
declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)

declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)

declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)

declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)

declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)

declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)

declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)

declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)

declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)

declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)

declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)

declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)

declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)

declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)

declare double @llvm.vector.reduce.fminimum.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)