1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE-FP 3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -fp-contract=fast -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE-VMLA 4; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE 5 6define arm_aapcs_vfpcc <8 x half> @vfma16_v1(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) { 7; CHECK-MVE-FP-LABEL: vfma16_v1: 8; CHECK-MVE-FP: @ %bb.0: @ %entry 9; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2 10; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, q1 11; CHECK-MVE-FP-NEXT: bx lr 12; 13; CHECK-MVE-VMLA-LABEL: vfma16_v1: 14; CHECK-MVE-VMLA: @ %bb.0: @ %entry 15; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, q2 16; CHECK-MVE-VMLA-NEXT: bx lr 17; 18; CHECK-MVE-LABEL: vfma16_v1: 19; CHECK-MVE: @ %bb.0: @ %entry 20; CHECK-MVE-NEXT: vmovx.f16 s13, s0 21; CHECK-MVE-NEXT: vmovx.f16 s12, s8 22; CHECK-MVE-NEXT: vmovx.f16 s14, s4 23; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 24; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 25; CHECK-MVE-NEXT: vmovx.f16 s12, s1 26; CHECK-MVE-NEXT: vmovx.f16 s4, s9 27; CHECK-MVE-NEXT: vmovx.f16 s8, s5 28; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4 29; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9 30; CHECK-MVE-NEXT: vins.f16 s1, s12 31; CHECK-MVE-NEXT: vmovx.f16 s12, s2 32; CHECK-MVE-NEXT: vmovx.f16 s4, s10 33; CHECK-MVE-NEXT: vmovx.f16 s8, s6 34; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4 35; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10 36; CHECK-MVE-NEXT: vmovx.f16 s8, s3 37; CHECK-MVE-NEXT: vmovx.f16 s4, s11 38; CHECK-MVE-NEXT: vmovx.f16 s6, s7 39; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11 40; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4 41; CHECK-MVE-NEXT: vins.f16 s0, s13 42; CHECK-MVE-NEXT: vins.f16 s2, s12 43; CHECK-MVE-NEXT: vins.f16 s3, s8 44; CHECK-MVE-NEXT: bx lr 45entry: 46 
%0 = fmul <8 x half> %src2, %src3 47 %1 = fadd <8 x half> %src1, %0 48 ret <8 x half> %1 49} 50 51define arm_aapcs_vfpcc <8 x half> @vfma16_v2(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) { 52; CHECK-MVE-FP-LABEL: vfma16_v2: 53; CHECK-MVE-FP: @ %bb.0: @ %entry 54; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2 55; CHECK-MVE-FP-NEXT: vadd.f16 q0, q1, q0 56; CHECK-MVE-FP-NEXT: bx lr 57; 58; CHECK-MVE-VMLA-LABEL: vfma16_v2: 59; CHECK-MVE-VMLA: @ %bb.0: @ %entry 60; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, q2 61; CHECK-MVE-VMLA-NEXT: bx lr 62; 63; CHECK-MVE-LABEL: vfma16_v2: 64; CHECK-MVE: @ %bb.0: @ %entry 65; CHECK-MVE-NEXT: vmovx.f16 s13, s0 66; CHECK-MVE-NEXT: vmovx.f16 s12, s8 67; CHECK-MVE-NEXT: vmovx.f16 s14, s4 68; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 69; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 70; CHECK-MVE-NEXT: vmovx.f16 s12, s1 71; CHECK-MVE-NEXT: vmovx.f16 s4, s9 72; CHECK-MVE-NEXT: vmovx.f16 s8, s5 73; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4 74; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9 75; CHECK-MVE-NEXT: vins.f16 s1, s12 76; CHECK-MVE-NEXT: vmovx.f16 s12, s2 77; CHECK-MVE-NEXT: vmovx.f16 s4, s10 78; CHECK-MVE-NEXT: vmovx.f16 s8, s6 79; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4 80; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10 81; CHECK-MVE-NEXT: vmovx.f16 s8, s3 82; CHECK-MVE-NEXT: vmovx.f16 s4, s11 83; CHECK-MVE-NEXT: vmovx.f16 s6, s7 84; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11 85; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4 86; CHECK-MVE-NEXT: vins.f16 s0, s13 87; CHECK-MVE-NEXT: vins.f16 s2, s12 88; CHECK-MVE-NEXT: vins.f16 s3, s8 89; CHECK-MVE-NEXT: bx lr 90entry: 91 %0 = fmul <8 x half> %src2, %src3 92 %1 = fadd <8 x half> %0, %src1 93 ret <8 x half> %1 94} 95 96define arm_aapcs_vfpcc <8 x half> @vfms16(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) { 97; CHECK-MVE-FP-LABEL: vfms16: 98; CHECK-MVE-FP: @ %bb.0: @ %entry 99; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2 100; CHECK-MVE-FP-NEXT: vsub.f16 q0, q0, q1 101; CHECK-MVE-FP-NEXT: bx lr 102; 103; CHECK-MVE-VMLA-LABEL: vfms16: 104; 
CHECK-MVE-VMLA: @ %bb.0: @ %entry 105; CHECK-MVE-VMLA-NEXT: vfms.f16 q0, q1, q2 106; CHECK-MVE-VMLA-NEXT: bx lr 107; 108; CHECK-MVE-LABEL: vfms16: 109; CHECK-MVE: @ %bb.0: @ %entry 110; CHECK-MVE-NEXT: vmovx.f16 s13, s0 111; CHECK-MVE-NEXT: vmovx.f16 s12, s8 112; CHECK-MVE-NEXT: vmovx.f16 s14, s4 113; CHECK-MVE-NEXT: vmls.f16 s0, s4, s8 114; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12 115; CHECK-MVE-NEXT: vmovx.f16 s12, s1 116; CHECK-MVE-NEXT: vmovx.f16 s4, s9 117; CHECK-MVE-NEXT: vmovx.f16 s8, s5 118; CHECK-MVE-NEXT: vmls.f16 s12, s8, s4 119; CHECK-MVE-NEXT: vmls.f16 s1, s5, s9 120; CHECK-MVE-NEXT: vins.f16 s1, s12 121; CHECK-MVE-NEXT: vmovx.f16 s12, s2 122; CHECK-MVE-NEXT: vmovx.f16 s4, s10 123; CHECK-MVE-NEXT: vmovx.f16 s8, s6 124; CHECK-MVE-NEXT: vmls.f16 s12, s8, s4 125; CHECK-MVE-NEXT: vmls.f16 s2, s6, s10 126; CHECK-MVE-NEXT: vmovx.f16 s8, s3 127; CHECK-MVE-NEXT: vmovx.f16 s4, s11 128; CHECK-MVE-NEXT: vmovx.f16 s6, s7 129; CHECK-MVE-NEXT: vmls.f16 s3, s7, s11 130; CHECK-MVE-NEXT: vmls.f16 s8, s6, s4 131; CHECK-MVE-NEXT: vins.f16 s0, s13 132; CHECK-MVE-NEXT: vins.f16 s2, s12 133; CHECK-MVE-NEXT: vins.f16 s3, s8 134; CHECK-MVE-NEXT: bx lr 135entry: 136 %0 = fmul <8 x half> %src2, %src3 137 %1 = fsub <8 x half> %src1, %0 138 ret <8 x half> %1 139} 140 141define arm_aapcs_vfpcc <8 x half> @vfmar16(<8 x half> %src1, <8 x half> %src2, float %src3o) { 142; CHECK-MVE-FP-LABEL: vfmar16: 143; CHECK-MVE-FP: @ %bb.0: @ %entry 144; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8 145; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8 146; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, r0 147; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, q1 148; CHECK-MVE-FP-NEXT: bx lr 149; 150; CHECK-MVE-VMLA-LABEL: vfmar16: 151; CHECK-MVE-VMLA: @ %bb.0: @ %entry 152; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8 153; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8 154; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, r0 155; CHECK-MVE-VMLA-NEXT: bx lr 156; 157; CHECK-MVE-LABEL: vfmar16: 158; CHECK-MVE: @ %bb.0: @ %entry 159; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, 
s8 160; CHECK-MVE-NEXT: vmovx.f16 s12, s0 161; CHECK-MVE-NEXT: vmovx.f16 s10, s4 162; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 163; CHECK-MVE-NEXT: vmla.f16 s12, s10, s8 164; CHECK-MVE-NEXT: vmovx.f16 s10, s1 165; CHECK-MVE-NEXT: vmovx.f16 s4, s5 166; CHECK-MVE-NEXT: vmla.f16 s1, s5, s8 167; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8 168; CHECK-MVE-NEXT: vmovx.f16 s4, s6 169; CHECK-MVE-NEXT: vins.f16 s1, s10 170; CHECK-MVE-NEXT: vmovx.f16 s10, s2 171; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8 172; CHECK-MVE-NEXT: vmla.f16 s2, s6, s8 173; CHECK-MVE-NEXT: vmovx.f16 s6, s3 174; CHECK-MVE-NEXT: vmovx.f16 s4, s7 175; CHECK-MVE-NEXT: vmla.f16 s6, s4, s8 176; CHECK-MVE-NEXT: vmla.f16 s3, s7, s8 177; CHECK-MVE-NEXT: vins.f16 s0, s12 178; CHECK-MVE-NEXT: vins.f16 s2, s10 179; CHECK-MVE-NEXT: vins.f16 s3, s6 180; CHECK-MVE-NEXT: bx lr 181entry: 182 %src3 = fptrunc float %src3o to half 183 %i = insertelement <8 x half> undef, half %src3, i32 0 184 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 185 %0 = fmul <8 x half> %src2, %sp 186 %1 = fadd <8 x half> %src1, %0 187 ret <8 x half> %1 188} 189 190define arm_aapcs_vfpcc <8 x half> @vfma16(<8 x half> %src1, <8 x half> %src2, float %src3o) { 191; CHECK-MVE-FP-LABEL: vfma16: 192; CHECK-MVE-FP: @ %bb.0: @ %entry 193; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8 194; CHECK-MVE-FP-NEXT: vmul.f16 q0, q0, q1 195; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8 196; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, r0 197; CHECK-MVE-FP-NEXT: bx lr 198; 199; CHECK-MVE-VMLA-LABEL: vfma16: 200; CHECK-MVE-VMLA: @ %bb.0: @ %entry 201; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8 202; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8 203; CHECK-MVE-VMLA-NEXT: vfmas.f16 q0, q1, r0 204; CHECK-MVE-VMLA-NEXT: bx lr 205; 206; CHECK-MVE-LABEL: vfma16: 207; CHECK-MVE: @ %bb.0: @ %entry 208; CHECK-MVE-NEXT: vmov q3, q0 209; CHECK-MVE-NEXT: vcvtb.f16.f32 s3, s8 210; CHECK-MVE-NEXT: vmovx.f16 s0, s4 211; CHECK-MVE-NEXT: vmovx.f16 s2, s12 212; CHECK-MVE-NEXT: vmov.f32 s8, s3 213; 
CHECK-MVE-NEXT: vmla.f16 s8, s2, s0 214; CHECK-MVE-NEXT: vmov.f32 s0, s3 215; CHECK-MVE-NEXT: vmla.f16 s0, s12, s4 216; CHECK-MVE-NEXT: vmov.f32 s1, s3 217; CHECK-MVE-NEXT: vins.f16 s0, s8 218; CHECK-MVE-NEXT: vmovx.f16 s2, s5 219; CHECK-MVE-NEXT: vmovx.f16 s4, s13 220; CHECK-MVE-NEXT: vmov.f32 s8, s3 221; CHECK-MVE-NEXT: vmla.f16 s8, s4, s2 222; CHECK-MVE-NEXT: vmla.f16 s1, s13, s5 223; CHECK-MVE-NEXT: vins.f16 s1, s8 224; CHECK-MVE-NEXT: vmovx.f16 s2, s6 225; CHECK-MVE-NEXT: vmovx.f16 s4, s14 226; CHECK-MVE-NEXT: vmov.f32 s8, s3 227; CHECK-MVE-NEXT: vmla.f16 s8, s4, s2 228; CHECK-MVE-NEXT: vmov.f32 s2, s3 229; CHECK-MVE-NEXT: vmla.f16 s2, s14, s6 230; CHECK-MVE-NEXT: vmovx.f16 s4, s7 231; CHECK-MVE-NEXT: vins.f16 s2, s8 232; CHECK-MVE-NEXT: vmov.f32 s8, s3 233; CHECK-MVE-NEXT: vmovx.f16 s6, s15 234; CHECK-MVE-NEXT: vmla.f16 s3, s15, s7 235; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4 236; CHECK-MVE-NEXT: vins.f16 s3, s8 237; CHECK-MVE-NEXT: bx lr 238entry: 239 %src3 = fptrunc float %src3o to half 240 %i = insertelement <8 x half> undef, half %src3, i32 0 241 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 242 %0 = fmul <8 x half> %src1, %src2 243 %1 = fadd <8 x half> %sp, %0 244 ret <8 x half> %1 245} 246 247define arm_aapcs_vfpcc <4 x float> @vfma32_v1(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) { 248; CHECK-MVE-FP-LABEL: vfma32_v1: 249; CHECK-MVE-FP: @ %bb.0: @ %entry 250; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2 251; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, q1 252; CHECK-MVE-FP-NEXT: bx lr 253; 254; CHECK-MVE-VMLA-LABEL: vfma32_v1: 255; CHECK-MVE-VMLA: @ %bb.0: @ %entry 256; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, q2 257; CHECK-MVE-VMLA-NEXT: bx lr 258; 259; CHECK-MVE-LABEL: vfma32_v1: 260; CHECK-MVE: @ %bb.0: @ %entry 261; CHECK-MVE-NEXT: vmla.f32 s3, s7, s11 262; CHECK-MVE-NEXT: vmla.f32 s2, s6, s10 263; CHECK-MVE-NEXT: vmla.f32 s1, s5, s9 264; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8 265; CHECK-MVE-NEXT: bx lr 266entry: 267 %0 = fmul 
<4 x float> %src2, %src3 268 %1 = fadd <4 x float> %src1, %0 269 ret <4 x float> %1 270} 271 272define arm_aapcs_vfpcc <4 x float> @vfma32_v2(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) { 273; CHECK-MVE-FP-LABEL: vfma32_v2: 274; CHECK-MVE-FP: @ %bb.0: @ %entry 275; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2 276; CHECK-MVE-FP-NEXT: vadd.f32 q0, q1, q0 277; CHECK-MVE-FP-NEXT: bx lr 278; 279; CHECK-MVE-VMLA-LABEL: vfma32_v2: 280; CHECK-MVE-VMLA: @ %bb.0: @ %entry 281; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, q2 282; CHECK-MVE-VMLA-NEXT: bx lr 283; 284; CHECK-MVE-LABEL: vfma32_v2: 285; CHECK-MVE: @ %bb.0: @ %entry 286; CHECK-MVE-NEXT: vmla.f32 s3, s7, s11 287; CHECK-MVE-NEXT: vmla.f32 s2, s6, s10 288; CHECK-MVE-NEXT: vmla.f32 s1, s5, s9 289; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8 290; CHECK-MVE-NEXT: bx lr 291entry: 292 %0 = fmul <4 x float> %src2, %src3 293 %1 = fadd <4 x float> %0, %src1 294 ret <4 x float> %1 295} 296 297define arm_aapcs_vfpcc <4 x float> @vfms32(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) { 298; CHECK-MVE-FP-LABEL: vfms32: 299; CHECK-MVE-FP: @ %bb.0: @ %entry 300; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2 301; CHECK-MVE-FP-NEXT: vsub.f32 q0, q0, q1 302; CHECK-MVE-FP-NEXT: bx lr 303; 304; CHECK-MVE-VMLA-LABEL: vfms32: 305; CHECK-MVE-VMLA: @ %bb.0: @ %entry 306; CHECK-MVE-VMLA-NEXT: vfms.f32 q0, q1, q2 307; CHECK-MVE-VMLA-NEXT: bx lr 308; 309; CHECK-MVE-LABEL: vfms32: 310; CHECK-MVE: @ %bb.0: @ %entry 311; CHECK-MVE-NEXT: vmls.f32 s3, s7, s11 312; CHECK-MVE-NEXT: vmls.f32 s2, s6, s10 313; CHECK-MVE-NEXT: vmls.f32 s1, s5, s9 314; CHECK-MVE-NEXT: vmls.f32 s0, s4, s8 315; CHECK-MVE-NEXT: bx lr 316entry: 317 %0 = fmul <4 x float> %src2, %src3 318 %1 = fsub <4 x float> %src1, %0 319 ret <4 x float> %1 320} 321 322define arm_aapcs_vfpcc <4 x float> @vfmar32(<4 x float> %src1, <4 x float> %src2, float %src3) { 323; CHECK-MVE-FP-LABEL: vfmar32: 324; CHECK-MVE-FP: @ %bb.0: @ %entry 325; CHECK-MVE-FP-NEXT: vmov r0, s8 326; CHECK-MVE-FP-NEXT: 
vmul.f32 q1, q1, r0 327; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, q1 328; CHECK-MVE-FP-NEXT: bx lr 329; 330; CHECK-MVE-VMLA-LABEL: vfmar32: 331; CHECK-MVE-VMLA: @ %bb.0: @ %entry 332; CHECK-MVE-VMLA-NEXT: vmov r0, s8 333; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, r0 334; CHECK-MVE-VMLA-NEXT: bx lr 335; 336; CHECK-MVE-LABEL: vfmar32: 337; CHECK-MVE: @ %bb.0: @ %entry 338; CHECK-MVE-NEXT: vmla.f32 s3, s7, s8 339; CHECK-MVE-NEXT: vmla.f32 s2, s6, s8 340; CHECK-MVE-NEXT: vmla.f32 s1, s5, s8 341; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8 342; CHECK-MVE-NEXT: bx lr 343entry: 344 %i = insertelement <4 x float> undef, float %src3, i32 0 345 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 346 %0 = fmul <4 x float> %src2, %sp 347 %1 = fadd <4 x float> %src1, %0 348 ret <4 x float> %1 349} 350 351define arm_aapcs_vfpcc <4 x float> @vfmas32(<4 x float> %src1, <4 x float> %src2, float %src3) { 352; CHECK-MVE-FP-LABEL: vfmas32: 353; CHECK-MVE-FP: @ %bb.0: @ %entry 354; CHECK-MVE-FP-NEXT: vmov r0, s8 355; CHECK-MVE-FP-NEXT: vmul.f32 q0, q0, q1 356; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, r0 357; CHECK-MVE-FP-NEXT: bx lr 358; 359; CHECK-MVE-VMLA-LABEL: vfmas32: 360; CHECK-MVE-VMLA: @ %bb.0: @ %entry 361; CHECK-MVE-VMLA-NEXT: vmov r0, s8 362; CHECK-MVE-VMLA-NEXT: vfmas.f32 q0, q1, r0 363; CHECK-MVE-VMLA-NEXT: bx lr 364; 365; CHECK-MVE-LABEL: vfmas32: 366; CHECK-MVE: @ %bb.0: @ %entry 367; CHECK-MVE-NEXT: vmov.f32 s11, s8 368; CHECK-MVE-NEXT: vmov.f32 s10, s8 369; CHECK-MVE-NEXT: vmov.f32 s9, s8 370; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4 371; CHECK-MVE-NEXT: vmla.f32 s11, s3, s7 372; CHECK-MVE-NEXT: vmla.f32 s10, s2, s6 373; CHECK-MVE-NEXT: vmla.f32 s9, s1, s5 374; CHECK-MVE-NEXT: vmov q0, q2 375; CHECK-MVE-NEXT: bx lr 376entry: 377 %i = insertelement <4 x float> undef, float %src3, i32 0 378 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 379 %0 = fmul <4 x float> %src1, %src2 380 %1 = fadd <4 x float> %sp, %0 381 ret <4 x float> %1 382} 
383 384 385; Predicated version of the same tests 386 387define arm_aapcs_vfpcc <8 x half> @vfma16_v1_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) { 388; CHECK-MVE-FP-LABEL: vfma16_v1_pred: 389; CHECK-MVE-FP: @ %bb.0: @ %entry 390; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2 391; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr 392; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q0, q2 393; CHECK-MVE-FP-NEXT: bx lr 394; 395; CHECK-MVE-VMLA-LABEL: vfma16_v1_pred: 396; CHECK-MVE-VMLA: @ %bb.0: @ %entry 397; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr 398; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2 399; CHECK-MVE-VMLA-NEXT: bx lr 400; 401; CHECK-MVE-LABEL: vfma16_v1_pred: 402; CHECK-MVE: @ %bb.0: @ %entry 403; CHECK-MVE-NEXT: vmovx.f16 s14, s4 404; CHECK-MVE-NEXT: vmovx.f16 s13, s0 405; CHECK-MVE-NEXT: vcmp.f16 s14, #0 406; CHECK-MVE-NEXT: vmovx.f16 s12, s8 407; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 408; CHECK-MVE-NEXT: vmov.f32 s15, s13 409; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12 410; CHECK-MVE-NEXT: vcmp.f16 s4, #0 411; CHECK-MVE-NEXT: vmov.f32 s14, s0 412; CHECK-MVE-NEXT: vmla.f16 s14, s4, s8 413; CHECK-MVE-NEXT: vmovx.f16 s8, s5 414; CHECK-MVE-NEXT: vmovx.f16 s4, s9 415; CHECK-MVE-NEXT: cset r0, mi 416; CHECK-MVE-NEXT: cmp r0, #0 417; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15 418; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 419; CHECK-MVE-NEXT: vcmp.f16 s8, #0 420; CHECK-MVE-NEXT: cset r0, mi 421; CHECK-MVE-NEXT: cmp r0, #0 422; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14 423; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 424; CHECK-MVE-NEXT: vins.f16 s0, s12 425; CHECK-MVE-NEXT: vmovx.f16 s12, s1 426; CHECK-MVE-NEXT: vmov.f32 s14, s12 427; CHECK-MVE-NEXT: vcmp.f16 s5, #0 428; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4 429; CHECK-MVE-NEXT: vmov.f32 s8, s1 430; CHECK-MVE-NEXT: vmla.f16 s8, s5, s9 431; CHECK-MVE-NEXT: cset r0, mi 432; CHECK-MVE-NEXT: cmp r0, #0 433; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14 434; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 435; CHECK-MVE-NEXT: vmovx.f16 s12, s2 436; CHECK-MVE-NEXT: 
vmov.f32 s14, s12 437; CHECK-MVE-NEXT: cset r0, mi 438; CHECK-MVE-NEXT: cmp r0, #0 439; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8 440; CHECK-MVE-NEXT: vmovx.f16 s8, s6 441; CHECK-MVE-NEXT: vcmp.f16 s8, #0 442; CHECK-MVE-NEXT: vins.f16 s1, s4 443; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 444; CHECK-MVE-NEXT: vmovx.f16 s4, s10 445; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4 446; CHECK-MVE-NEXT: vcmp.f16 s6, #0 447; CHECK-MVE-NEXT: vmov.f32 s8, s2 448; CHECK-MVE-NEXT: vmla.f16 s8, s6, s10 449; CHECK-MVE-NEXT: vmovx.f16 s6, s7 450; CHECK-MVE-NEXT: cset r0, mi 451; CHECK-MVE-NEXT: cmp r0, #0 452; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14 453; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 454; CHECK-MVE-NEXT: vcmp.f16 s6, #0 455; CHECK-MVE-NEXT: cset r0, mi 456; CHECK-MVE-NEXT: cmp r0, #0 457; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8 458; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 459; CHECK-MVE-NEXT: vmovx.f16 s8, s3 460; CHECK-MVE-NEXT: vins.f16 s2, s4 461; CHECK-MVE-NEXT: vmovx.f16 s4, s11 462; CHECK-MVE-NEXT: vmov.f32 s10, s8 463; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4 464; CHECK-MVE-NEXT: vcmp.f16 s7, #0 465; CHECK-MVE-NEXT: vmov.f32 s6, s3 466; CHECK-MVE-NEXT: vmla.f16 s6, s7, s11 467; CHECK-MVE-NEXT: cset r0, mi 468; CHECK-MVE-NEXT: cmp r0, #0 469; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10 470; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 471; CHECK-MVE-NEXT: cset r0, mi 472; CHECK-MVE-NEXT: cmp r0, #0 473; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6 474; CHECK-MVE-NEXT: vins.f16 s3, s4 475; CHECK-MVE-NEXT: bx lr 476entry: 477 %0 = fmul <8 x half> %src2, %src3 478 %1 = fadd <8 x half> %src1, %0 479 %c = fcmp olt <8 x half> %src2, zeroinitializer 480 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1 481 ret <8 x half> %s 482} 483 484define arm_aapcs_vfpcc <8 x half> @vfma16_v2_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) { 485; CHECK-MVE-FP-LABEL: vfma16_v2_pred: 486; CHECK-MVE-FP: @ %bb.0: @ %entry 487; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2 488; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr 489; 
CHECK-MVE-FP-NEXT: vaddt.f16 q0, q2, q0 490; CHECK-MVE-FP-NEXT: bx lr 491; 492; CHECK-MVE-VMLA-LABEL: vfma16_v2_pred: 493; CHECK-MVE-VMLA: @ %bb.0: @ %entry 494; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr 495; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2 496; CHECK-MVE-VMLA-NEXT: bx lr 497; 498; CHECK-MVE-LABEL: vfma16_v2_pred: 499; CHECK-MVE: @ %bb.0: @ %entry 500; CHECK-MVE-NEXT: vmovx.f16 s14, s4 501; CHECK-MVE-NEXT: vmovx.f16 s13, s0 502; CHECK-MVE-NEXT: vcmp.f16 s14, #0 503; CHECK-MVE-NEXT: vmovx.f16 s12, s8 504; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 505; CHECK-MVE-NEXT: vmov.f32 s15, s13 506; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12 507; CHECK-MVE-NEXT: vcmp.f16 s4, #0 508; CHECK-MVE-NEXT: vmov.f32 s14, s0 509; CHECK-MVE-NEXT: vmla.f16 s14, s4, s8 510; CHECK-MVE-NEXT: vmovx.f16 s8, s5 511; CHECK-MVE-NEXT: vmovx.f16 s4, s9 512; CHECK-MVE-NEXT: cset r0, mi 513; CHECK-MVE-NEXT: cmp r0, #0 514; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15 515; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 516; CHECK-MVE-NEXT: vcmp.f16 s8, #0 517; CHECK-MVE-NEXT: cset r0, mi 518; CHECK-MVE-NEXT: cmp r0, #0 519; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14 520; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 521; CHECK-MVE-NEXT: vins.f16 s0, s12 522; CHECK-MVE-NEXT: vmovx.f16 s12, s1 523; CHECK-MVE-NEXT: vmov.f32 s14, s12 524; CHECK-MVE-NEXT: vcmp.f16 s5, #0 525; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4 526; CHECK-MVE-NEXT: vmov.f32 s8, s1 527; CHECK-MVE-NEXT: vmla.f16 s8, s5, s9 528; CHECK-MVE-NEXT: cset r0, mi 529; CHECK-MVE-NEXT: cmp r0, #0 530; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14 531; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 532; CHECK-MVE-NEXT: vmovx.f16 s12, s2 533; CHECK-MVE-NEXT: vmov.f32 s14, s12 534; CHECK-MVE-NEXT: cset r0, mi 535; CHECK-MVE-NEXT: cmp r0, #0 536; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8 537; CHECK-MVE-NEXT: vmovx.f16 s8, s6 538; CHECK-MVE-NEXT: vcmp.f16 s8, #0 539; CHECK-MVE-NEXT: vins.f16 s1, s4 540; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 541; CHECK-MVE-NEXT: vmovx.f16 s4, s10 542; 
CHECK-MVE-NEXT: vmla.f16 s14, s8, s4 543; CHECK-MVE-NEXT: vcmp.f16 s6, #0 544; CHECK-MVE-NEXT: vmov.f32 s8, s2 545; CHECK-MVE-NEXT: vmla.f16 s8, s6, s10 546; CHECK-MVE-NEXT: vmovx.f16 s6, s7 547; CHECK-MVE-NEXT: cset r0, mi 548; CHECK-MVE-NEXT: cmp r0, #0 549; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14 550; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 551; CHECK-MVE-NEXT: vcmp.f16 s6, #0 552; CHECK-MVE-NEXT: cset r0, mi 553; CHECK-MVE-NEXT: cmp r0, #0 554; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8 555; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 556; CHECK-MVE-NEXT: vmovx.f16 s8, s3 557; CHECK-MVE-NEXT: vins.f16 s2, s4 558; CHECK-MVE-NEXT: vmovx.f16 s4, s11 559; CHECK-MVE-NEXT: vmov.f32 s10, s8 560; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4 561; CHECK-MVE-NEXT: vcmp.f16 s7, #0 562; CHECK-MVE-NEXT: vmov.f32 s6, s3 563; CHECK-MVE-NEXT: vmla.f16 s6, s7, s11 564; CHECK-MVE-NEXT: cset r0, mi 565; CHECK-MVE-NEXT: cmp r0, #0 566; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10 567; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 568; CHECK-MVE-NEXT: cset r0, mi 569; CHECK-MVE-NEXT: cmp r0, #0 570; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6 571; CHECK-MVE-NEXT: vins.f16 s3, s4 572; CHECK-MVE-NEXT: bx lr 573entry: 574 %0 = fmul <8 x half> %src2, %src3 575 %1 = fadd <8 x half> %0, %src1 576 %c = fcmp olt <8 x half> %src2, zeroinitializer 577 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1 578 ret <8 x half> %s 579} 580 581define arm_aapcs_vfpcc <8 x half> @vfms16_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) { 582; CHECK-MVE-FP-LABEL: vfms16_pred: 583; CHECK-MVE-FP: @ %bb.0: @ %entry 584; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2 585; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr 586; CHECK-MVE-FP-NEXT: vsubt.f16 q0, q0, q2 587; CHECK-MVE-FP-NEXT: bx lr 588; 589; CHECK-MVE-VMLA-LABEL: vfms16_pred: 590; CHECK-MVE-VMLA: @ %bb.0: @ %entry 591; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr 592; CHECK-MVE-VMLA-NEXT: vfmst.f16 q0, q1, q2 593; CHECK-MVE-VMLA-NEXT: bx lr 594; 595; CHECK-MVE-LABEL: vfms16_pred: 596; CHECK-MVE: 
@ %bb.0: @ %entry 597; CHECK-MVE-NEXT: vmovx.f16 s14, s4 598; CHECK-MVE-NEXT: vmovx.f16 s13, s0 599; CHECK-MVE-NEXT: vcmp.f16 s14, #0 600; CHECK-MVE-NEXT: vmovx.f16 s12, s8 601; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 602; CHECK-MVE-NEXT: vmov.f32 s15, s13 603; CHECK-MVE-NEXT: vmls.f16 s15, s14, s12 604; CHECK-MVE-NEXT: vcmp.f16 s4, #0 605; CHECK-MVE-NEXT: vmov.f32 s14, s0 606; CHECK-MVE-NEXT: vmls.f16 s14, s4, s8 607; CHECK-MVE-NEXT: vmovx.f16 s8, s5 608; CHECK-MVE-NEXT: vmovx.f16 s4, s9 609; CHECK-MVE-NEXT: cset r0, mi 610; CHECK-MVE-NEXT: cmp r0, #0 611; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15 612; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 613; CHECK-MVE-NEXT: vcmp.f16 s8, #0 614; CHECK-MVE-NEXT: cset r0, mi 615; CHECK-MVE-NEXT: cmp r0, #0 616; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14 617; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 618; CHECK-MVE-NEXT: vins.f16 s0, s12 619; CHECK-MVE-NEXT: vmovx.f16 s12, s1 620; CHECK-MVE-NEXT: vmov.f32 s14, s12 621; CHECK-MVE-NEXT: vcmp.f16 s5, #0 622; CHECK-MVE-NEXT: vmls.f16 s14, s8, s4 623; CHECK-MVE-NEXT: vmov.f32 s8, s1 624; CHECK-MVE-NEXT: vmls.f16 s8, s5, s9 625; CHECK-MVE-NEXT: cset r0, mi 626; CHECK-MVE-NEXT: cmp r0, #0 627; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14 628; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 629; CHECK-MVE-NEXT: vmovx.f16 s12, s2 630; CHECK-MVE-NEXT: vmov.f32 s14, s12 631; CHECK-MVE-NEXT: cset r0, mi 632; CHECK-MVE-NEXT: cmp r0, #0 633; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8 634; CHECK-MVE-NEXT: vmovx.f16 s8, s6 635; CHECK-MVE-NEXT: vcmp.f16 s8, #0 636; CHECK-MVE-NEXT: vins.f16 s1, s4 637; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 638; CHECK-MVE-NEXT: vmovx.f16 s4, s10 639; CHECK-MVE-NEXT: vmls.f16 s14, s8, s4 640; CHECK-MVE-NEXT: vcmp.f16 s6, #0 641; CHECK-MVE-NEXT: vmov.f32 s8, s2 642; CHECK-MVE-NEXT: vmls.f16 s8, s6, s10 643; CHECK-MVE-NEXT: vmovx.f16 s6, s7 644; CHECK-MVE-NEXT: cset r0, mi 645; CHECK-MVE-NEXT: cmp r0, #0 646; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14 647; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 648; 
CHECK-MVE-NEXT: vcmp.f16 s6, #0 649; CHECK-MVE-NEXT: cset r0, mi 650; CHECK-MVE-NEXT: cmp r0, #0 651; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8 652; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 653; CHECK-MVE-NEXT: vmovx.f16 s8, s3 654; CHECK-MVE-NEXT: vins.f16 s2, s4 655; CHECK-MVE-NEXT: vmovx.f16 s4, s11 656; CHECK-MVE-NEXT: vmov.f32 s10, s8 657; CHECK-MVE-NEXT: vmls.f16 s10, s6, s4 658; CHECK-MVE-NEXT: vcmp.f16 s7, #0 659; CHECK-MVE-NEXT: vmov.f32 s6, s3 660; CHECK-MVE-NEXT: vmls.f16 s6, s7, s11 661; CHECK-MVE-NEXT: cset r0, mi 662; CHECK-MVE-NEXT: cmp r0, #0 663; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10 664; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 665; CHECK-MVE-NEXT: cset r0, mi 666; CHECK-MVE-NEXT: cmp r0, #0 667; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6 668; CHECK-MVE-NEXT: vins.f16 s3, s4 669; CHECK-MVE-NEXT: bx lr 670entry: 671 %0 = fmul <8 x half> %src2, %src3 672 %1 = fsub <8 x half> %src1, %0 673 %c = fcmp olt <8 x half> %src2, zeroinitializer 674 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1 675 ret <8 x half> %s 676} 677 678define arm_aapcs_vfpcc <8 x half> @vfmar16_pred(<8 x half> %src1, <8 x half> %src2, float %src3o) { 679; CHECK-MVE-FP-LABEL: vfmar16_pred: 680; CHECK-MVE-FP: @ %bb.0: @ %entry 681; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8 682; CHECK-MVE-FP-NEXT: vcmp.f16 lt, q1, zr 683; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8 684; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, r0 685; CHECK-MVE-FP-NEXT: vpst 686; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q0, q1 687; CHECK-MVE-FP-NEXT: bx lr 688; 689; CHECK-MVE-VMLA-LABEL: vfmar16_pred: 690; CHECK-MVE-VMLA: @ %bb.0: @ %entry 691; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8 692; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8 693; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr 694; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, r0 695; CHECK-MVE-VMLA-NEXT: bx lr 696; 697; CHECK-MVE-LABEL: vfmar16_pred: 698; CHECK-MVE: @ %bb.0: @ %entry 699; CHECK-MVE-NEXT: vmovx.f16 s10, s4 700; CHECK-MVE-NEXT: vmovx.f16 s12, s0 701; CHECK-MVE-NEXT: vcmp.f16 s10, #0 702; 
CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8 703; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 704; CHECK-MVE-NEXT: vmov.f32 s14, s12 705; CHECK-MVE-NEXT: vmla.f16 s14, s10, s8 706; CHECK-MVE-NEXT: vcmp.f16 s4, #0 707; CHECK-MVE-NEXT: cset r0, mi 708; CHECK-MVE-NEXT: cmp r0, #0 709; CHECK-MVE-NEXT: vseleq.f16 s10, s12, s14 710; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 711; CHECK-MVE-NEXT: vmov.f32 s12, s0 712; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8 713; CHECK-MVE-NEXT: vmovx.f16 s4, s5 714; CHECK-MVE-NEXT: vcmp.f16 s4, #0 715; CHECK-MVE-NEXT: cset r0, mi 716; CHECK-MVE-NEXT: cmp r0, #0 717; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s12 718; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 719; CHECK-MVE-NEXT: vins.f16 s0, s10 720; CHECK-MVE-NEXT: vmovx.f16 s10, s1 721; CHECK-MVE-NEXT: vmov.f32 s12, s10 722; CHECK-MVE-NEXT: vcmp.f16 s5, #0 723; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8 724; CHECK-MVE-NEXT: cset r0, mi 725; CHECK-MVE-NEXT: cmp r0, #0 726; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12 727; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 728; CHECK-MVE-NEXT: vmov.f32 s10, s1 729; CHECK-MVE-NEXT: vmla.f16 s10, s5, s8 730; CHECK-MVE-NEXT: cset r0, mi 731; CHECK-MVE-NEXT: cmp r0, #0 732; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s10 733; CHECK-MVE-NEXT: vmovx.f16 s10, s2 734; CHECK-MVE-NEXT: vins.f16 s1, s4 735; CHECK-MVE-NEXT: vmovx.f16 s4, s6 736; CHECK-MVE-NEXT: vcmp.f16 s4, #0 737; CHECK-MVE-NEXT: vmov.f32 s12, s10 738; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 739; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8 740; CHECK-MVE-NEXT: vcmp.f16 s6, #0 741; CHECK-MVE-NEXT: cset r0, mi 742; CHECK-MVE-NEXT: cmp r0, #0 743; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12 744; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 745; CHECK-MVE-NEXT: vmov.f32 s10, s2 746; CHECK-MVE-NEXT: vmla.f16 s10, s6, s8 747; CHECK-MVE-NEXT: vmovx.f16 s6, s3 748; CHECK-MVE-NEXT: cset r0, mi 749; CHECK-MVE-NEXT: cmp r0, #0 750; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s10 751; CHECK-MVE-NEXT: vmov.f32 s10, s6 752; CHECK-MVE-NEXT: vins.f16 s2, s4 753; CHECK-MVE-NEXT: vmovx.f16 s4, 
s7 754; CHECK-MVE-NEXT: vcmp.f16 s4, #0 755; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8 756; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 757; CHECK-MVE-NEXT: vcmp.f16 s7, #0 758; CHECK-MVE-NEXT: cset r0, mi 759; CHECK-MVE-NEXT: cmp r0, #0 760; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s10 761; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 762; CHECK-MVE-NEXT: vmov.f32 s6, s3 763; CHECK-MVE-NEXT: vmla.f16 s6, s7, s8 764; CHECK-MVE-NEXT: cset r0, mi 765; CHECK-MVE-NEXT: cmp r0, #0 766; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6 767; CHECK-MVE-NEXT: vins.f16 s3, s4 768; CHECK-MVE-NEXT: bx lr 769entry: 770 %src3 = fptrunc float %src3o to half 771 %i = insertelement <8 x half> undef, half %src3, i32 0 772 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 773 %0 = fmul <8 x half> %src2, %sp 774 %1 = fadd <8 x half> %src1, %0 775 %c = fcmp olt <8 x half> %src2, zeroinitializer 776 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1 777 ret <8 x half> %s 778} 779 780define arm_aapcs_vfpcc <8 x half> @vfma16_pred(<8 x half> %src1, <8 x half> %src2, float %src3o) { 781; CHECK-MVE-FP-LABEL: vfma16_pred: 782; CHECK-MVE-FP: @ %bb.0: @ %entry 783; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8 784; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8 785; CHECK-MVE-FP-NEXT: vmul.f16 q2, q0, q1 786; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr 787; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q2, r0 788; CHECK-MVE-FP-NEXT: bx lr 789; 790; CHECK-MVE-VMLA-LABEL: vfma16_pred: 791; CHECK-MVE-VMLA: @ %bb.0: @ %entry 792; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8 793; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8 794; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr 795; CHECK-MVE-VMLA-NEXT: vfmast.f16 q0, q1, r0 796; CHECK-MVE-VMLA-NEXT: bx lr 797; 798; CHECK-MVE-LABEL: vfma16_pred: 799; CHECK-MVE: @ %bb.0: @ %entry 800; CHECK-MVE-NEXT: vmovx.f16 s10, s4 801; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8 802; CHECK-MVE-NEXT: vcmp.f16 s10, #0 803; CHECK-MVE-NEXT: vmovx.f16 s12, s0 804; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 805; CHECK-MVE-NEXT: vmov.f32 
s14, s8 806; CHECK-MVE-NEXT: vmla.f16 s14, s12, s10 807; CHECK-MVE-NEXT: vcmp.f16 s4, #0 808; CHECK-MVE-NEXT: cset r0, mi 809; CHECK-MVE-NEXT: cmp r0, #0 810; CHECK-MVE-NEXT: vseleq.f16 s10, s12, s14 811; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 812; CHECK-MVE-NEXT: vmov.f32 s12, s8 813; CHECK-MVE-NEXT: vmla.f16 s12, s0, s4 814; CHECK-MVE-NEXT: vmovx.f16 s4, s5 815; CHECK-MVE-NEXT: vcmp.f16 s4, #0 816; CHECK-MVE-NEXT: cset r0, mi 817; CHECK-MVE-NEXT: cmp r0, #0 818; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s12 819; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 820; CHECK-MVE-NEXT: vins.f16 s0, s10 821; CHECK-MVE-NEXT: vmovx.f16 s10, s1 822; CHECK-MVE-NEXT: vmov.f32 s12, s8 823; CHECK-MVE-NEXT: vcmp.f16 s5, #0 824; CHECK-MVE-NEXT: vmla.f16 s12, s10, s4 825; CHECK-MVE-NEXT: cset r0, mi 826; CHECK-MVE-NEXT: cmp r0, #0 827; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12 828; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 829; CHECK-MVE-NEXT: vmov.f32 s10, s8 830; CHECK-MVE-NEXT: vmla.f16 s10, s1, s5 831; CHECK-MVE-NEXT: vmov.f32 s12, s8 832; CHECK-MVE-NEXT: cset r0, mi 833; CHECK-MVE-NEXT: cmp r0, #0 834; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s10 835; CHECK-MVE-NEXT: vmovx.f16 s10, s2 836; CHECK-MVE-NEXT: vins.f16 s1, s4 837; CHECK-MVE-NEXT: vmovx.f16 s4, s6 838; CHECK-MVE-NEXT: vcmp.f16 s4, #0 839; CHECK-MVE-NEXT: vmla.f16 s12, s10, s4 840; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 841; CHECK-MVE-NEXT: vcmp.f16 s6, #0 842; CHECK-MVE-NEXT: cset r0, mi 843; CHECK-MVE-NEXT: cmp r0, #0 844; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12 845; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 846; CHECK-MVE-NEXT: vmov.f32 s10, s8 847; CHECK-MVE-NEXT: vmla.f16 s10, s2, s6 848; CHECK-MVE-NEXT: vmovx.f16 s6, s3 849; CHECK-MVE-NEXT: cset r0, mi 850; CHECK-MVE-NEXT: cmp r0, #0 851; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s10 852; CHECK-MVE-NEXT: vmov.f32 s10, s8 853; CHECK-MVE-NEXT: vins.f16 s2, s4 854; CHECK-MVE-NEXT: vmovx.f16 s4, s7 855; CHECK-MVE-NEXT: vcmp.f16 s4, #0 856; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4 857; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s7, #0
; CHECK-MVE-NEXT: vmla.f16 s8, s3, s7
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s10
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s8
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
entry:
  %src3 = fptrunc float %src3o to half
  %i = insertelement <8 x half> undef, half %src3, i32 0
  %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = fmul <8 x half> %src1, %src2
  %1 = fadd <8 x half> %sp, %0
  %c = fcmp olt <8 x half> %src2, zeroinitializer
  %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
  ret <8 x half> %s
}

; Predicated multiply-add with the accumulator as the first fadd operand:
; lanes where %src2 compares ordered-less-than zero take %src1 + %src2 * %src3,
; all other lanes keep %src1. The expectations below show a separate vmul
; followed by a predicated vaddt in the MVE-FP group, a single predicated vfmat
; in the MVE-VMLA group, and per-lane scalar vmla/vseleq code in the plain MVE
; group.
define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfma32_v1_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q0, q2
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma32_v1_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma32_v1_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fadd <4 x float> %src1, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

; Same computation as vfma32_v1_pred but with the fadd operands commuted
; (product first, %src1 second). The MVE-VMLA and plain MVE expectations are
; the same as for the v1 form; the MVE-FP vaddt has its source operands swapped.
define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfma32_v2_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q2, q0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma32_v2_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma32_v2_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fadd <4 x float> %0, %src1
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

; Predicated multiply-subtract: lanes where %src2 compares ordered-less-than
; zero take %src1 - %src2 * %src3, all other lanes keep %src1. The expectations
; use vsubt / vfmst / scalar vmls in place of the add forms seen in the
; vfma32 tests.
define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfms32_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vsubt.f32 q0, q0, q2
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfms32_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmst.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfms32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmls.f32 s12, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
; CHECK-MVE-NEXT: vmls.f32 s14, s7, s11
; CHECK-MVE-NEXT: vmls.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmls.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fsub <4 x float> %src1, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

; Predicated multiply-add where the multiplier is a scalar splat: the float
; %src3 is broadcast to all four lanes (insertelement + zero-mask
; shufflevector), and lanes where %src2 compares ordered-less-than zero take
; %src1 + %src2 * splat(%src3); all other lanes keep %src1. The vector
; expectations move the scalar to r0 and use the vector-by-scalar instruction
; forms.
define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float> %src2, float %src3) {
; CHECK-MVE-FP-LABEL: vfmar32_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmov r0, s8
; CHECK-MVE-FP-NEXT: vcmp.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, r0
; CHECK-MVE-FP-NEXT: vpst
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmar32_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmar32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s10, s2
; CHECK-MVE-NEXT: vmov.f32 s12, s1
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: vmla.f32 s10, s6, s8
; CHECK-MVE-NEXT: vmla.f32 s12, s5, s8
; CHECK-MVE-NEXT: vmla.f32 s14, s7, s8
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %i = insertelement <4 x float> undef, float %src3, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = fmul <4 x float> %src2, %sp
  %1 = fadd <4 x float> %src1, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

; Predicated multiply-add where the addend is a scalar splat: the float %src3
; is broadcast to all four lanes, and lanes where %src2 compares
; ordered-less-than zero take splat(%src3) + %src1 * %src2; all other lanes
; keep %src1. The MVE-VMLA expectation is a single predicated vfmast with the
; scalar in r0.
define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float> %src2, float %src3) {
; CHECK-MVE-FP-LABEL: vfmas32_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmov r0, s8
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q0, q1
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q2, r0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmas32_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmast.f32 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmas32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmov.f32 s12, s8
; CHECK-MVE-NEXT: vmov.f32 s14, s8
; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4
; CHECK-MVE-NEXT: vmla.f32 s10, s2, s6
; CHECK-MVE-NEXT: vmla.f32 s12, s1, s5
; CHECK-MVE-NEXT: vmla.f32 s14, s3, s7
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %i = insertelement <4 x float> undef, float %src3, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = fmul <4 x float> %src1, %src2
  %1 = fadd <4 x float> %sp, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}