; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Overview
; --------
; Each function below feeds one or more vector lanes (obtained with
; extractelement) into a scalar fused multiply-add intrinsic and pins the
; AArch64 assembly that llc is expected to emit for it.
;
; Reading the test names:
;   fmla / fmls     - plain fma, or fma with one multiplicand negated.
;   ss4S, ss2S      - float result, lane taken from <4 x float> / <2 x float>.
;   ddD, dd2D, ss2D - double result, lane taken from <1 x double> / <2 x double>.
;   _<N>            - trailing number in the name (not always equal to the
;                     lane index actually extracted, see the *_3_ext0 tests on
;                     <2 x float>, which extract lane 1).
;   _swap / _swp    - same test with the two multiplicands exchanged.
;   _ext0           - both multiplicands come from extractelement.
;   _strict         - same test using llvm.experimental.constrained.fma inside
;                     a strictfp function.
;
; The expected-output comments are generated by the script named on the first
; line; regenerate them with that script instead of editing them by hand.

; Attribute group applied to every *_strict function in this file.
attributes #0 = { strictfp }

; Intrinsics under test: the ordinary fma and its constrained-FP counterpart,
; for float and double.
declare float @llvm.fma.f32(float, float, float)
declare double @llvm.fma.f64(double, double, double)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

;===----------------------------------------------------------------------===;
; fmla, float, one multiplicand extracted from a vector
;===----------------------------------------------------------------------===;

; Lane 0 of <4 x float> as second multiplicand: a scalar fmadd is expected.
define float @test_fmla_ss4S_0(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmla_ss4S_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s1, s2, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 0
  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %tmp2
}

; Same as above with the multiplicands exchanged; the fmadd source registers
; are expected in the exchanged order too.
define float @test_fmla_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmla_ss4S_0_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s2, s1, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 0
  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
  ret float %tmp2
}

; Lane 3 of <4 x float>: the lane-indexed fmla form (v2.s[3]) is expected.
define float @test_fmla_ss4S_3(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmla_ss4S_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla s0, s1, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %tmp2
}

; Lane 3 as the first multiplicand. Note that %a is used both as the other
; multiplicand and as the addend here (%b is unused), hence s0 appears twice
; in the expected instruction.
define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmla_ss4S_3_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla s0, s0, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
  ret float %tmp2
}

; Lane 0 of <2 x float>: scalar fmadd, preceded by a register-widening
; "kill" annotation for the 64-bit vector argument.
define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_fmla_ss2S_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd s0, s1, s2, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 0
  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %tmp2
}

; Multiplicands exchanged relative to test_fmla_ss2S_0.
define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_fmla_ss2S_0_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd s0, s2, s1, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 0
  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
  ret float %tmp2
}

; Lane 1 of <2 x float>: lane-indexed fmla (v2.s[1]) is expected.
define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_fmla_ss2S_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla s0, s1, v2.s[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %tmp2
}

;===----------------------------------------------------------------------===;
; fmla, float, both multiplicands extracted from vectors
;===----------------------------------------------------------------------===;

; Lanes 0 and 3 of the same vector: lane 0 is read as s1 and lane 3 through
; the indexed operand v1.s[3].
define float @test_fmla_ss4S_3_ext0(float %a, <4 x float> %v) {
; CHECK-LABEL: test_fmla_ss4S_3_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla s0, s1, v1.s[3]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <4 x float> %v, i32 0
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
  ret float %tmp2
}

; Multiplicands exchanged; the same instruction is expected.
define float @test_fmla_ss4S_3_ext0_swp(float %a, <4 x float> %v) {
; CHECK-LABEL: test_fmla_ss4S_3_ext0_swp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla s0, s1, v1.s[3]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <4 x float> %v, i32 0
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %tmp0, float %a)
  ret float %tmp2
}

; Lane 0 of two different vectors: scalar fmadd is expected.
define float @test_fmla_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
; CHECK-LABEL: test_fmla_ss4S_0_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s1, s2, s0
; CHECK-NEXT:    ret
  %tmp0 = extractelement <4 x float> %v, i32 0
  %tmp1 = extractelement <4 x float> %w, i32 0
  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
  ret float %tmp2
}

; Lanes 0 and 1 of a <2 x float> (the "3" in the name notwithstanding, the
; highest lane extracted is 1).
define float @test_fmla_ss2S_3_ext0(float %a, <2 x float> %v) {
; CHECK-LABEL: test_fmla_ss2S_3_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmla s0, s1, v1.s[1]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x float> %v, i32 0
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
  ret float %tmp2
}

; Multiplicands exchanged; the same instruction is expected.
define float @test_fmla_ss2S_3_ext0_swp(float %a, <2 x float> %v) {
; CHECK-LABEL: test_fmla_ss2S_3_ext0_swp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmla s0, s1, v1.s[1]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x float> %v, i32 0
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %tmp0, float %a)
  ret float %tmp2
}

; Lane 0 of two different <2 x float> vectors: scalar fmadd is expected.
define float @test_fmla_ss2S_0_ext0(float %a, <2 x float> %v, <2 x float> %w) {
; CHECK-LABEL: test_fmla_ss2S_0_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmadd s0, s1, s2, s0
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x float> %v, i32 0
  %tmp1 = extractelement <2 x float> %w, i32 0
  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
  ret float %tmp2
}

;===----------------------------------------------------------------------===;
; fmla, double
;===----------------------------------------------------------------------===;

; Only lane of a <1 x double>: scalar fmadd is expected.
define double @test_fmla_ddD_0(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_fmla_ddD_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d1, d2, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <1 x double> %v, i32 0
  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %tmp2
}

; Multiplicands exchanged relative to test_fmla_ddD_0.
define double @test_fmla_ddD_0_swap(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_fmla_ddD_0_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d2, d1, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <1 x double> %v, i32 0
  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
  ret double %tmp2
}

; Lane 0 of <2 x double>: scalar fmadd is expected.
define double @test_fmla_dd2D_0(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmla_dd2D_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d1, d2, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 0
  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %tmp2
}

; Multiplicands exchanged relative to test_fmla_dd2D_0.
define double @test_fmla_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmla_dd2D_0_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d2, d1, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 0
  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
  ret double %tmp2
}

; Lane 1 of <2 x double>: lane-indexed fmla (v2.d[1]) is expected.
define double @test_fmla_dd2D_1(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmla_dd2D_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %tmp2
}

; Multiplicands exchanged; the same instruction is expected.
define double @test_fmla_dd2D_1_swap(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmla_dd2D_1_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
  ret double %tmp2
}

; Lanes 0 and 1 of the same <2 x double> as the two multiplicands.
define double @test_fmla_ss2D_1_ext0(double %a, <2 x double> %v) {
; CHECK-LABEL: test_fmla_ss2D_1_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla d0, d1, v1.d[1]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x double> %v, i32 0
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.fma.f64(double %tmp0, double %tmp1, double %a)
  ret double %tmp2
}

; Multiplicands exchanged; the same instruction is expected.
define double @test_fmla_ss2D_1_ext0_swp(double %a, <2 x double> %v) {
; CHECK-LABEL: test_fmla_ss2D_1_ext0_swp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla d0, d1, v1.d[1]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x double> %v, i32 0
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %tmp0, double %a)
  ret double %tmp2
}

; Lane 0 of two different <2 x double> vectors: scalar fmadd is expected.
define double @test_fmla_ss2D_0_ext0(double %a, <2 x double> %v, <2 x double> %w) {
; CHECK-LABEL: test_fmla_ss2D_0_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d1, d2, d0
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x double> %v, i32 0
  %tmp1 = extractelement <2 x double> %w, i32 0
  %tmp2 = call double @llvm.fma.f64(double %tmp0, double %tmp1, double %a)
  ret double %tmp2
}

;===----------------------------------------------------------------------===;
; fmls, float (one multiplicand negated)
;
; Two spellings of the negation are used in this section: the fneg
; instruction, and "fsub -0.0, x".
;===----------------------------------------------------------------------===;

; fneg of the scalar %b times lane 0: scalar fmsub is expected.
define float @test_fmls_ss4S_0(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmls_ss4S_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <4 x float> %v, i64 0
  %0 = tail call float @llvm.fma.f32(float %fneg, float %extract, float %a)
  ret float %0
}

; Multiplicands exchanged; the same fmsub is expected.
define float @test_fmls_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmls_ss4S_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <4 x float> %v, i64 0
  %0 = tail call float @llvm.fma.f32(float %extract, float %fneg, float %a)
  ret float %0
}

; Lane 3 multiplied by its own negation (%b is unused): the lane is first
; copied to s1 and then used again through the indexed operand of fmls.
define float @test_fmls_ss4S_3(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmls_ss4S_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s1, v2.s[3]
; CHECK-NEXT:    fmls s0, s1, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
  ret float %tmp3
}

; Multiplicands exchanged; the same two instructions are expected.
define float @test_fmls_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmls_ss4S_3_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s1, v2.s[3]
; CHECK-NEXT:    fmls s0, s1, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
  ret float %tmp3
}

; fneg of the scalar %b times lane 0 of <2 x float>: scalar fmsub is expected.
define float @test_fmls_ss2S_0(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_fmls_ss2S_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <2 x float> %v, i64 0
  %0 = tail call float @llvm.fma.f32(float %fneg, float %extract, float %a)
  ret float %0
}

; Multiplicands exchanged; the same fmsub is expected.
define float @test_fmls_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_fmls_ss2S_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <2 x float> %v, i64 0
  %0 = tail call float @llvm.fma.f32(float %extract, float %fneg, float %a)
  ret float %0
}

; Lane 1 of <2 x float> multiplied by its own negation (%b is unused).
define float @test_fmls_ss2S_1(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_fmls_ss2S_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov s1, v2.s[1]
; CHECK-NEXT:    fmls s0, s1, v2.s[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
  ret float %tmp3
}

; Lane 0 times negated lane 3 of the same vector: a single lane-indexed fmls
; is expected.
define float @test_fmls_ss4S_3_ext0(float %a, <4 x float> %v) {
; CHECK-LABEL: test_fmls_ss4S_3_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls s0, s1, v1.s[3]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <4 x float> %v, i32 0
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp0, float %tmp2, float %a)
  ret float %tmp3
}

; Lane 0 of %v times negated lane 0 of %w: scalar fmsub is expected.
define float @test_fmls_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
; CHECK-LABEL: test_fmls_ss4S_0_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmsub s0, s1, s2, s0
; CHECK-NEXT:    ret
  %tmp0 = extractelement <4 x float> %v, i32 0
  %tmp1 = extractelement <4 x float> %w, i32 0
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp0, float %tmp2, float %a)
  ret float %tmp3
}

;===----------------------------------------------------------------------===;
; fmls, double
;===----------------------------------------------------------------------===;

; fneg of the scalar %b times the only lane of a <1 x double>.
; NOTE(review): the expected operand order here is d1, d2, whereas
; test_fmls_ddD_0_strict and the other "fneg %b" tests in this file expect
; the vector register first (d2, d1 / s2, s1). This may be genuine llc output
; for the <1 x double> case - confirm by regenerating the assertions.
define double @test_fmls_ddD_0(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_fmls_ddD_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d1, d2, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <1 x double> %v, i64 0
  %0 = tail call double @llvm.fma.f64(double %fneg, double %extract, double %a)
  ret double %0
}

; Multiplicands exchanged relative to test_fmls_ddD_0.
define double @test_fmls_ddD_0_swap(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_fmls_ddD_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d2, d1, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <1 x double> %v, i64 0
  %0 = tail call double @llvm.fma.f64(double %extract, double %fneg, double %a)
  ret double %0
}

; fneg of the scalar %b times lane 0 of <2 x double>: scalar fmsub is expected.
define double @test_fmls_dd2D_0(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmls_dd2D_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d2, d1, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <2 x double> %v, i64 0
  %0 = tail call double @llvm.fma.f64(double %fneg, double %extract, double %a)
  ret double %0
}

; Multiplicands exchanged; the same fmsub is expected.
define double @test_fmls_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmls_dd2D_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d2, d1, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <2 x double> %v, i64 0
  %0 = tail call double @llvm.fma.f64(double %extract, double %fneg, double %a)
  ret double %0
}

; Lane 1 of <2 x double> multiplied by its own negation (%b is unused): the
; lane is copied to d1 and used again through the indexed operand of fmls.
define double @test_fmls_dd2D_1(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmls_dd2D_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov d1, v2.d[1]
; CHECK-NEXT:    fmls d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = fsub double -0.0, %tmp1
  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
  ret double %tmp3
}

; Multiplicands exchanged; the same two instructions are expected.
define double @test_fmls_dd2D_1_swap(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_fmls_dd2D_1_swap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov d1, v2.d[1]
; CHECK-NEXT:    fmls d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = fsub double -0.0, %tmp1
  %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)
  ret double %tmp3
}

; Negated lane 1 times lane 0 of the same vector: a single lane-indexed fmls
; is expected.
define double @test_fmls_dd2D_1_ext0(double %a, <2 x double> %v) {
; CHECK-LABEL: test_fmls_dd2D_1_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls d0, d1, v1.d[1]
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x double> %v, i32 0
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = fsub double -0.0, %tmp1
  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp0, double %a)
  ret double %tmp3
}

; Negated lane 0 of %w times lane 0 of %v: scalar fmsub is expected.
define double @test_fmls_dd2D_0_ext0(double %a, <2 x double> %v, <2 x double> %w) {
; CHECK-LABEL: test_fmls_dd2D_0_ext0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmsub d0, d1, d2, d0
; CHECK-NEXT:    ret
  %tmp0 = extractelement <2 x double> %v, i32 0
  %tmp1 = extractelement <2 x double> %w, i32 0
  %tmp2 = fsub double -0.0, %tmp1
  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp0, double %a)
  ret double %tmp3
}

;===----------------------------------------------------------------------===;
; Strict-FP variants: fmla, float
;
; Everything from here on is a strictfp function (attribute group #0) that
; calls llvm.experimental.constrained.fma with "round.tonearest" and
; "fpexcept.strict" metadata. The expected instructions match those of the
; corresponding non-strict tests above unless noted otherwise.
;===----------------------------------------------------------------------===;

; Strict counterpart of test_fmla_ss4S_0.
define float @test_fmla_ss4S_0_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmla_ss4S_0_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s1, s2, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 0
  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp2
}

; Strict counterpart of test_fmla_ss4S_0_swap.
define float @test_fmla_ss4S_0_swap_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmla_ss4S_0_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s2, s1, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 0
  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %b, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp2
}

; Strict counterpart of test_fmla_ss4S_3.
define float @test_fmla_ss4S_3_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmla_ss4S_3_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla s0, s1, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp2
}

; Strict counterpart of test_fmla_ss4S_3_swap; as there, %a is both a
; multiplicand and the addend.
define float @test_fmla_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmla_ss4S_3_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla s0, s0, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp2
}

; Strict counterpart of test_fmla_ss2S_0.
define float @test_fmla_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
; CHECK-LABEL: test_fmla_ss2S_0_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd s0, s1, s2, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 0
  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp2
}

; Strict counterpart of test_fmla_ss2S_0_swap.
define float @test_fmla_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
; CHECK-LABEL: test_fmla_ss2S_0_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd s0, s2, s1, s0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 0
  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %b, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp2
}

; Strict counterpart of test_fmla_ss2S_1.
define float @test_fmla_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
; CHECK-LABEL: test_fmla_ss2S_1_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla s0, s1, v2.s[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp2
}

;===----------------------------------------------------------------------===;
; Strict-FP variants: fmla, double
;===----------------------------------------------------------------------===;

; Strict counterpart of test_fmla_ddD_0.
define double @test_fmla_ddD_0_strict(double %a, double %b, <1 x double> %v) #0 {
; CHECK-LABEL: test_fmla_ddD_0_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d1, d2, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <1 x double> %v, i32 0
  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp2
}

; Strict counterpart of test_fmla_ddD_0_swap.
define double @test_fmla_ddD_0_swap_strict(double %a, double %b, <1 x double> %v) #0 {
; CHECK-LABEL: test_fmla_ddD_0_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d2, d1, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <1 x double> %v, i32 0
  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp2
}

; Strict counterpart of test_fmla_dd2D_0.
define double @test_fmla_dd2D_0_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmla_dd2D_0_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d1, d2, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 0
  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp2
}

; Strict counterpart of test_fmla_dd2D_0_swap.
define double @test_fmla_dd2D_0_swap_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmla_dd2D_0_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d0, d2, d1, d0
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 0
  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp2
}

; Strict counterpart of test_fmla_dd2D_1.
define double @test_fmla_dd2D_1_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmla_dd2D_1_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp2
}

; Strict counterpart of test_fmla_dd2D_1_swap.
define double @test_fmla_dd2D_1_swap_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmla_dd2D_1_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp2
}

;===----------------------------------------------------------------------===;
; Strict-FP variants: fmls, float
;
; Unlike the non-strict fmls tests, every negation in the strict tests is
; written with the fneg instruction (no "fsub -0.0, x").
;===----------------------------------------------------------------------===;

; Strict counterpart of test_fmls_ss4S_0.
define float @test_fmls_ss4S_0_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmls_ss4S_0_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <4 x float> %v, i64 0
  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %fneg, float %extract, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %0
}

; Strict counterpart of test_fmls_ss4S_0_swap.
define float @test_fmls_ss4S_0_swap_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmls_ss4S_0_swap_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <4 x float> %v, i64 0
  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %extract, float %fneg, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %0
}

; Strict counterpart of test_fmls_ss4S_3: lane 3 times its own negation.
define float @test_fmls_ss4S_3_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmls_ss4S_3_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s1, v2.s[3]
; CHECK-NEXT:    fmls s0, s1, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = fneg float %tmp1
  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp3
}

; Strict counterpart of test_fmls_ss4S_3_swap.
define float @test_fmls_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #0 {
; CHECK-LABEL: test_fmls_ss4S_3_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s1, v2.s[3]
; CHECK-NEXT:    fmls s0, s1, v2.s[3]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = fneg float %tmp1
  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %tmp2, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp3
}

; Strict counterpart of test_fmls_ss2S_0.
define float @test_fmls_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
; CHECK-LABEL: test_fmls_ss2S_0_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <2 x float> %v, i64 0
  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %fneg, float %extract, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %0
}

; Strict counterpart of test_fmls_ss2S_0_swap.
define float @test_fmls_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
; CHECK-LABEL: test_fmls_ss2S_0_swap_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub s0, s2, s1, s0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg float %b
  %extract = extractelement <2 x float> %v, i64 0
  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %extract, float %fneg, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %0
}

; Strict counterpart of test_fmls_ss2S_1.
define float @test_fmls_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
; CHECK-LABEL: test_fmls_ss2S_1_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov s1, v2.s[1]
; CHECK-NEXT:    fmls s0, s1, v2.s[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = fneg float %tmp1
  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %tmp3
}

;===----------------------------------------------------------------------===;
; Strict-FP variants: fmls, double
;===----------------------------------------------------------------------===;

; Strict counterpart of test_fmls_ddD_0. The expected operand order here
; (d2, d1) differs from the non-strict test (d1, d2).
define double @test_fmls_ddD_0_strict(double %a, double %b, <1 x double> %v) #0 {
; CHECK-LABEL: test_fmls_ddD_0_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d2, d1, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <1 x double> %v, i64 0
  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %fneg, double %extract, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %0
}

; Strict counterpart of test_fmls_ddD_0_swap.
define double @test_fmls_ddD_0_swap_strict(double %a, double %b, <1 x double> %v) #0 {
; CHECK-LABEL: test_fmls_ddD_0_swap_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d2, d1, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <1 x double> %v, i64 0
  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %extract, double %fneg, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %0
}

; Strict counterpart of test_fmls_dd2D_0.
define double @test_fmls_dd2D_0_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmls_dd2D_0_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d2, d1, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <2 x double> %v, i64 0
  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %fneg, double %extract, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %0
}

; Strict counterpart of test_fmls_dd2D_0_swap.
define double @test_fmls_dd2D_0_swap_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmls_dd2D_0_swap_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d2, d1, d0
; CHECK-NEXT:    ret
entry:
  %fneg = fneg double %b
  %extract = extractelement <2 x double> %v, i64 0
  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %extract, double %fneg, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %0
}

; Strict counterpart of test_fmls_dd2D_1: lane 1 times its own negation.
define double @test_fmls_dd2D_1_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmls_dd2D_1_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov d1, v2.d[1]
; CHECK-NEXT:    fmls d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = fneg double %tmp1
  %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp2, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp3
}

; Strict counterpart of test_fmls_dd2D_1_swap.
define double @test_fmls_dd2D_1_swap_strict(double %a, double %b, <2 x double> %v) #0 {
; CHECK-LABEL: test_fmls_dd2D_1_swap_strict:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov d1, v2.d[1]
; CHECK-NEXT:    fmls d0, d1, v2.d[1]
; CHECK-NEXT:    ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = fneg double %tmp1
  %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %tmp2, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %tmp3
}
