; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx,fma | FileCheck %s --check-prefix=FMA3
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx,fma4 | FileCheck %s --check-prefix=FMA4

; Codegen tests for negations that feed, or consume, an llvm.fma call on x86-64.
; Each function is compiled twice (once with +fma, once with +fma4) and the
; expected assembly shows the negations being absorbed into the choice of FMA
; opcode (fmadd / fmsub / fnmadd / fnmsub) rather than emitted separately.
; The assertion blocks are generated output: after changing any IR below,
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.

; -fma(w - x, y, -z), with nsz on the inner fsub and on the fma call.
; Expected: one vsubss for (w - x), then a single fnmadd computing
; -((w - x) * y) + z; no standalone negate instruction.
define float @test_fneg_fma_subx_y_negz_f32(float %w, float %x, float %y, float %z) {
; FMA3-LABEL: test_fneg_fma_subx_y_negz_f32:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; FMA3-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm2 * xmm0) + xmm3
; FMA3-NEXT:    retq
;
; FMA4-LABEL: test_fneg_fma_subx_y_negz_f32:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm3
; FMA4-NEXT:    retq
entry:
  %subx = fsub nsz float %w, %x
  %negz = fsub float -0.000000e+00, %z
  %0 = tail call nsz float @llvm.fma.f32(float %subx, float %y, float %negz)
  %1 = fsub float -0.000000e+00, %0
  ret float %1
}

; Same shape as the previous test, but the subtraction (w - y) is the second
; multiplicand: -fma(x, w - y, -z).
; Expected: one vsubss followed by a single fnmadd.
define float @test_fneg_fma_x_suby_negz_f32(float %w, float %x, float %y, float %z) {
; FMA3-LABEL: test_fneg_fma_x_suby_negz_f32:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; FMA3-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA3-NEXT:    retq
;
; FMA4-LABEL: test_fneg_fma_x_suby_negz_f32:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA4-NEXT:    retq
entry:
  %suby = fsub nsz float %w, %y
  %negz = fsub float -0.000000e+00, %z
  %0 = tail call nsz float @llvm.fma.f32(float %x, float %suby, float %negz)
  %1 = fsub float -0.000000e+00, %0
  ret float %1
}

; Both multiplicands are subtractions: -fma(w - x, w - y, -z).
; Expected: two vsubss instructions (operand order as written in the IR) and a
; single fnmadd.
define float @test_fneg_fma_subx_suby_negz_f32(float %w, float %x, float %y, float %z) {
; FMA3-LABEL: test_fneg_fma_subx_suby_negz_f32:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; FMA3-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; FMA3-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA3-NEXT:    retq
;
; FMA4-LABEL: test_fneg_fma_subx_suby_negz_f32:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; FMA4-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA4-NEXT:    retq
entry:
  %subx = fsub nsz float %w, %x
  %suby = fsub nsz float %w, %y
  %negz = fsub float -0.000000e+00, %z
  %0 = tail call nsz float @llvm.fma.f32(float %subx, float %suby, float %negz)
  %1 = fsub float -0.000000e+00, %0
  ret float %1
}

; -fma(w - x, -y, -z): the outer negation, the negated multiplicand and the
; negated addend are all expected to fold away, leaving one vsubss and a plain
; fmadd computing (w - x) * y + z.
define float @test_fneg_fma_subx_negy_negz_f32(float %w, float %x, float %y, float %z) {
; FMA3-LABEL: test_fneg_fma_subx_negy_negz_f32:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; FMA3-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm3
; FMA3-NEXT:    retq
;
; FMA4-LABEL: test_fneg_fma_subx_negy_negz_f32:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm3
; FMA4-NEXT:    retq
entry:
  %subx = fsub nsz float %w, %x
  %negy = fsub float -0.000000e+00, %y
  %negz = fsub float -0.000000e+00, %z
  %0 = tail call nsz float @llvm.fma.f32(float %subx, float %negy, float %negz)
  %1 = fsub float -0.000000e+00, %0
  ret float %1
}

; fma(x, y, rcp(-z)) on <4 x float>, where the addend is the x86 rcp.ps
; intrinsic applied to a negated input.
; Expected: vrcpps applied to z directly and an fmsub computing
; x * y - rcp(z); no separate negate instruction.
define <4 x float> @test_fma_rcp_fneg_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; FMA3-LABEL: test_fma_rcp_fneg_v4f32:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vrcpps %xmm2, %xmm2
; FMA3-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA3-NEXT:    retq
;
; FMA4-LABEL: test_fma_rcp_fneg_v4f32:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vrcpps %xmm2, %xmm2
; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
entry:
  %0 = fneg <4 x float> %z
  %1 = tail call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)
  %2 = tail call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)

; This would crash while trying getNegatedExpression().

; Regression input for the crash noted above: -fma(x, -42.0, x * 42.0).
; Expected: one vmulss against a constant-pool value and one fnmsub whose
; multiplicand is a memory operand.
define float @negated_constant(float %x) {
; FMA3-LABEL: negated_constant:
; FMA3:       # %bb.0:
; FMA3-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; FMA3-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1
; FMA3-NEXT:    retq
;
; FMA4-LABEL: negated_constant:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1
; FMA4-NEXT:    retq
  %m = fmul float %x, 42.0
  %fma = call nsz float @llvm.fma.f32(float %x, float -42.0, float %m)
  %nfma = fneg float %fma
  ret float %nfma
}

; The multiplicand constant vector is the lane-wise negation of the addend
; constant vector.
; Expected: only the negative vector is materialized (one vmovapd) and it is
; used for both operands of a single fnmadd.
define <4 x double> @negated_constant_v4f64(<4 x double> %a) {
; FMA3-LABEL: negated_constant_v4f64:
; FMA3:       # %bb.0:
; FMA3-NEXT:    vmovapd {{.*#+}} ymm1 = [-5.0E-1,-2.5E-1,-1.25E-1,-6.25E-2]
; FMA3-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm1
; FMA3-NEXT:    retq
;
; FMA4-LABEL: negated_constant_v4f64:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd {{.*#+}} ymm1 = [-5.0E-1,-2.5E-1,-1.25E-1,-6.25E-2]
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm1
; FMA4-NEXT:    retq
  %t = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> <double 5.000000e-01, double 2.5000000e-01, double 1.25000000e-01, double 0.62500000e-01>, <4 x double> <double -5.000000e-01, double -2.5000000e-01, double -1.25000000e-01, double -0.62500000e-01>)
  ret <4 x double> %t
}

; Two fma calls on the same inputs whose constant multiplicands are lane-wise
; negations of each other, with an undef lane in the same position in both.
; Expected: a single constant vector load shared by one fmadd and one fnmadd,
; followed by the vaddpd of the two results.
define <4 x double> @negated_constant_v4f64_2fmas(<4 x double> %a, <4 x double> %b) {
; FMA3-LABEL: negated_constant_v4f64_2fmas:
; FMA3:       # %bb.0:
; FMA3-NEXT:    vmovapd {{.*#+}} ymm2 = [-5.0E-1,u,-2.5E+0,-4.5E+0]
; FMA3-NEXT:    vmovapd %ymm2, %ymm3
; FMA3-NEXT:    vfmadd213pd {{.*#+}} ymm3 = (ymm0 * ymm3) + ymm1
; FMA3-NEXT:    vfnmadd213pd {{.*#+}} ymm2 = -(ymm0 * ymm2) + ymm1
; FMA3-NEXT:    vaddpd %ymm2, %ymm3, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: negated_constant_v4f64_2fmas:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd {{.*#+}} ymm2 = [-5.0E-1,u,-2.5E+0,-4.5E+0]
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm3 = (ymm0 * ymm2) + ymm1
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1
; FMA4-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
; FMA4-NEXT:    retq
  %t0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> <double -5.000000e-01, double undef, double -25.000000e-01, double -45.000000e-01>, <4 x double> %b)
  %t1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> <double 5.000000e-01, double undef, double 25.000000e-01, double 45.000000e-01>, <4 x double> %b)
  %t2 = fadd <4 x double> %t0, %t1
  ret <4 x double> %t2
}

; The same positive constant vector is used by an fadd and as the fma
; multiplicand, while the fma addend is its lane-wise negation.
; Expected: one broadcast load of the positive constant, reused by vaddpd and
; by a single fmsub (the negated addend becomes the subtracted operand).
define <4 x double> @negated_constant_v4f64_fadd(<4 x double> %a) {
; FMA3-LABEL: negated_constant_v4f64_fadd:
; FMA3:       # %bb.0:
; FMA3-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [1.5E+0,1.25E-1,1.5E+0,1.25E-1]
; FMA3-NEXT:    # ymm1 = mem[0,1,0,1]
; FMA3-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; FMA3-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm1
; FMA3-NEXT:    retq
;
; FMA4-LABEL: negated_constant_v4f64_fadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [1.5E+0,1.25E-1,1.5E+0,1.25E-1]
; FMA4-NEXT:    # ymm1 = mem[0,1,0,1]
; FMA4-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm1
; FMA4-NEXT:    retq
  %t0 = fadd <4 x double> %a, <double 15.000000e-01, double 1.25000000e-01, double 15.000000e-01, double 1.25000000e-01>
  %t1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %t0, <4 x double> <double 15.000000e-01, double 1.25000000e-01, double 15.000000e-01, double 1.25000000e-01>, <4 x double> <double -15.000000e-01, double -1.25000000e-01, double -15.000000e-01, double -1.25000000e-01>)
  ret <4 x double> %t1
}

; Two fma calls whose 0.5 / -0.5 constant vectors carry undef lanes in
; different positions (lane 1 of the first addend, lane 0 of the second
; multiplicand).
; Expected: one broadcast of -0.5 into a register, with the remaining constant
; operands taken from memory by an fnmadd and an fmadd, then a vaddpd.
define <4 x double> @negated_constant_v4f64_2fma_undefs(<4 x double> %a, <4 x double> %b) {
; FMA3-LABEL: negated_constant_v4f64_2fma_undefs:
; FMA3:       # %bb.0:
; FMA3-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; FMA3-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + mem
; FMA3-NEXT:    vfmadd132pd {{.*#+}} ymm1 = (ymm1 * mem) + ymm2
; FMA3-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: negated_constant_v4f64_2fma_undefs:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + mem
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm1 = (ymm1 * mem) + ymm2
; FMA4-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %t0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double> <double -5.000000e-01, double undef, double -5.000000e-01, double -5.000000e-01>)
  %t1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %b, <4 x double> <double undef, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>)
  %t2 = fadd <4 x double> %t0, %t1
  ret <4 x double> %t2
}

declare float @llvm.fma.f32(float, float, float)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)