; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
; so we need to edit it to remove the NAN constant comments
;

; fabs(c1) -> c2
define float @combine_fabs_constant() {
; SSE-LABEL: combine_fabs_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    retq
  %1 = call float @llvm.fabs.f32(float -2.0)
  ret float %1
}

define <4 x float> @combine_vec_fabs_constant() {
; SSE-LABEL: combine_vec_fabs_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
  ret <4 x float> %1
}

; fabs(fabs(x)) -> fabs(x)
define float @combine_fabs_fabs(float %a) {
; SSE-LABEL: combine_fabs_fabs:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fabs:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call float @llvm.fabs.f32(float %a)
  %2 = call float @llvm.fabs.f32(float %1)
  ret float %2
}

define <4 x float> @combine_vec_fabs_fabs(<4 x float> %a) {
; SSE-LABEL: combine_vec_fabs_fabs:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fabs:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  ret <4 x float> %2
}

; fabs(fneg(x)) -> fabs(x)
define float @combine_fabs_fneg(float %a) {
; SSE-LABEL: combine_fabs_fneg:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fneg:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub float -0.0, %a
  %2 = call float @llvm.fabs.f32(float %1)
  ret float %2
}

define <4 x float> @combine_vec_fabs_fneg(<4 x float> %a) {
; SSE-LABEL: combine_vec_fabs_fneg:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fneg:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %a
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  ret <4 x float> %2
}

; fabs(fcopysign(x, y)) -> fabs(x)
define float @combine_fabs_fcopysign(float %a, float %b) {
; SSE-LABEL: combine_fabs_fcopysign:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fcopysign:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call float @llvm.copysign.f32(float %a, float %b)
  %2 = call float @llvm.fabs.f32(float %1)
  ret float %2
}

define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_vec_fabs_fcopysign:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fcopysign:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  ret <4 x float> %2
}

; store(fabs(load())) - convert scalar to integer
define void @combine_fabs_int_rmw_f64(ptr %ptr) {
; SSE-LABEL: combine_fabs_int_rmw_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    andb $127, 7(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_rmw_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    andb $127, 7(%rdi)
; AVX-NEXT:    retq
  %1 = load double, ptr %ptr
  %2 = call double @llvm.fabs.f64(double %1)
  store double %2, ptr %ptr
  ret void
}

define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
; SSE-LABEL: combine_fabs_int_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; SSE-NEXT:    andl (%rdi), %eax
; SSE-NEXT:    movl %eax, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; AVX-NEXT:    andl (%rdi), %eax
; AVX-NEXT:    movl %eax, (%rsi)
; AVX-NEXT:    retq
  %1 = load float, ptr %src
  %2 = call float @llvm.fabs.f32(float %1)
  store float %2, ptr %dst
  ret void
}

define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
; SSE-LABEL: combine_fabs_int_rmw_bfloat:
; SSE:       # %bb.0:
; SSE-NEXT:    andb $127, 1(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_rmw_bfloat:
; AVX:       # %bb.0:
; AVX-NEXT:    andb $127, 1(%rdi)
; AVX-NEXT:    retq
  %1 = load bfloat, ptr %ptr
  %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
  store bfloat %2, ptr %ptr
  ret void
}

define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
; SSE-LABEL: combine_fabs_int_half:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    andl $32767, %eax # imm = 0x7FFF
; SSE-NEXT:    movw %ax, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_int_half:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    andl $32767, %eax # imm = 0x7FFF
; AVX-NEXT:    movw %ax, (%rsi)
; AVX-NEXT:    retq
  %1 = load half, ptr %src
  %2 = call half @llvm.fabs.f16(half %1)
  store half %2, ptr %dst
  ret void
}

; don't convert vector to scalar
define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
; SSE-LABEL: combine_fabs_vec_int_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movaps %xmm0, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_vec_int_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm0, (%rsi)
; AVX-NEXT:    retq
  %1 = load <4 x float>, ptr %src
  %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  store <4 x float> %2, ptr %dst
  ret void
}

declare float @llvm.fabs.f32(float %p)
declare float @llvm.copysign.f32(float %Mag, float %Sgn)

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
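
; NOTE: the [NaN,NaN,NaN,NaN] operands in the AVX check lines above are the
; asm printer's rendering of the 0x7FFFFFFF sign-clearing mask that fabs is
; lowered to (compare the "imm = 0x7FFFFFFF" scalar integer tests); 0x7FFFFFFF
; reads back as a NaN bit pattern when reinterpreted as an IEEE-754 single,
; which is why the constants print as NaN rather than a numeric value.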