; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
; so we need to edit it to remove the NAN constant comments
;

; copysign(x, c1) -> fabs(x) iff ispos(c1)
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
  %3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
  ret <4 x float> %3
}

; copysign(fabs(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(fneg(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(copysign(x,z), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(x, copysign(y,z)) -> copysign(x, z)
define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, fp_extend(y)) -> copysign(x, y)
define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fpext_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2pd %xmm2, %xmm3
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    cvtps2pd %xmm2, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN]
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andnps %xmm3, %xmm5
; SSE-NEXT:    orps %xmm5, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm4
; SSE-NEXT:    orps %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtps2pd %xmm1, %ymm1
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = fpext <4 x float> %y to <4 x double>
  %2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
  ret <4 x double> %2
}

; copysign(x, fp_round(y)) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x double> %y) {
; SSE-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2ps %xmm2, %xmm2
; SSE-NEXT:    cvtpd2ps %xmm1, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = fptrunc <4 x double> %y to <4 x float>
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %Mag, <4 x double> %Sgn)