; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL
;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX

; Horizontal add/sub style patterns written on 512-bit vector types
; (<16 x i32>, <16 x float>, <8 x double>) where only part of the result is
; used. Each function is compiled for both knl and skx.

; Sums elements 0..3 of the input with two shuffle+add steps and returns
; lane 0. The expected code operates on xmm registers only.
define i32 @hadd_16(<16 x i32> %x225) {
; KNL-LABEL: hadd_16:
; KNL: # %bb.0:
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovd %xmm0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: hadd_16:
; SKX: # %bb.0:
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vpsrlq $32, %xmm0, %xmm1
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vmovd %xmm0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  ; Bring elements 2 and 3 down to lanes 0 and 1; all other lanes are undef.
  %shuf0 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %add0 = add <16 x i32> %x225, %shuf0
  ; Bring lane 1 of the partial sum down to lane 0.
  %shuf1 = shufflevector <16 x i32> %add0, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %add1 = add <16 x i32> %add0, %shuf1
  %res = extractelement <16 x i32> %add1, i32 0
  ret i32 %res
}

; Same shape as hadd_16, but the second step subtracts instead of adding.
define i32 @hsub_16(<16 x i32> %x225) {
; KNL-LABEL: hsub_16:
; KNL: # %bb.0:
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; KNL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovd %xmm0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: hsub_16:
; SKX: # %bb.0:
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vpsrlq $32, %xmm0, %xmm1
; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vmovd %xmm0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %shuf0 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %add0 = add <16 x i32> %x225, %shuf0
  %shuf1 = shufflevector <16 x i32> %add0, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %sub1 = sub <16 x i32> %add0, %shuf1
  %res = extractelement <16 x i32> %sub1, i32 0
  ret i32 %res
}

; Floating-point counterpart of hadd_16 on <16 x float>.
define float @fhadd_16(<16 x float> %x225) {
; KNL-LABEL: fhadd_16:
; KNL: # %bb.0:
; KNL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; KNL-NEXT: vaddps %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; KNL-NEXT: vaddss %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fhadd_16:
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %shuf0 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %fadd0 = fadd <16 x float> %x225, %shuf0
  %shuf1 = shufflevector <16 x float> %fadd0, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %fadd1 = fadd <16 x float> %fadd0, %shuf1
  %res = extractelement <16 x float> %fadd1, i32 0
  ret float %res
}

; Floating-point counterpart of hsub_16 on <16 x float>.
define float @fhsub_16(<16 x float> %x225) {
; KNL-LABEL: fhsub_16:
; KNL: # %bb.0:
; KNL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; KNL-NEXT: vaddps %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; KNL-NEXT: vsubss %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fhsub_16:
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %shuf0 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %fadd0 = fadd <16 x float> %x225, %shuf0
  %shuf1 = shufflevector <16 x float> %fadd0, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %fsub1 = fsub <16 x float> %fadd0, %shuf1
  %res = extractelement <16 x float> %fsub1, i32 0
  ret float %res
}

; Adds the even-indexed and odd-indexed elements gathered from both inputs;
; only the low eight result lanes are defined, the upper eight are undef.
; A single ymm vphaddd is expected.
define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
; CHECK-LABEL: hadd_16_3:
; CHECK: # %bb.0:
; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %even = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18, i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %odd = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19, i32 5, i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %hop = add <16 x i32> %even, %odd
  ret <16 x i32> %hop
}

; <16 x float> version of hadd_16_3; a single ymm vhaddps is expected.
define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
; CHECK-LABEL: fhadd_16_3:
; CHECK: # %bb.0:
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %even = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18, i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %odd = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19, i32 5, i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %hop = fadd <16 x float> %even, %odd
  ret <16 x float> %hop
}

; <8 x double> version: the low four result lanes are defined, the upper four
; are undef. A single ymm vhaddpd is expected.
define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
; CHECK-LABEL: fhadd_16_4:
; CHECK: # %bb.0:
; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %even = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  %odd = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
  %hop = fadd <8 x double> %even, %odd
  ret <8 x double> %hop
}

; Fully defined shuffle masks, but only the low four lanes of the sum are
; returned. A single ymm vhaddpd is expected.
define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) {
; CHECK-LABEL: fadd_noundef_low:
; CHECK: # %bb.0:
; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %even = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %odd = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %hop = fadd <8 x double> %even, %odd
  %res = shufflevector <8 x double> %hop, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %res
}

; Same computation as fadd_noundef_low, but the upper four lanes of the sum
; are returned. The recorded output uses zmm unpacks, two 256-bit extracts and
; a vaddpd rather than a horizontal add.
define <4 x double> @fadd_noundef_high(<8 x double> %x225, <8 x double> %x227) {
; CHECK-LABEL: fadd_noundef_high:
; CHECK: # %bb.0:
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm1
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %even = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %odd = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %hop = fadd <8 x double> %even, %odd
  %res = shufflevector <8 x double> %hop, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x double> %res
}


; Fully defined <16 x i32> masks with only the low eight lanes of the sum
; returned. A single ymm vphaddd is expected.
define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
; CHECK-LABEL: hadd_16_3_sv:
; CHECK: # %bb.0:
; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %even = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18, i32 4, i32 6, i32 20, i32 22, i32 8, i32 10, i32 24, i32 26, i32 12, i32 14, i32 28, i32 30>
  %odd = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19, i32 5, i32 7, i32 21, i32 23, i32 9, i32 11, i32 25, i32 27, i32 13, i32 15, i32 29, i32 31>
  %hop = add <16 x i32> %even, %odd
  %res = shufflevector <16 x i32> %hop, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %res
}


; Fully defined masks with only lane 0 of the sum returned, i.e. element 0
; plus element 1 of the first argument. A scalar vaddsd is expected.
define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
; KNL-LABEL: fadd_noundef_eel:
; KNL: # %bb.0:
; KNL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; KNL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fadd_noundef_eel:
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %even = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %odd = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %hop = fadd <8 x double> %even, %odd
  %res = extractelement <8 x double> %hop, i32 0
  ret double %res
}



; Subtraction variant returning lane 5 of the difference, i.e. element 4 minus
; element 5 of the second argument. The expected code extracts the 128-bit
; chunk at index 2 of zmm1 and uses a scalar vsubsd.
define double @fsub_noundef_ee(<8 x double> %x225, <8 x double> %x227) {
; KNL-LABEL: fsub_noundef_ee:
; KNL: # %bb.0:
; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0
; KNL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; KNL-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fsub_noundef_ee:
; SKX: # %bb.0:
; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm0
; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %even = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %odd = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %hop = fsub <8 x double> %even, %odd
  %res = extractelement <8 x double> %hop, i32 5
  ret double %res
}