; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

; Overview
; --------
; Every function below spells out a "horizontal subtract" in scalar form:
; it extracts the elements of %a and %b, subtracts each odd-indexed element
; from the even-indexed element just before it, and rebuilds a vector by
; inserting the differences into a poison vector one lane at a time.
;
; The pipeline under test is slp-vectorizer followed by instcombine, run for
; six x86-64 configurations (no -mcpu, slm, corei7-avx, core-avx2, knl, skx).
; The assertions expect the scalar chain to collapse into:
;   * one shuffle gathering the even-indexed (minuend) elements of %a and %b,
;   * one shuffle gathering the odd-indexed (subtrahend) elements,
;   * a single vector sub/fsub of the two shuffles.
;
; The assertions are machine generated; after changing the IR, regenerate them
; with utils/update_test_checks.py rather than editing them by hand.

;
; 128-bit vectors
;
; For the 128-bit cases all six configurations share one expected output:
; the full-width pair of shuffles plus one 128-bit subtract.

; <2 x double>: result = < a0-a1, b0-b1 >.
; Expected even mask <0,2>, odd mask <1,3>, then one <2 x double> fsub.
define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_v2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <2 x double> [[TMP3]]
;
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %b0 = extractelement <2 x double> %b, i32 0
  %b1 = extractelement <2 x double> %b, i32 1
  %r0 = fsub double %a0, %a1
  %r1 = fsub double %b0, %b1
  %r00 = insertelement <2 x double> poison, double %r0, i32 0
  %r01 = insertelement <2 x double> %r00, double %r1, i32 1
  ret <2 x double> %r01
}

; <4 x float>: result = < a0-a1, a2-a3, b0-b1, b2-b3 >
; (low half from %a, high half from %b).
; Expected even mask <0,2,4,6>, odd mask <1,3,5,7>, then one <4 x float> fsub.
define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x float> [[TMP3]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %r0 = fsub float %a0, %a1
  %r1 = fsub float %a2, %a3
  %r2 = fsub float %b0, %b1
  %r3 = fsub float %b2, %b3
  %r00 = insertelement <4 x float> poison, float %r0, i32 0
  %r01 = insertelement <4 x float> %r00, float %r1, i32 1
  %r02 = insertelement <4 x float> %r01, float %r2, i32 2
  %r03 = insertelement <4 x float> %r02, float %r3, i32 3
  ret <4 x float> %r03
}

; <2 x i64>: integer counterpart of test_v2f64; result = < a0-a1, b0-b1 >.
; Expected even mask <0,2>, odd mask <1,3>, then one <2 x i64> sub.
define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @test_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %a0 = extractelement <2 x i64> %a, i32 0
  %a1 = extractelement <2 x i64> %a, i32 1
  %b0 = extractelement <2 x i64> %b, i32 0
  %b1 = extractelement <2 x i64> %b, i32 1
  %r0 = sub i64 %a0, %a1
  %r1 = sub i64 %b0, %b1
  %r00 = insertelement <2 x i64> poison, i64 %r0, i32 0
  %r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1
  ret <2 x i64> %r01
}

; <4 x i32>: integer counterpart of test_v4f32;
; result = < a0-a1, a2-a3, b0-b1, b2-b3 >.
; Expected even mask <0,2,4,6>, odd mask <1,3,5,7>, then one <4 x i32> sub.
define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @test_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %r0 = sub i32 %a0, %a1
  %r1 = sub i32 %a2, %a3
  %r2 = sub i32 %b0, %b1
  %r3 = sub i32 %b2, %b3
  %r00 = insertelement <4 x i32> poison, i32 %r0, i32 0
  %r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3
  ret <4 x i32> %r03
}

; <8 x i16>: result lanes 0-3 are the pairwise differences of %a,
; lanes 4-7 the pairwise differences of %b.
; Expected even mask <0,2,...,14>, odd mask <1,3,...,15>, then one
; <8 x i16> sub.
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @test_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %r0 = sub i16 %a0, %a1
  %r1 = sub i16 %a2, %a3
  %r2 = sub i16 %a4, %a5
  %r3 = sub i16 %a6, %a7
  %r4 = sub i16 %b0, %b1
  %r5 = sub i16 %b2, %b3
  %r6 = sub i16 %b4, %b5
  %r7 = sub i16 %b6, %b7
  %r00 = insertelement <8 x i16> poison, i16 %r0, i32 0
  %r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1
  %r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2
  %r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3
  %r04 = insertelement <8 x i16> %r03, i16 %r4, i32 4
  %r05 = insertelement <8 x i16> %r04, i16 %r5, i32 5
  %r06 = insertelement <8 x i16> %r05, i16 %r6, i32 6
  %r07 = insertelement <8 x i16> %r06, i16 %r7, i32 7
  ret <8 x i16> %r07
}

;
; 256-bit vectors
;
; In the 256-bit cases the result interleaves %a and %b per 128-bit half:
; each half of the result holds differences of the matching half of %a
; followed by differences of the matching half of %b.
; (Presumably this mirrors the per-128-bit-lane behaviour of the AVX
; horizontal-subtract instructions; the file itself does not say so.)
;
; Expected output differs by configuration:
;   * no -mcpu and -mcpu=slm: two 128-bit subtracts, each fed by its own pair
;     of even/odd shuffles, followed by a concatenating shuffle;
;   * the four AVX-capable CPUs: one 256-bit pair of shuffles and one
;     256-bit subtract.

; <4 x double>: result = < a0-a1, b0-b1, a2-a3, b2-b3 >.
define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: @test_v4f64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT:    ret <4 x double> [[TMP7]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT:    ret <4 x double> [[TMP7]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT:    [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <4 x double> [[TMP3]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %r0 = fsub double %a0, %a1
  %r1 = fsub double %b0, %b1
  %r2 = fsub double %a2, %a3
  %r3 = fsub double %b2, %b3
  %r00 = insertelement <4 x double> poison, double %r0, i32 0
  %r01 = insertelement <4 x double> %r00, double %r1, i32 1
  %r02 = insertelement <4 x double> %r01, double %r2, i32 2
  %r03 = insertelement <4 x double> %r02, double %r3, i32 3
  ret <4 x double> %r03
}

; <8 x float>: result =
;   < a0-a1, a2-a3, b0-b1, b2-b3, a4-a5, a6-a7, b4-b5, b6-b7 >.
define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT:    [[TMP5:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT:    ret <8 x float> [[TMP7]]
;
; SLM-LABEL: @test_v8f32(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT:    [[TMP5:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT:    ret <8 x float> [[TMP7]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT:    [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <8 x float> [[TMP3]]
;
  %a0 = extractelement <8 x float> %a, i32 0
  %a1 = extractelement <8 x float> %a, i32 1
  %a2 = extractelement <8 x float> %a, i32 2
  %a3 = extractelement <8 x float> %a, i32 3
  %a4 = extractelement <8 x float> %a, i32 4
  %a5 = extractelement <8 x float> %a, i32 5
  %a6 = extractelement <8 x float> %a, i32 6
  %a7 = extractelement <8 x float> %a, i32 7
  %b0 = extractelement <8 x float> %b, i32 0
  %b1 = extractelement <8 x float> %b, i32 1
  %b2 = extractelement <8 x float> %b, i32 2
  %b3 = extractelement <8 x float> %b, i32 3
  %b4 = extractelement <8 x float> %b, i32 4
  %b5 = extractelement <8 x float> %b, i32 5
  %b6 = extractelement <8 x float> %b, i32 6
  %b7 = extractelement <8 x float> %b, i32 7
  %r0 = fsub float %a0, %a1
  %r1 = fsub float %a2, %a3
  %r2 = fsub float %b0, %b1
  %r3 = fsub float %b2, %b3
  %r4 = fsub float %a4, %a5
  %r5 = fsub float %a6, %a7
  %r6 = fsub float %b4, %b5
  %r7 = fsub float %b6, %b7
  %r00 = insertelement <8 x float> poison, float %r0, i32 0
  %r01 = insertelement <8 x float> %r00, float %r1, i32 1
  %r02 = insertelement <8 x float> %r01, float %r2, i32 2
  %r03 = insertelement <8 x float> %r02, float %r3, i32 3
  %r04 = insertelement <8 x float> %r03, float %r4, i32 4
  %r05 = insertelement <8 x float> %r04, float %r5, i32 5
  %r06 = insertelement <8 x float> %r05, float %r6, i32 6
  %r07 = insertelement <8 x float> %r06, float %r7, i32 7
  ret <8 x float> %r07
}

; <4 x i64>: integer counterpart of test_v4f64;
; result = < a0-a1, b0-b1, a2-a3, b2-b3 >.
define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: @test_v4i64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT:    [[TMP5:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT:    ret <4 x i64> [[TMP7]]
;
; SLM-LABEL: @test_v4i64(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT:    [[TMP5:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT:    ret <4 x i64> [[TMP7]]
;
; AVX-LABEL: @test_v4i64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT:    [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %a0 = extractelement <4 x i64> %a, i32 0
  %a1 = extractelement <4 x i64> %a, i32 1
  %a2 = extractelement <4 x i64> %a, i32 2
  %a3 = extractelement <4 x i64> %a, i32 3
  %b0 = extractelement <4 x i64> %b, i32 0
  %b1 = extractelement <4 x i64> %b, i32 1
  %b2 = extractelement <4 x i64> %b, i32 2
  %b3 = extractelement <4 x i64> %b, i32 3
  %r0 = sub i64 %a0, %a1
  %r1 = sub i64 %b0, %b1
  %r2 = sub i64 %a2, %a3
  %r3 = sub i64 %b2, %b3
  %r00 = insertelement <4 x i64> poison, i64 %r0, i32 0
  %r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
  %r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
  %r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
  ret <4 x i64> %r03
}

; <8 x i32>: integer counterpart of test_v8f32; result =
;   < a0-a1, a2-a3, b0-b1, b2-b3, a4-a5, a6-a7, b4-b5, b6-b7 >.
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @test_v8i32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT:    [[TMP5:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT:    ret <8 x i32> [[TMP7]]
;
; SLM-LABEL: @test_v8i32(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT:    [[TMP5:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT:    ret <8 x i32> [[TMP7]]
;
; AVX-LABEL: @test_v8i32(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT:    [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %a0 = extractelement <8 x i32> %a, i32 0
  %a1 = extractelement <8 x i32> %a, i32 1
  %a2 = extractelement <8 x i32> %a, i32 2
  %a3 = extractelement <8 x i32> %a, i32 3
  %a4 = extractelement <8 x i32> %a, i32 4
  %a5 = extractelement <8 x i32> %a, i32 5
  %a6 = extractelement <8 x i32> %a, i32 6
  %a7 = extractelement <8 x i32> %a, i32 7
  %b0 = extractelement <8 x i32> %b, i32 0
  %b1 = extractelement <8 x i32> %b, i32 1
  %b2 = extractelement <8 x i32> %b, i32 2
  %b3 = extractelement <8 x i32> %b, i32 3
  %b4 = extractelement <8 x i32> %b, i32 4
  %b5 = extractelement <8 x i32> %b, i32 5
  %b6 = extractelement <8 x i32> %b, i32 6
  %b7 = extractelement <8 x i32> %b, i32 7
  %r0 = sub i32 %a0, %a1
  %r1 = sub i32 %a2, %a3
  %r2 = sub i32 %b0, %b1
  %r3 = sub i32 %b2, %b3
  %r4 = sub i32 %a4, %a5
  %r5 = sub i32 %a6, %a7
  %r6 = sub i32 %b4, %b5
  %r7 = sub i32 %b6, %b7
  %r00 = insertelement <8 x i32> poison, i32 %r0, i32 0
  %r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3
  %r04 = insertelement <8 x i32> %r03, i32 %r4, i32 4
  %r05 = insertelement <8 x i32> %r04, i32 %r5, i32 5
  %r06 = insertelement <8 x i32> %r05, i32 %r6, i32 6
  %r07 = insertelement <8 x i32> %r06, i32 %r7, i32 7
  ret <8 x i32> %r07
}

; <16 x i16>: result lanes 0-3 come from %a[0..7], lanes 4-7 from %b[0..7],
; lanes 8-11 from %a[8..15] and lanes 12-15 from %b[8..15], each lane being
; (even element - following odd element).
define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: @test_v16i16(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT:    ret <16 x i16> [[TMP7]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SLM-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT:    ret <16 x i16> [[TMP7]]
;
; AVX-LABEL: @test_v16i16(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %a10 = extractelement <16 x i16> %a, i32 10
  %a11 = extractelement <16 x i16> %a, i32 11
  %a12 = extractelement <16 x i16> %a, i32 12
  %a13 = extractelement <16 x i16> %a, i32 13
  %a14 = extractelement <16 x i16> %a, i32 14
  %a15 = extractelement <16 x i16> %a, i32 15
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %b10 = extractelement <16 x i16> %b, i32 10
  %b11 = extractelement <16 x i16> %b, i32 11
  %b12 = extractelement <16 x i16> %b, i32 12
  %b13 = extractelement <16 x i16> %b, i32 13
  %b14 = extractelement <16 x i16> %b, i32 14
  %b15 = extractelement <16 x i16> %b, i32 15
  %r0 = sub i16 %a0 , %a1
  %r1 = sub i16 %a2 , %a3
  %r2 = sub i16 %a4 , %a5
  %r3 = sub i16 %a6 , %a7
  %r4 = sub i16 %b0 , %b1
  %r5 = sub i16 %b2 , %b3
  %r6 = sub i16 %b4 , %b5
  %r7 = sub i16 %b6 , %b7
  %r8 = sub i16 %a8 , %a9
  %r9 = sub i16 %a10, %a11
  %r10 = sub i16 %a12, %a13
  %r11 = sub i16 %a14, %a15
  %r12 = sub i16 %b8 , %b9
  %r13 = sub i16 %b10, %b11
  %r14 = sub i16 %b12, %b13
  %r15 = sub i16 %b14, %b15
  %rv0 = insertelement <16 x i16> poison, i16 %r0 , i32 0
  %rv1 = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
  %rv2 = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
  %rv3 = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
  %rv4 = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
  %rv5 = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
  %rv6 = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
  %rv7 = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
  %rv8 = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
  %rv9 = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
  %rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
  %rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
  %rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
  %rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
  %rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
  %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
  ret <16 x i16> %rv15
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX1: {{.*}}
; AVX2: {{.*}}
; AVX512: {{.*}}
