; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX

; Every test below extracts scalars from its vector argument(s), adds adjacent
; element pairs (element 2k with element 2k+1), and writes the sums out again
; (into a result vector, or to memory in the store test). The expectations
; verify that slp-vectorizer followed by instcombine turns this into two
; shufflevectors (even-indexed and odd-indexed elements) feeding a vector add.

;
; 128-bit vectors
;

; Result is { a0+a1, b0+b1 }; all run lines expect one <2 x double> fadd.
define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_v2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <2 x double> [[TMP3]]
;
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %b0 = extractelement <2 x double> %b, i32 0
  %b1 = extractelement <2 x double> %b, i32 1
  %r0 = fadd double %a0, %a1
  %r1 = fadd double %b0, %b1
  %r00 = insertelement <2 x double> zeroinitializer, double %r0, i32 0
  %r01 = insertelement <2 x double> %r00, double %r1, i32 1
  ret <2 x double> %r01
}

; Result is { a0+a1, a2+a3, b0+b1, b2+b3 }; all run lines expect one
; <4 x float> fadd.
define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x float> [[TMP3]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %r0 = fadd float %a0, %a1
  %r1 = fadd float %a2, %a3
  %r2 = fadd float %b0, %b1
  %r3 = fadd float %b2, %b3
  %r00 = insertelement <4 x float> zeroinitializer, float %r0, i32 0
  %r01 = insertelement <4 x float> %r00, float %r1, i32 1
  %r02 = insertelement <4 x float> %r01, float %r2, i32 2
  %r03 = insertelement <4 x float> %r02, float %r3, i32 3
  ret <4 x float> %r03
}

; Integer counterpart of test_v2f64: { a0+a1, b0+b1 } as one <2 x i64> add.
define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @test_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %a0 = extractelement <2 x i64> %a, i32 0
  %a1 = extractelement <2 x i64> %a, i32 1
  %b0 = extractelement <2 x i64> %b, i32 0
  %b1 = extractelement <2 x i64> %b, i32 1
  %r0 = add i64 %a0, %a1
  %r1 = add i64 %b0, %b1
  %r00 = insertelement <2 x i64> zeroinitializer, i64 %r0, i32 0
  %r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1
  ret <2 x i64> %r01
}

; Integer counterpart of test_v4f32: { a0+a1, a2+a3, b0+b1, b2+b3 } as one
; <4 x i32> add.
define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @test_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %r0 = add i32 %a0, %a1
  %r1 = add i32 %a2, %a3
  %r2 = add i32 %b0, %b1
  %r3 = add i32 %b2, %b3
  %r00 = insertelement <4 x i32> zeroinitializer, i32 %r0, i32 0
  %r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3
  ret <4 x i32> %r03
}

; Eight i16 pair sums: the four pairs of %a fill lanes 0-3, the four pairs of
; %b fill lanes 4-7; all run lines expect one <8 x i16> add.
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @test_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %r0 = add i16 %a0, %a1
  %r1 = add i16 %a2, %a3
  %r2 = add i16 %a4, %a5
  %r3 = add i16 %a6, %a7
  %r4 = add i16 %b0, %b1
  %r5 = add i16 %b2, %b3
  %r6 = add i16 %b4, %b5
  %r7 = add i16 %b6, %b7
  %r00 = insertelement <8 x i16> zeroinitializer, i16 %r0, i32 0
  %r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1
  %r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2
  %r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3
  %r04 = insertelement <8 x i16> %r03, i16 %r4, i32 4
  %r05 = insertelement <8 x i16> %r04, i16 %r5, i32 5
  %r06 = insertelement <8 x i16> %r05, i16 %r6, i32 6
  %r07 = insertelement <8 x i16> %r06, i16 %r7, i32 7
  ret <8 x i16> %r07
}

; PR41892
; Stores f0+f1 to p[0] and f2+f3 to p[1]; the expectations require both scalar
; stores to be merged into a single <2 x float> store of a vector fadd.
define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){
; CHECK-LABEL: @test_v4f32_v2f32_store(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %x0 = extractelement <4 x float> %f, i64 0
  %x1 = extractelement <4 x float> %f, i64 1
  %add01 = fadd float %x0, %x1
  store float %add01, ptr %p, align 4
  %x2 = extractelement <4 x float> %f, i64 2
  %x3 = extractelement <4 x float> %f, i64 3
  %add23 = fadd float %x2, %x3
  %p23 = getelementptr inbounds float, ptr %p, i64 1
  store float %add23, ptr %p23, align 4
  ret void
}

;
; 256-bit vectors
;

; Result is { a0+a1, b0+b1, a2+a3, b2+b3 }. The AVX-prefixed expectations use
; one <4 x double> fadd; the SSE- and SLM-prefixed expectations use two
; <2 x double> fadds whose results are concatenated.
define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: @test_v4f64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT:    ret <4 x double> [[TMP7]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT:    ret <4 x double> [[TMP7]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <4 x double> [[TMP3]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %r0 = fadd double %a0, %a1
  %r1 = fadd double %b0, %b1
  %r2 = fadd double %a2, %a3
  %r3 = fadd double %b2, %b3
  %r00 = insertelement <4 x double> zeroinitializer, double %r0, i32 0
  %r01 = insertelement <4 x double> %r00, double %r1, i32 1
  %r02 = insertelement <4 x double> %r01, double %r2, i32 2
  %r03 = insertelement <4 x double> %r02, double %r3, i32 3
  ret <4 x double> %r03
}

; PR50392
; Only three lanes are written: lane 0 = a0+a1, lane 2 = b0+b1, lane 3 = b2+b3;
; lane 1 keeps the 0.0 from zeroinitializer. The SSE- and AVX-prefixed
; expectations pair a0+a1 with b0+b1 in a <2 x double> fadd and keep b2+b3
; scalar; the SLM-prefixed expectations keep a0+a1 scalar and pair b0+b1 with
; b2+b3 instead.
define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: @test_v4f64_partial_swizzle(
; SSE-NEXT:    [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; SSE-NEXT:    [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[R3:%.*]] = fadd double [[B2]], [[B3]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
; SSE-NEXT:    [[R0212:%.*]] = insertelement <4 x double> [[TMP4]], double 0.000000e+00, i64 1
; SSE-NEXT:    [[R03:%.*]] = insertelement <4 x double> [[R0212]], double [[R3]], i64 3
; SSE-NEXT:    ret <4 x double> [[R03]]
;
; SLM-LABEL: @test_v4f64_partial_swizzle(
; SLM-NEXT:    [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0
; SLM-NEXT:    [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1
; SLM-NEXT:    [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 2>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 0, i32 3>
; SLM-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SLM-NEXT:    [[R00:%.*]] = insertelement <4 x double> <double poison, double 0.000000e+00, double poison, double poison>, double [[R0]], i64 0
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SLM-NEXT:    [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SLM-NEXT:    ret <4 x double> [[R031]]
;
; AVX-LABEL: @test_v4f64_partial_swizzle(
; AVX-NEXT:    [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; AVX-NEXT:    [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; AVX-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT:    [[R3:%.*]] = fadd double [[B2]], [[B3]]
; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
; AVX-NEXT:    [[R0212:%.*]] = insertelement <4 x double> [[TMP4]], double 0.000000e+00, i64 1
; AVX-NEXT:    [[R03:%.*]] = insertelement <4 x double> [[R0212]], double [[R3]], i64 3
; AVX-NEXT:    ret <4 x double> [[R03]]
;
  %a0 = extractelement <4 x double> %a, i64 0
  %a1 = extractelement <4 x double> %a, i64 1
  %b0 = extractelement <4 x double> %b, i64 0
  %b1 = extractelement <4 x double> %b, i64 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %r0 = fadd double %a0, %a1
  %r2 = fadd double %b0, %b1
  %r3 = fadd double %b2, %b3
  %r00 = insertelement <4 x double> zeroinitializer, double %r0, i32 0
  %r02 = insertelement <4 x double> %r00, double %r2, i32 2
  %r03 = insertelement <4 x double> %r02, double %r3, i32 3
  ret <4 x double> %r03
}

; Result lanes 0-3 come from elements 0-3 of %a then %b, lanes 4-7 from
; elements 4-7 of %a then %b. The AVX-prefixed expectations use one
; <8 x float> fadd; the SSE- and SLM-prefixed expectations use two
; <4 x float> fadds whose results are concatenated.
define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT:    ret <8 x float> [[TMP7]]
;
; SLM-LABEL: @test_v8f32(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT:    ret <8 x float> [[TMP7]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT:    [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <8 x float> [[TMP3]]
;
  %a0 = extractelement <8 x float> %a, i32 0
  %a1 = extractelement <8 x float> %a, i32 1
  %a2 = extractelement <8 x float> %a, i32 2
  %a3 = extractelement <8 x float> %a, i32 3
  %a4 = extractelement <8 x float> %a, i32 4
  %a5 = extractelement <8 x float> %a, i32 5
  %a6 = extractelement <8 x float> %a, i32 6
  %a7 = extractelement <8 x float> %a, i32 7
  %b0 = extractelement <8 x float> %b, i32 0
  %b1 = extractelement <8 x float> %b, i32 1
  %b2 = extractelement <8 x float> %b, i32 2
  %b3 = extractelement <8 x float> %b, i32 3
  %b4 = extractelement <8 x float> %b, i32 4
  %b5 = extractelement <8 x float> %b, i32 5
  %b6 = extractelement <8 x float> %b, i32 6
  %b7 = extractelement <8 x float> %b, i32 7
  %r0 = fadd float %a0, %a1
  %r1 = fadd float %a2, %a3
  %r2 = fadd float %b0, %b1
  %r3 = fadd float %b2, %b3
  %r4 = fadd float %a4, %a5
  %r5 = fadd float %a6, %a7
  %r6 = fadd float %b4, %b5
  %r7 = fadd float %b6, %b7
  %r00 = insertelement <8 x float> zeroinitializer, float %r0, i32 0
  %r01 = insertelement <8 x float> %r00, float %r1, i32 1
  %r02 = insertelement <8 x float> %r01, float %r2, i32 2
  %r03 = insertelement <8 x float> %r02, float %r3, i32 3
  %r04 = insertelement <8 x float> %r03, float %r4, i32 4
  %r05 = insertelement <8 x float> %r04, float %r5, i32 5
  %r06 = insertelement <8 x float> %r05, float %r6, i32 6
  %r07 = insertelement <8 x float> %r06, float %r7, i32 7
  ret <8 x float> %r07
}

; Integer counterpart of test_v4f64: { a0+a1, b0+b1, a2+a3, b2+b3 }, expected
; as one <4 x i64> add under the AVX prefix and two <2 x i64> adds under the
; SSE and SLM prefixes.
define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: @test_v4i64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT:    ret <4 x i64> [[TMP7]]
;
; SLM-LABEL: @test_v4i64(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT:    ret <4 x i64> [[TMP7]]
;
; AVX-LABEL: @test_v4i64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %a0 = extractelement <4 x i64> %a, i32 0
  %a1 = extractelement <4 x i64> %a, i32 1
  %a2 = extractelement <4 x i64> %a, i32 2
  %a3 = extractelement <4 x i64> %a, i32 3
  %b0 = extractelement <4 x i64> %b, i32 0
  %b1 = extractelement <4 x i64> %b, i32 1
  %b2 = extractelement <4 x i64> %b, i32 2
  %b3 = extractelement <4 x i64> %b, i32 3
  %r0 = add i64 %a0, %a1
  %r1 = add i64 %b0, %b1
  %r2 = add i64 %a2, %a3
  %r3 = add i64 %b2, %b3
  %r00 = insertelement <4 x i64> zeroinitializer, i64 %r0, i32 0
  %r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
  %r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
  %r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
  ret <4 x i64> %r03
}

; Integer counterpart of test_v8f32, with the same lane order; expected as one
; <8 x i32> add under the AVX prefix and two <4 x i32> adds under the SSE and
; SLM prefixes.
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @test_v8i32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT:    ret <8 x i32> [[TMP7]]
;
; SLM-LABEL: @test_v8i32(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT:    ret <8 x i32> [[TMP7]]
;
; AVX-LABEL: @test_v8i32(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %a0 = extractelement <8 x i32> %a, i32 0
  %a1 = extractelement <8 x i32> %a, i32 1
  %a2 = extractelement <8 x i32> %a, i32 2
  %a3 = extractelement <8 x i32> %a, i32 3
  %a4 = extractelement <8 x i32> %a, i32 4
  %a5 = extractelement <8 x i32> %a, i32 5
  %a6 = extractelement <8 x i32> %a, i32 6
  %a7 = extractelement <8 x i32> %a, i32 7
  %b0 = extractelement <8 x i32> %b, i32 0
  %b1 = extractelement <8 x i32> %b, i32 1
  %b2 = extractelement <8 x i32> %b, i32 2
  %b3 = extractelement <8 x i32> %b, i32 3
  %b4 = extractelement <8 x i32> %b, i32 4
  %b5 = extractelement <8 x i32> %b, i32 5
  %b6 = extractelement <8 x i32> %b, i32 6
  %b7 = extractelement <8 x i32> %b, i32 7
  %r0 = add i32 %a0, %a1
  %r1 = add i32 %a2, %a3
  %r2 = add i32 %b0, %b1
  %r3 = add i32 %b2, %b3
  %r4 = add i32 %a4, %a5
  %r5 = add i32 %a6, %a7
  %r6 = add i32 %b4, %b5
  %r7 = add i32 %b6, %b7
  %r00 = insertelement <8 x i32> zeroinitializer, i32 %r0, i32 0
  %r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3
  %r04 = insertelement <8 x i32> %r03, i32 %r4, i32 4
  %r05 = insertelement <8 x i32> %r04, i32 %r5, i32 5
  %r06 = insertelement <8 x i32> %r05, i32 %r6, i32 6
  %r07 = insertelement <8 x i32> %r06, i32 %r7, i32 7
  ret <8 x i32> %r07
}

; Sixteen i16 pair sums: lanes 0-3 and 4-7 come from elements 0-7 of %a and %b,
; lanes 8-11 and 12-15 from elements 8-15 of %a and %b. Expected as one
; <16 x i16> add under the AVX prefix and two <8 x i16> adds under the SSE and
; SLM prefixes.
define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: @test_v16i16(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP1]], [[TMP3]]
; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT:    ret <16 x i16> [[TMP7]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SLM-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP1]], [[TMP3]]
; SLM-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT:    ret <16 x i16> [[TMP7]]
;
; AVX-LABEL: @test_v16i16(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
  %a0  = extractelement <16 x i16> %a, i32 0
  %a1  = extractelement <16 x i16> %a, i32 1
  %a2  = extractelement <16 x i16> %a, i32 2
  %a3  = extractelement <16 x i16> %a, i32 3
  %a4  = extractelement <16 x i16> %a, i32 4
  %a5  = extractelement <16 x i16> %a, i32 5
  %a6  = extractelement <16 x i16> %a, i32 6
  %a7  = extractelement <16 x i16> %a, i32 7
  %a8  = extractelement <16 x i16> %a, i32 8
  %a9  = extractelement <16 x i16> %a, i32 9
  %a10 = extractelement <16 x i16> %a, i32 10
  %a11 = extractelement <16 x i16> %a, i32 11
  %a12 = extractelement <16 x i16> %a, i32 12
  %a13 = extractelement <16 x i16> %a, i32 13
  %a14 = extractelement <16 x i16> %a, i32 14
  %a15 = extractelement <16 x i16> %a, i32 15
  %b0  = extractelement <16 x i16> %b, i32 0
  %b1  = extractelement <16 x i16> %b, i32 1
  %b2  = extractelement <16 x i16> %b, i32 2
  %b3  = extractelement <16 x i16> %b, i32 3
  %b4  = extractelement <16 x i16> %b, i32 4
  %b5  = extractelement <16 x i16> %b, i32 5
  %b6  = extractelement <16 x i16> %b, i32 6
  %b7  = extractelement <16 x i16> %b, i32 7
  %b8  = extractelement <16 x i16> %b, i32 8
  %b9  = extractelement <16 x i16> %b, i32 9
  %b10 = extractelement <16 x i16> %b, i32 10
  %b11 = extractelement <16 x i16> %b, i32 11
  %b12 = extractelement <16 x i16> %b, i32 12
  %b13 = extractelement <16 x i16> %b, i32 13
  %b14 = extractelement <16 x i16> %b, i32 14
  %b15 = extractelement <16 x i16> %b, i32 15
  %r0  = add i16 %a0 , %a1
  %r1  = add i16 %a2 , %a3
  %r2  = add i16 %a4 , %a5
  %r3  = add i16 %a6 , %a7
  %r4  = add i16 %b0 , %b1
  %r5  = add i16 %b2 , %b3
  %r6  = add i16 %b4 , %b5
  %r7  = add i16 %b6 , %b7
  %r8  = add i16 %a8 , %a9
  %r9  = add i16 %a10, %a11
  %r10 = add i16 %a12, %a13
  %r11 = add i16 %a14, %a15
  %r12 = add i16 %b8 , %b9
  %r13 = add i16 %b10, %b11
  %r14 = add i16 %b12, %b13
  %r15 = add i16 %b14, %b15
  %rv0  = insertelement <16 x i16> zeroinitializer, i16 %r0 , i32 0
  %rv1  = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
  %rv2  = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
  %rv3  = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
  %rv4  = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
  %rv5  = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
  %rv6  = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
  %rv7  = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
  %rv8  = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
  %rv9  = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
  %rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
  %rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
  %rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
  %rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
  %rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
  %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
  ret <16 x i16> %rv15
}