; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX512,X64-AVX512

; Tests for SSE2 and below, without SSE3+.

define void @test1(ptr %r, ptr %A, double %B) nounwind {
; X86-SSE-LABEL: test1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps (%ecx), %xmm0
; X86-SSE-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vmovaps (%ecx), %xmm0
; X86-AVX-NEXT:    vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
; X64-SSE-NEXT:    movapd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %tmp3 = load <2 x double>, ptr %A, align 16
  %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
  store <2 x double> %tmp9, ptr %r, align 16
  ret void
}

define void @test2(ptr %r, ptr %A, double %B) nounwind {
; X86-SSE-LABEL: test2:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps (%ecx), %xmm0
; X86-SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test2:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vmovaps (%ecx), %xmm0
; X86-AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test2:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rsi), %xmm1
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movaps %xmm1, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rsi), %xmm1
; X64-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %tmp3 = load <2 x double>, ptr %A, align 16
  %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
  store <2 x double> %tmp9, ptr %r, align 16
  ret void
}


define void @test3(ptr %res, ptr %A, ptr %B) nounwind {
; X86-SSE-LABEL: test3:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT:    movaps (%edx), %xmm0
; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test3:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT:    vmovaps (%edx), %xmm0
; X86-AVX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test3:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rsi), %xmm0
; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test3:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rsi), %xmm0
; X64-AVX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %tmp = load <4 x float>, ptr %B ; <<4 x float>> [#uses=2]
  %tmp3 = load <4 x float>, ptr %A ; <<4 x float>> [#uses=2]
  %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
  %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
  %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
  %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
  %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
  %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
  %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp13, ptr %res
  ret void
}

define void @test4(<4 x float> %X, ptr %res) nounwind {
; X86-SSE-LABEL: test4:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test4:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test4:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test4:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp5, ptr %res
  ret void
}

define <4 x i32> @test5(ptr %ptr) nounwind {
; X86-SSE-LABEL: test5:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl (%eax), %eax
; X86-SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    pxor %xmm0, %xmm0
; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test5:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl (%eax), %eax
; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test5:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq (%rdi), %rax
; X64-SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    pxor %xmm0, %xmm0
; X64-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test5:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq (%rdi), %rax
; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-AVX-NEXT:    retq
  %tmp = load ptr, ptr %ptr ; <ptr> [#uses=1]
  %tmp.upgrd.2 = load float, ptr %tmp ; <float> [#uses=1]
  %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
  %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
  %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
  %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
  %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
  %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
  %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %tmp36
}

define void @test6(ptr %res, ptr %A) nounwind {
; X86-SSE-LABEL: test6:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps (%ecx), %xmm0
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test6:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vmovaps (%ecx), %xmm0
; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test6:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rsi), %xmm0
; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test6:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rsi), %xmm0
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %A ; <<4 x float>> [#uses=1]
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp2, ptr %res
  ret void
}

define void @test7() nounwind {
; SSE-LABEL: test7:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, 0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm0, 0
; AVX-NEXT:    ret{{[l|q]}}
  bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
  shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
  store <4 x float> %2, ptr null
  ret void
}

@x = external dso_local global [4 x i32]

define <2 x i64> @test8() nounwind {
; X86-SSE-LABEL: test8:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movups x, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test8:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovups x, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test8:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups x(%rip), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups x(%rip), %xmm0
; X64-AVX-NEXT:    retq
  %tmp = load i32, ptr @x ; <i32> [#uses=1]
  %tmp3 = load i32, ptr getelementptr ([4 x i32], ptr @x, i32 0, i32 1) ; <i32> [#uses=1]
  %tmp5 = load i32, ptr getelementptr ([4 x i32], ptr @x, i32 0, i32 2) ; <i32> [#uses=1]
  %tmp7 = load i32, ptr getelementptr ([4 x i32], ptr @x, i32 0, i32 3) ; <i32> [#uses=1]
  %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
  %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
  %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
  %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
  %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %tmp16
}

define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
; X86-SSE-LABEL: test9:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test9:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test9:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test9:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX-NEXT:    retq
  %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
  %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
  %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp13
}

define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
; X86-SSE-LABEL: test10:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test10:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test10:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test10:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX-NEXT:    retq
  %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
  %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
  %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp13
}

define <2 x double> @test11(double %a, double %b) nounwind {
; X86-SSE-LABEL: test11:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test11:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test11:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test11:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX-NEXT:    retq
  %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
  %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
  ret <2 x double> %tmp7
}

define void @test12() nounwind {
; SSE-LABEL: test12:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd 0, %xmm0
; SSE-NEXT:    movapd {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT:    addps %xmm1, %xmm2
; SSE-NEXT:    movaps %xmm2, 0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: test12:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps 0, %xmm0
; AVX1-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vmovaps %xmm0, 0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX512-LABEL: test12:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps 0, %xmm0
; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; AVX512-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    vmovaps %xmm0, 0
; AVX512-NEXT:    ret{{[l|q]}}
  %tmp1 = load <4 x float>, ptr null ; <<4 x float>> [#uses=2]
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp4, ptr null
  ret void
}

define void @test13(ptr %res, ptr %A, ptr %B, ptr %C) nounwind {
; X86-SSE-LABEL: test13:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT:    movaps (%edx), %xmm0
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test13:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT:    vmovaps (%edx), %xmm0
; X86-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X86-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test13:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdx), %xmm0
; X64-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X64-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test13:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rdx), %xmm0
; X64-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X64-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %tmp3 = load <4 x float>, ptr %B ; <<4 x float>> [#uses=1]
  %tmp5 = load <4 x float>, ptr %C ; <<4 x float>> [#uses=1]
  %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp11, ptr %res
  ret void
}

define <4 x float> @test14(ptr %x, ptr %y) nounwind {
; X86-SSE-LABEL: test14:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps (%ecx), %xmm1
; X86-SSE-NEXT:    movaps (%eax), %xmm2
; X86-SSE-NEXT:    movaps %xmm2, %xmm0
; X86-SSE-NEXT:    addps %xmm1, %xmm0
; X86-SSE-NEXT:    subps %xmm1, %xmm2
; X86-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test14:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vmovaps (%ecx), %xmm0
; X86-AVX-NEXT:    vmovaps (%eax), %xmm1
; X86-AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT:    vsubps %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test14:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rsi), %xmm1
; X64-SSE-NEXT:    movaps (%rdi), %xmm2
; X64-SSE-NEXT:    movaps %xmm2, %xmm0
; X64-SSE-NEXT:    addps %xmm1, %xmm0
; X64-SSE-NEXT:    subps %xmm1, %xmm2
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test14:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rsi), %xmm0
; X64-AVX-NEXT:    vmovaps (%rdi), %xmm1
; X64-AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm2
; X64-AVX-NEXT:    vsubps %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX-NEXT:    retq
  %tmp = load <4 x float>, ptr %y ; <<4 x float>> [#uses=2]
  %tmp5 = load <4 x float>, ptr %x ; <<4 x float>> [#uses=2]
  %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
  %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
  %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp27
}

define <4 x float> @test15(ptr %x, ptr %y) nounwind {
; X86-SSE-LABEL: test15:
; X86-SSE:       # %bb.0: # %entry
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps (%ecx), %xmm0
; X86-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test15:
; X86-AVX:       # %bb.0: # %entry
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vmovaps (%ecx), %xmm0
; X86-AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test15:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    movaps (%rdi), %xmm0
; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test15:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0
; X64-AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X64-AVX-NEXT:    retq
entry:
  %tmp = load <4 x float>, ptr %y ; <<4 x float>> [#uses=1]
  %tmp3 = load <4 x float>, ptr %x ; <<4 x float>> [#uses=1]
  %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp4
}

; PR8900

define <2 x double> @test16(ptr nocapture %srcA, ptr nocapture %dst) {
; X86-SSE-LABEL: test16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movaps 96(%eax), %xmm0
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test16:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovaps 96(%eax), %xmm0
; X86-AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps 96(%rdi), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test16:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps 96(%rdi), %xmm0
; X64-AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-AVX-NEXT:    retq
  %i5 = getelementptr inbounds <4 x double>, ptr %srcA, i32 3
  %i6 = load <4 x double>, ptr %i5, align 32
  %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %i7
}

; PR9009
define fastcc void @test17() nounwind {
; X86-SSE-LABEL: test17:
; X86-SSE:       # %bb.0: # %entry
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [u,u,32768,32768]
; X86-SSE-NEXT:    movaps %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test17:
; X86-AVX:       # %bb.0: # %entry
; X86-AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768]
; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test17:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [u,u,32768,32768]
; X64-SSE-NEXT:    movaps %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test17:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768]
; X64-AVX-NEXT:    vmovaps %xmm0, (%rax)
; X64-AVX-NEXT:    retq
entry:
  %0 = insertelement <4 x i32> undef, i32 undef, i32 1
  %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %2 = bitcast <4 x i32> %1 to <4 x float>
  store <4 x float> %2, ptr undef
  ret void
}

; PR9210
define <4 x float> @f(<4 x double>) nounwind {
; SSE-LABEL: f:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    cvtpd2ps %xmm1, %xmm1
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: f:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtpd2ps %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    ret{{[l|q]}}
entry:
  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
  ret <4 x float> %double2float.i
}

define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
; SSE-LABEL: test_insert_64_zext:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_insert_64_zext:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    ret{{[l|q]}}
  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %1
}

define <4 x i32> @PR19721(<4 x i32> %i) {
; X86-SSE-LABEL: PR19721:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    retl
;
; AVX-LABEL: PR19721:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}}
;
; X64-SSE-LABEL: PR19721:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    retq
  %bc = bitcast <4 x i32> %i to i128
  %insert = and i128 %bc, -4294967296
  %bc2 = bitcast i128 %insert to <4 x i32>
  ret <4 x i32> %bc2
}

define <4 x i32> @test_mul(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: test_mul:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT:    pmuludq %xmm2, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mul:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %m = mul <4 x i32> %x, %y
  ret <4 x i32> %m
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; X64-AVX1: {{.*}}
; X64-AVX512: {{.*}}
; X86-AVX1: {{.*}}
; X86-AVX512: {{.*}}