; xref: /llvm-project/llvm/test/CodeGen/X86/vec_insert-5.ll (revision 122874c955e06defb619b1afd4e26db482dbbf19)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN

; There are no MMX operations in @t1

define void  @t1(i32 %a, ptr %P) nounwind {
; X86-LABEL: t1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    pslld $12, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edi, %xmm0
; X64-NEXT:    pslld $12, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
 %tmp12 = shl i32 %a, 12
 %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
 %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
 %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
 store <1 x i64> %tmp23, ptr %P
 ret void
}

; Mask <4,4,4,0>: lanes 0-2 from the zero operand, lane 3 = element 0 of the load.
define <4 x float> @t2(ptr %P) nounwind {
; X86-LABEL: t2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

; Mask <2,3,4,4>: high half of the load moves to the low half, upper lanes zeroed.
define <4 x float> @t3(ptr %P) nounwind {
; X86-LABEL: t3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

; Mask <7,0,0,0> over (zero, load): lane 0 = element 3 of the load, rest zero.
define <4 x float> @t4(ptr %P) nounwind {
; X86-LABEL: t4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

; Same shuffle as @t4 but the load is only 4-byte aligned: without
; sse-unaligned-mem the load cannot be folded into shufps (movups is emitted);
; with it, the memory operand folds and the codegen matches @t4.
define <4 x float> @t4_under_aligned(ptr %P) nounwind {
; X86-LABEL: t4_under_aligned:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movups (%eax), %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X86-NEXT:    retl
;
; ALIGN-LABEL: t4_under_aligned:
; ALIGN:       # %bb.0:
; ALIGN-NEXT:    movups (%rdi), %xmm0
; ALIGN-NEXT:    xorps %xmm1, %xmm1
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; ALIGN-NEXT:    retq
;
; UNALIGN-LABEL: t4_under_aligned:
; UNALIGN:       # %bb.0:
; UNALIGN-NEXT:    xorps %xmm1, %xmm1
; UNALIGN-NEXT:    xorps %xmm0, %xmm0
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; UNALIGN-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P, align 4
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

; Byte shuffle with a zero in the last lane lowers to a psrlw word shift.
define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X86-LABEL: t5:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

; Same as @t5 but with an undef second operand; still lowers to psrlw.
define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X86-LABEL: t6:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Only the top lanes are defined (elements 1,2 into lanes 14,15): a byte
; shift-left by 13, i.e. pslldq.
define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X86-LABEL: t7:
; X86:       # %bb.0:
; X86-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X86-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

; Consecutive defined lanes offset by one, zero shifted into the top: psrldq $1.
define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X86-LABEL: t8:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

; Same shift pattern as @t8 but with an undef second operand; still psrldq $1.
define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X86-LABEL: t9:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
  ret <16 x i8> %s
}
