xref: /llvm-project/llvm/test/CodeGen/X86/sse-insertelt.ll (revision 7d8fd4f5db0dd52cf9802889690aab876ad6646b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2   | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx    | FileCheck %s --check-prefixes=AVX
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=AVX
6
7; Insertion into element 0 (the first element) of a 128-bit vector.
8
; Insert the scalar float %s into element 0 of <4 x float> %x.
; Per the generated checks below, the expected lowering is a single
; instruction in every configuration - movss with SSE2, blendps with
; SSE4.1, vblendps with AVX.
9define <4 x float> @insert_f32_firstelt(<4 x float> %x, float %s) {
10; SSE2-LABEL: insert_f32_firstelt:
11; SSE2:       # %bb.0:
12; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
13; SSE2-NEXT:    retq
14;
15; SSE41-LABEL: insert_f32_firstelt:
16; SSE41:       # %bb.0:
17; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
18; SSE41-NEXT:    retq
19;
20; AVX-LABEL: insert_f32_firstelt:
21; AVX:       # %bb.0:
22; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
23; AVX-NEXT:    retq
24  %i0 = insertelement <4 x float> %x, float %s, i32 0
25  ret <4 x float> %i0
26}
27
; Insert the scalar double %s into element 0 of <2 x double> %x.
; The SSE2 checks expect movsd; the SSE4.1 and AVX checks expect a
; float-lane blend (blendps / vblendps) that takes lanes 0-1 from the
; scalar register and lanes 2-3 from the original vector.
28define <2 x double> @insert_f64_firstelt(<2 x double> %x, double %s) {
29; SSE2-LABEL: insert_f64_firstelt:
30; SSE2:       # %bb.0:
31; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
32; SSE2-NEXT:    retq
33;
34; SSE41-LABEL: insert_f64_firstelt:
35; SSE41:       # %bb.0:
36; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
37; SSE41-NEXT:    retq
38;
39; AVX-LABEL: insert_f64_firstelt:
40; AVX:       # %bb.0:
41; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
42; AVX-NEXT:    retq
43  %i0 = insertelement <2 x double> %x, double %s, i32 0
44  ret <2 x double> %i0
45}
46
; Insert the scalar i8 %s into element 0 of <16 x i8> %x.
; The SSE2 checks expect a mask-and-merge sequence: load a constant with
; byte 0 cleared, pand the vector, movd the scalar into a register,
; pandn it against the mask, then por the two halves together.
; The SSE4.1 and AVX checks expect a single pinsrb / vpinsrb with index 0.
47define <16 x i8> @insert_i8_firstelt(<16 x i8> %x, i8 %s) {
48; SSE2-LABEL: insert_i8_firstelt:
49; SSE2:       # %bb.0:
50; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
51; SSE2-NEXT:    pand %xmm1, %xmm0
52; SSE2-NEXT:    movd %edi, %xmm2
53; SSE2-NEXT:    pandn %xmm2, %xmm1
54; SSE2-NEXT:    por %xmm1, %xmm0
55; SSE2-NEXT:    retq
56;
57; SSE41-LABEL: insert_i8_firstelt:
58; SSE41:       # %bb.0:
59; SSE41-NEXT:    pinsrb $0, %edi, %xmm0
60; SSE41-NEXT:    retq
61;
62; AVX-LABEL: insert_i8_firstelt:
63; AVX:       # %bb.0:
64; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
65; AVX-NEXT:    retq
66  %i0 = insertelement <16 x i8> %x, i8 %s, i32 0
67  ret <16 x i8> %i0
68}
69
; Insert the scalar i16 %s into element 0 of <8 x i16> %x.
; Both SSE runs share one set of checks (the common SSE prefix) and
; expect a single pinsrw with index 0; the AVX checks expect vpinsrw.
70define <8 x i16> @insert_i16_firstelt(<8 x i16> %x, i16 %s) {
71; SSE-LABEL: insert_i16_firstelt:
72; SSE:       # %bb.0:
73; SSE-NEXT:    pinsrw $0, %edi, %xmm0
74; SSE-NEXT:    retq
75;
76; AVX-LABEL: insert_i16_firstelt:
77; AVX:       # %bb.0:
78; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm0
79; AVX-NEXT:    retq
80  %i0 = insertelement <8 x i16> %x, i16 %s, i32 0
81  ret <8 x i16> %i0
82}
83
; Insert the scalar i32 %s into element 0 of <4 x i32> %x.
; The SSE2 checks expect the scalar to be moved into a vector register
; (movd) and merged into lane 0 with movss; the SSE4.1 and AVX checks
; expect a single pinsrd / vpinsrd with index 0.
84define <4 x i32> @insert_i32_firstelt(<4 x i32> %x, i32 %s) {
85; SSE2-LABEL: insert_i32_firstelt:
86; SSE2:       # %bb.0:
87; SSE2-NEXT:    movd %edi, %xmm1
88; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
89; SSE2-NEXT:    retq
90;
91; SSE41-LABEL: insert_i32_firstelt:
92; SSE41:       # %bb.0:
93; SSE41-NEXT:    pinsrd $0, %edi, %xmm0
94; SSE41-NEXT:    retq
95;
96; AVX-LABEL: insert_i32_firstelt:
97; AVX:       # %bb.0:
98; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm0
99; AVX-NEXT:    retq
100  %i0 = insertelement <4 x i32> %x, i32 %s, i32 0
101  ret <4 x i32> %i0
102}
103
; Insert the scalar i64 %s into element 0 of <2 x i64> %x.
; The SSE2 checks expect movq of the scalar into a vector register
; followed by movsd to merge it into the low half; the SSE4.1 and AVX
; checks expect a single pinsrq / vpinsrq with index 0.
104define <2 x i64> @insert_i64_firstelt(<2 x i64> %x, i64 %s) {
105; SSE2-LABEL: insert_i64_firstelt:
106; SSE2:       # %bb.0:
107; SSE2-NEXT:    movq %rdi, %xmm1
108; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
109; SSE2-NEXT:    retq
110;
111; SSE41-LABEL: insert_i64_firstelt:
112; SSE41:       # %bb.0:
113; SSE41-NEXT:    pinsrq $0, %rdi, %xmm0
114; SSE41-NEXT:    retq
115;
116; AVX-LABEL: insert_i64_firstelt:
117; AVX:       # %bb.0:
118; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
119; AVX-NEXT:    retq
120  %i0 = insertelement <2 x i64> %x, i64 %s, i32 0
121  ret <2 x i64> %i0
122}
123
124; Insertion into element 1 (the second element).
125
; Insert the scalar float %s into element 1 of <4 x float> %x.
; The SSE2 checks expect a three-instruction shuffle sequence
; (movlhps, shufps, then movaps to copy the result into xmm0); the
; SSE4.1 and AVX checks expect a single insertps / vinsertps.
126define <4 x float> @insert_f32_secondelt(<4 x float> %x, float %s) {
127; SSE2-LABEL: insert_f32_secondelt:
128; SSE2:       # %bb.0:
129; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
130; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
131; SSE2-NEXT:    movaps %xmm1, %xmm0
132; SSE2-NEXT:    retq
133;
134; SSE41-LABEL: insert_f32_secondelt:
135; SSE41:       # %bb.0:
136; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
137; SSE41-NEXT:    retq
138;
139; AVX-LABEL: insert_f32_secondelt:
140; AVX:       # %bb.0:
141; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
142; AVX-NEXT:    retq
143  %i0 = insertelement <4 x float> %x, float %s, i32 1
144  ret <4 x float> %i0
145}
146
; Insert the scalar double %s into element 1 of <2 x double> %x.
; Both SSE runs share one set of checks and expect a single movlhps
; that keeps the low half of the vector and places the scalar in the
; high half; the AVX checks expect the equivalent vmovlhps.
147define <2 x double> @insert_f64_secondelt(<2 x double> %x, double %s) {
148; SSE-LABEL: insert_f64_secondelt:
149; SSE:       # %bb.0:
150; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
151; SSE-NEXT:    retq
152;
153; AVX-LABEL: insert_f64_secondelt:
154; AVX:       # %bb.0:
155; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
156; AVX-NEXT:    retq
157  %i0 = insertelement <2 x double> %x, double %s, i32 1
158  ret <2 x double> %i0
159}
160
; Insert the scalar i8 %s into element 1 of <16 x i8> %x.
; The SSE2 checks expect the same mask-and-merge shape as the element-0
; case, but with byte 1 cleared in the mask constant and an extra
; psllw $8 applied to the register holding the scalar before the merge.
; The SSE4.1 and AVX checks expect a single pinsrb / vpinsrb with index 1.
161define <16 x i8> @insert_i8_secondelt(<16 x i8> %x, i8 %s) {
162; SSE2-LABEL: insert_i8_secondelt:
163; SSE2:       # %bb.0:
164; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
165; SSE2-NEXT:    pand %xmm1, %xmm0
166; SSE2-NEXT:    movd %edi, %xmm2
167; SSE2-NEXT:    psllw $8, %xmm2
168; SSE2-NEXT:    pandn %xmm2, %xmm1
169; SSE2-NEXT:    por %xmm1, %xmm0
170; SSE2-NEXT:    retq
171;
172; SSE41-LABEL: insert_i8_secondelt:
173; SSE41:       # %bb.0:
174; SSE41-NEXT:    pinsrb $1, %edi, %xmm0
175; SSE41-NEXT:    retq
176;
177; AVX-LABEL: insert_i8_secondelt:
178; AVX:       # %bb.0:
179; AVX-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0
180; AVX-NEXT:    retq
181  %i0 = insertelement <16 x i8> %x, i8 %s, i32 1
182  ret <16 x i8> %i0
183}
184
; Insert the scalar i16 %s into element 1 of <8 x i16> %x.
; Both SSE runs share one set of checks and expect a single pinsrw with
; index 1; the AVX checks expect vpinsrw with index 1.
185define <8 x i16> @insert_i16_secondelt(<8 x i16> %x, i16 %s) {
186; SSE-LABEL: insert_i16_secondelt:
187; SSE:       # %bb.0:
188; SSE-NEXT:    pinsrw $1, %edi, %xmm0
189; SSE-NEXT:    retq
190;
191; AVX-LABEL: insert_i16_secondelt:
192; AVX:       # %bb.0:
193; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
194; AVX-NEXT:    retq
195  %i0 = insertelement <8 x i16> %x, i16 %s, i32 1
196  ret <8 x i16> %i0
197}
198
; Insert the scalar i32 %s into element 1 of <4 x i32> %x.
; The SSE2 checks expect movd of the scalar followed by a shuffle
; sequence (punpcklqdq, shufps) and a movaps that copies the result
; into xmm0; the SSE4.1 and AVX checks expect a single pinsrd / vpinsrd
; with index 1.
199define <4 x i32> @insert_i32_secondelt(<4 x i32> %x, i32 %s) {
200; SSE2-LABEL: insert_i32_secondelt:
201; SSE2:       # %bb.0:
202; SSE2-NEXT:    movd %edi, %xmm1
203; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
204; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
205; SSE2-NEXT:    movaps %xmm1, %xmm0
206; SSE2-NEXT:    retq
207;
208; SSE41-LABEL: insert_i32_secondelt:
209; SSE41:       # %bb.0:
210; SSE41-NEXT:    pinsrd $1, %edi, %xmm0
211; SSE41-NEXT:    retq
212;
213; AVX-LABEL: insert_i32_secondelt:
214; AVX:       # %bb.0:
215; AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
216; AVX-NEXT:    retq
217  %i0 = insertelement <4 x i32> %x, i32 %s, i32 1
218  ret <4 x i32> %i0
219}
220
; Insert the scalar i64 %s into element 1 of <2 x i64> %x.
; The SSE2 checks expect movq of the scalar into a vector register and
; a punpcklqdq that keeps the low half of the vector and places the
; scalar in the high half; the SSE4.1 and AVX checks expect a single
; pinsrq / vpinsrq with index 1.
221define <2 x i64> @insert_i64_secondelt(<2 x i64> %x, i64 %s) {
222; SSE2-LABEL: insert_i64_secondelt:
223; SSE2:       # %bb.0:
224; SSE2-NEXT:    movq %rdi, %xmm1
225; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
226; SSE2-NEXT:    retq
227;
228; SSE41-LABEL: insert_i64_secondelt:
229; SSE41:       # %bb.0:
230; SSE41-NEXT:    pinsrq $1, %rdi, %xmm0
231; SSE41-NEXT:    retq
232;
233; AVX-LABEL: insert_i64_secondelt:
234; AVX:       # %bb.0:
235; AVX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
236; AVX-NEXT:    retq
237  %i0 = insertelement <2 x i64> %x, i64 %s, i32 1
238  ret <2 x i64> %i0
239}
240
241; Insertion of the same scalar into two elements (elements 0 and 1).
242
; Insert the same scalar float %s into elements 0 and 1 of <4 x float> %x
; using two chained insertelement instructions.
; The checks expect the pair to be combined into one shuffle: shufps
; (plus a movaps copy into xmm0) for both SSE runs, and a single
; three-operand vshufps for AVX.
243define <4 x float> @insert_f32_two_elts(<4 x float> %x, float %s) {
244; SSE-LABEL: insert_f32_two_elts:
245; SSE:       # %bb.0:
246; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
247; SSE-NEXT:    movaps %xmm1, %xmm0
248; SSE-NEXT:    retq
249;
250; AVX-LABEL: insert_f32_two_elts:
251; AVX:       # %bb.0:
252; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,0],xmm0[2,3]
253; AVX-NEXT:    retq
254  %i0 = insertelement <4 x float> %x, float %s, i32 0
255  %i1 = insertelement <4 x float> %i0, float %s, i32 1
256  ret <4 x float> %i1
257}
258
; Insert the same scalar double %s into both elements of <2 x double> %x
; using two chained insertelement instructions, so no element of %x
; survives in the result.
; The SSE2 checks expect movaps + movlhps to duplicate the scalar; the
; SSE4.1 and AVX checks expect a single movddup / vmovddup.
259define <2 x double> @insert_f64_two_elts(<2 x double> %x, double %s) {
260; SSE2-LABEL: insert_f64_two_elts:
261; SSE2:       # %bb.0:
262; SSE2-NEXT:    movaps %xmm1, %xmm0
263; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
264; SSE2-NEXT:    retq
265;
266; SSE41-LABEL: insert_f64_two_elts:
267; SSE41:       # %bb.0:
268; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
269; SSE41-NEXT:    retq
270;
271; AVX-LABEL: insert_f64_two_elts:
272; AVX:       # %bb.0:
273; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
274; AVX-NEXT:    retq
275  %i0 = insertelement <2 x double> %x, double %s, i32 0
276  %i1 = insertelement <2 x double> %i0, double %s, i32 1
277  ret <2 x double> %i1
278}
279
; Insert the same scalar i8 %s into elements 0 and 1 of <16 x i8> %x
; using two chained insertelement instructions.
; The SSE2 checks expect two back-to-back mask-and-merge sequences (one
; per element); the second reuses the register holding the scalar after
; a psllw $8. The SSE4.1 and AVX checks expect two pinsrb / vpinsrb
; instructions, with indices 0 and 1.
280define <16 x i8> @insert_i8_two_elts(<16 x i8> %x, i8 %s) {
281; SSE2-LABEL: insert_i8_two_elts:
282; SSE2:       # %bb.0:
283; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
284; SSE2-NEXT:    pand %xmm1, %xmm0
285; SSE2-NEXT:    movd %edi, %xmm2
286; SSE2-NEXT:    pandn %xmm2, %xmm1
287; SSE2-NEXT:    por %xmm1, %xmm0
288; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
289; SSE2-NEXT:    pand %xmm1, %xmm0
290; SSE2-NEXT:    psllw $8, %xmm2
291; SSE2-NEXT:    pandn %xmm2, %xmm1
292; SSE2-NEXT:    por %xmm1, %xmm0
293; SSE2-NEXT:    retq
294;
295; SSE41-LABEL: insert_i8_two_elts:
296; SSE41:       # %bb.0:
297; SSE41-NEXT:    pinsrb $0, %edi, %xmm0
298; SSE41-NEXT:    pinsrb $1, %edi, %xmm0
299; SSE41-NEXT:    retq
300;
301; AVX-LABEL: insert_i8_two_elts:
302; AVX:       # %bb.0:
303; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
304; AVX-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0
305; AVX-NEXT:    retq
306  %i0 = insertelement <16 x i8> %x, i8 %s, i32 0
307  %i1 = insertelement <16 x i8> %i0, i8 %s, i32 1
308  ret <16 x i8> %i1
309}
310
; Insert the same scalar i16 %s into elements 0 and 1 of <8 x i16> %x
; using two chained insertelement instructions.
; Both SSE runs share one set of checks and expect two pinsrw
; instructions (indices 0 and 1); the AVX checks expect two vpinsrw.
311define <8 x i16> @insert_i16_two_elts(<8 x i16> %x, i16 %s) {
312; SSE-LABEL: insert_i16_two_elts:
313; SSE:       # %bb.0:
314; SSE-NEXT:    pinsrw $0, %edi, %xmm0
315; SSE-NEXT:    pinsrw $1, %edi, %xmm0
316; SSE-NEXT:    retq
317;
318; AVX-LABEL: insert_i16_two_elts:
319; AVX:       # %bb.0:
320; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm0
321; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
322; AVX-NEXT:    retq
323  %i0 = insertelement <8 x i16> %x, i16 %s, i32 0
324  %i1 = insertelement <8 x i16> %i0, i16 %s, i32 1
325  ret <8 x i16> %i1
326}
327
; Insert the same scalar i32 %s into elements 0 and 1 of <4 x i32> %x
; using two chained insertelement instructions.
; The SSE2 checks expect the scalar to be moved into two vector
; registers (two movd from %edi), combined with punpcklqdq and shufps,
; and copied into xmm0 with movaps. The SSE4.1 and AVX checks expect
; two pinsrd / vpinsrd instructions, with indices 0 and 1.
328define <4 x i32> @insert_i32_two_elts(<4 x i32> %x, i32 %s) {
329; SSE2-LABEL: insert_i32_two_elts:
330; SSE2:       # %bb.0:
331; SSE2-NEXT:    movd %edi, %xmm2
332; SSE2-NEXT:    movd %edi, %xmm1
333; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
334; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
335; SSE2-NEXT:    movaps %xmm1, %xmm0
336; SSE2-NEXT:    retq
337;
338; SSE41-LABEL: insert_i32_two_elts:
339; SSE41:       # %bb.0:
340; SSE41-NEXT:    pinsrd $0, %edi, %xmm0
341; SSE41-NEXT:    pinsrd $1, %edi, %xmm0
342; SSE41-NEXT:    retq
343;
344; AVX-LABEL: insert_i32_two_elts:
345; AVX:       # %bb.0:
346; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm0
347; AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
348; AVX-NEXT:    retq
349  %i0 = insertelement <4 x i32> %x, i32 %s, i32 0
350  %i1 = insertelement <4 x i32> %i0, i32 %s, i32 1
351  ret <4 x i32> %i1
352}
353
; Insert the same scalar i64 %s into both elements of <2 x i64> %x
; using two chained insertelement instructions, so no element of %x
; survives in the result.
; Both SSE runs share one set of checks and expect movq of the scalar
; into xmm0 followed by a pshufd that duplicates the low 64 bits.
; NOTE(review): unlike every other function in this file, this one has
; no AVX check lines, even though the +avx and +avx2 RUN lines both use
; the AVX prefix, so AVX codegen is not verified here. Presumably the two
; AVX runs produce different code and the update script therefore could
; not emit checks under the shared prefix - TODO confirm; if so, giving
; each AVX run its own additional prefix and regenerating the checks
; would restore coverage.
354define <2 x i64> @insert_i64_two_elts(<2 x i64> %x, i64 %s) {
355; SSE-LABEL: insert_i64_two_elts:
356; SSE:       # %bb.0:
357; SSE-NEXT:    movq %rdi, %xmm0
358; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
359; SSE-NEXT:    retq
360  %i0 = insertelement <2 x i64> %x, i64 %s, i32 0
361  %i1 = insertelement <2 x i64> %i0, i64 %s, i32 1
362  ret <2 x i64> %i1
363}
364