xref: /llvm-project/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
3; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX1,X86-AVX1
4; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX512,X86-AVX512
5; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX1,X64-AVX1
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX512,X64-AVX512
8
; Calls @llvm.x86.ssse3.phadd.d.128 on two <4 x i32> arguments and returns the
; result unchanged. The checks expect exactly one phaddd (SSE prefix) or
; vphaddd (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
9define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
10; SSE-LABEL: test_x86_ssse3_phadd_d_128:
11; SSE:       ## %bb.0:
12; SSE-NEXT:    phaddd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x02,0xc1]
13; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
14;
15; AVX-LABEL: test_x86_ssse3_phadd_d_128:
16; AVX:       ## %bb.0:
17; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x02,0xc1]
18; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
19  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
20  ret <4 x i32> %res
21}
22declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
23
24
; Calls @llvm.x86.ssse3.phadd.sw.128 on two <8 x i16> arguments and returns the
; result unchanged. The checks expect exactly one phaddsw (SSE prefix) or
; vphaddsw (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
25define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
26; SSE-LABEL: test_x86_ssse3_phadd_sw_128:
27; SSE:       ## %bb.0:
28; SSE-NEXT:    phaddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x03,0xc1]
29; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
30;
31; AVX-LABEL: test_x86_ssse3_phadd_sw_128:
32; AVX:       ## %bb.0:
33; AVX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x03,0xc1]
34; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
35  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
36  ret <8 x i16> %res
37}
38declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
39
40
; Calls @llvm.x86.ssse3.phadd.w.128 on two <8 x i16> arguments and returns the
; result unchanged. The checks expect exactly one phaddw (SSE prefix) or
; vphaddw (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
41define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
42; SSE-LABEL: test_x86_ssse3_phadd_w_128:
43; SSE:       ## %bb.0:
44; SSE-NEXT:    phaddw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x01,0xc1]
45; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
46;
47; AVX-LABEL: test_x86_ssse3_phadd_w_128:
48; AVX:       ## %bb.0:
49; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x01,0xc1]
50; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
51  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
52  ret <8 x i16> %res
53}
54declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
55
56
; Calls @llvm.x86.ssse3.phsub.d.128 on two <4 x i32> arguments and returns the
; result unchanged. The checks expect exactly one phsubd (SSE prefix) or
; vphsubd (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
57define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
58; SSE-LABEL: test_x86_ssse3_phsub_d_128:
59; SSE:       ## %bb.0:
60; SSE-NEXT:    phsubd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x06,0xc1]
61; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
62;
63; AVX-LABEL: test_x86_ssse3_phsub_d_128:
64; AVX:       ## %bb.0:
65; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x06,0xc1]
66; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
67  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
68  ret <4 x i32> %res
69}
70declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
71
72
; Calls @llvm.x86.ssse3.phsub.sw.128 on two <8 x i16> arguments and returns the
; result unchanged. The checks expect exactly one phsubsw (SSE prefix) or
; vphsubsw (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
73define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
74; SSE-LABEL: test_x86_ssse3_phsub_sw_128:
75; SSE:       ## %bb.0:
76; SSE-NEXT:    phsubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x07,0xc1]
77; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
78;
79; AVX-LABEL: test_x86_ssse3_phsub_sw_128:
80; AVX:       ## %bb.0:
81; AVX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x07,0xc1]
82; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
83  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
84  ret <8 x i16> %res
85}
86declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
87
88
; Calls @llvm.x86.ssse3.phsub.w.128 on two <8 x i16> arguments and returns the
; result unchanged. The checks expect exactly one phsubw (SSE prefix) or
; vphsubw (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
89define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
90; SSE-LABEL: test_x86_ssse3_phsub_w_128:
91; SSE:       ## %bb.0:
92; SSE-NEXT:    phsubw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x05,0xc1]
93; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
94;
95; AVX-LABEL: test_x86_ssse3_phsub_w_128:
96; AVX:       ## %bb.0:
97; AVX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x05,0xc1]
98; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
99  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
100  ret <8 x i16> %res
101}
102declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
103
104
; Calls @llvm.x86.ssse3.pmadd.ub.sw.128 on two <16 x i8> register arguments and
; returns the <8 x i16> result. The checks expect a single pmaddubsw (SSE
; prefix) or vpmaddubsw. The +avx and AVX-512 runs are checked under separate
; prefixes because the AVX-512 output line additionally carries the
; "EVEX TO VEX Compression" annotation; the encoding bytes are the same.
105define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
106; SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
107; SSE:       ## %bb.0:
108; SSE-NEXT:    pmaddubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x04,0xc1]
109; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
110;
111; AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
112; AVX1:       ## %bb.0:
113; AVX1-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x04,0xc1]
114; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
115;
116; AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
117; AVX512:       ## %bb.0:
118; AVX512-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1]
119; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
120  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
121  ret <8 x i16> %res
122}
123declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
124
125
126; Make sure we don't commute this operation.
; Here the FIRST intrinsic operand (%a0) is loaded from %ptr while the second
; (%a1) arrives in a register. The checks expect the load to stay a separate
; movdqa/vmovdqa into xmm1, and (v)pmaddubsw to keep the loaded value in the
; first-source position, instead of the load being folded as a memory operand
; with the two sources swapped.
; NOTE(review): per the Intel SDM, PMADDUBSW multiplies unsigned bytes of one
; source with signed bytes of the other, so swapping the operands would change
; the result - confirm against the instruction reference.
; The 32-bit and 64-bit runs are checked under separate X86-/X64- prefixes
; because the pointer is read from the stack on i386 and from rdi on x86_64.
127define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128_load_op0(ptr %ptr, <16 x i8> %a1) {
128; X86-SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
129; X86-SSE:       ## %bb.0:
130; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
131; X86-SSE-NEXT:    movdqa (%eax), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x08]
132; X86-SSE-NEXT:    pmaddubsw %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x38,0x04,0xc8]
133; X86-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
134; X86-SSE-NEXT:    retl ## encoding: [0xc3]
135;
136; X86-AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
137; X86-AVX1:       ## %bb.0:
138; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
139; X86-AVX1-NEXT:    vmovdqa (%eax), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x08]
140; X86-AVX1-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0x04,0xc0]
141; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
142;
143; X86-AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
144; X86-AVX512:       ## %bb.0:
145; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
146; X86-AVX512-NEXT:    vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08]
147; X86-AVX512-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0]
148; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
149;
150; X64-SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
151; X64-SSE:       ## %bb.0:
152; X64-SSE-NEXT:    movdqa (%rdi), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x0f]
153; X64-SSE-NEXT:    pmaddubsw %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x38,0x04,0xc8]
154; X64-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
155; X64-SSE-NEXT:    retq ## encoding: [0xc3]
156;
157; X64-AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
158; X64-AVX1:       ## %bb.0:
159; X64-AVX1-NEXT:    vmovdqa (%rdi), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x0f]
160; X64-AVX1-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0x04,0xc0]
161; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
162;
163; X64-AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
164; X64-AVX512:       ## %bb.0:
165; X64-AVX512-NEXT:    vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f]
166; X64-AVX512-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0]
167; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
168  %a0 = load <16 x i8>, ptr %ptr
169  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
170  ret <8 x i16> %res
171}
172
173
; Calls @llvm.x86.ssse3.pmul.hr.sw.128 on two <8 x i16> arguments and returns
; the result unchanged. The checks expect a single pmulhrsw (SSE prefix) or
; vpmulhrsw. The +avx and AVX-512 runs are checked under separate prefixes
; because the AVX-512 output line additionally carries the
; "EVEX TO VEX Compression" annotation; the encoding bytes are the same.
174define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
175; SSE-LABEL: test_x86_ssse3_pmul_hr_sw_128:
176; SSE:       ## %bb.0:
177; SSE-NEXT:    pmulhrsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x0b,0xc1]
178; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
179;
180; AVX1-LABEL: test_x86_ssse3_pmul_hr_sw_128:
181; AVX1:       ## %bb.0:
182; AVX1-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
183; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
184;
185; AVX512-LABEL: test_x86_ssse3_pmul_hr_sw_128:
186; AVX512:       ## %bb.0:
187; AVX512-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
188; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
189  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
190  ret <8 x i16> %res
191}
192declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
193
194
; Calls @llvm.x86.ssse3.pshuf.b.128 on two <16 x i8> arguments and returns the
; result unchanged. The checks expect a single pshufb (SSE prefix) or vpshufb.
; The +avx and AVX-512 runs are checked under separate prefixes because the
; AVX-512 output line additionally carries the "EVEX TO VEX Compression"
; annotation; the encoding bytes are the same.
195define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
196; SSE-LABEL: test_x86_ssse3_pshuf_b_128:
197; SSE:       ## %bb.0:
198; SSE-NEXT:    pshufb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x00,0xc1]
199; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
200;
201; AVX1-LABEL: test_x86_ssse3_pshuf_b_128:
202; AVX1:       ## %bb.0:
203; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x00,0xc1]
204; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
205;
206; AVX512-LABEL: test_x86_ssse3_pshuf_b_128:
207; AVX512:       ## %bb.0:
208; AVX512-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1]
209; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
210  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
211  ret <16 x i8> %res
212}
213declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
214
215
; Calls @llvm.x86.ssse3.psign.b.128 on two <16 x i8> arguments and returns the
; result unchanged. The checks expect exactly one psignb (SSE prefix) or
; vpsignb (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
216define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
217; SSE-LABEL: test_x86_ssse3_psign_b_128:
218; SSE:       ## %bb.0:
219; SSE-NEXT:    psignb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x08,0xc1]
220; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
221;
222; AVX-LABEL: test_x86_ssse3_psign_b_128:
223; AVX:       ## %bb.0:
224; AVX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x08,0xc1]
225; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
226  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
227  ret <16 x i8> %res
228}
229declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
230
231
; Calls @llvm.x86.ssse3.psign.d.128 on two <4 x i32> arguments and returns the
; result unchanged. The checks expect exactly one psignd (SSE prefix) or
; vpsignd (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
232define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
233; SSE-LABEL: test_x86_ssse3_psign_d_128:
234; SSE:       ## %bb.0:
235; SSE-NEXT:    psignd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x0a,0xc1]
236; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
237;
238; AVX-LABEL: test_x86_ssse3_psign_d_128:
239; AVX:       ## %bb.0:
240; AVX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0a,0xc1]
241; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
242  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
243  ret <4 x i32> %res
244}
245declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
246
247
; Calls @llvm.x86.ssse3.psign.w.128 on two <8 x i16> arguments and returns the
; result unchanged. The checks expect exactly one psignw (SSE prefix) or
; vpsignw (AVX prefix, shared by the +avx and AVX-512 run lines) followed by
; the return, with the byte encodings listed below.
248define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
249; SSE-LABEL: test_x86_ssse3_psign_w_128:
250; SSE:       ## %bb.0:
251; SSE-NEXT:    psignw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x09,0xc1]
252; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
253;
254; AVX-LABEL: test_x86_ssse3_psign_w_128:
255; AVX:       ## %bb.0:
256; AVX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x09,0xc1]
257; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
258  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
259  ret <8 x i16> %res
260}
261declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
262