; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

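; Tests for bitwise logic ops (and/or/xor) whose operands are both zero- or
; sign-extended from the same narrow vector element type. Most of the checked
; sequences perform the logic op in the narrow source type and then extend the
; result once, rather than extending each operand separately.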
define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

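; The <8 x i1> variants below cover boolean sources, where only the low bit of
; each incoming 16-bit lane is meaningful. The checked output therefore also
; masks with a constant-pool vector (zext cases) or uses a pslld $31 / psrad
; $31 shift pair (sext cases) to materialize the extended values.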
define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}