xref: /llvm-project/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll (revision 310e79875752886a7713911e2a1ec14bc75bd4b3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 < %s | FileCheck %s
3
4; ============================================================================ ;
5; 8-bit vector width
6; ============================================================================ ;
7
; Masked merge in the and/andn/or ("out") form on <1 x i8>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 16-bit registers: and.b16, not.b16, and.b16, or.b16.
8define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
9; CHECK-LABEL: out_v1i8(
10; CHECK:       {
11; CHECK-NEXT:    .reg .b16 %rs<8>;
12; CHECK-EMPTY:
13; CHECK-NEXT:  // %bb.0:
14; CHECK-NEXT:    ld.param.u8 %rs1, [out_v1i8_param_0];
15; CHECK-NEXT:    ld.param.u8 %rs2, [out_v1i8_param_2];
16; CHECK-NEXT:    and.b16 %rs3, %rs1, %rs2;
17; CHECK-NEXT:    ld.param.u8 %rs4, [out_v1i8_param_1];
18; CHECK-NEXT:    not.b16 %rs5, %rs2;
19; CHECK-NEXT:    and.b16 %rs6, %rs4, %rs5;
20; CHECK-NEXT:    or.b16 %rs7, %rs3, %rs6;
21; CHECK-NEXT:    st.param.b8 [func_retval0], %rs7;
22; CHECK-NEXT:    ret;
23  %mx = and <1 x i8> %x, %mask
24  %notmask = xor <1 x i8> %mask, <i8 -1>
25  %my = and <1 x i8> %y, %notmask
26  %r = or <1 x i8> %mx, %my
27  ret <1 x i8> %r
28}
29
30; ============================================================================ ;
31; 16-bit vector width
32; ============================================================================ ;
33
; Masked merge in the and/andn/or ("out") form on <1 x i16>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 16-bit registers: and.b16, not.b16, and.b16, or.b16.
34define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
35; CHECK-LABEL: out_v1i16(
36; CHECK:       {
37; CHECK-NEXT:    .reg .b16 %rs<8>;
38; CHECK-EMPTY:
39; CHECK-NEXT:  // %bb.0:
40; CHECK-NEXT:    ld.param.u16 %rs1, [out_v1i16_param_0];
41; CHECK-NEXT:    ld.param.u16 %rs2, [out_v1i16_param_2];
42; CHECK-NEXT:    and.b16 %rs3, %rs1, %rs2;
43; CHECK-NEXT:    ld.param.u16 %rs4, [out_v1i16_param_1];
44; CHECK-NEXT:    not.b16 %rs5, %rs2;
45; CHECK-NEXT:    and.b16 %rs6, %rs4, %rs5;
46; CHECK-NEXT:    or.b16 %rs7, %rs3, %rs6;
47; CHECK-NEXT:    st.param.b16 [func_retval0], %rs7;
48; CHECK-NEXT:    ret;
49  %mx = and <1 x i16> %x, %mask
50  %notmask = xor <1 x i16> %mask, <i16 -1>
51  %my = and <1 x i16> %y, %notmask
52  %r = or <1 x i16> %mx, %my
53  ret <1 x i16> %r
54}
55
56; ============================================================================ ;
57; 32-bit vector width
58; ============================================================================ ;
59
; Masked merge in the and/andn/or ("out") form on <4 x i8>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as one
; 32-bit register and the mask to be inverted with xor.b32 against -1:
; and.b32, xor.b32, and.b32, or.b32.
60define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
61; CHECK-LABEL: out_v4i8(
62; CHECK:       {
63; CHECK-NEXT:    .reg .b32 %r<8>;
64; CHECK-EMPTY:
65; CHECK-NEXT:  // %bb.0:
66; CHECK-NEXT:    ld.param.u32 %r1, [out_v4i8_param_1];
67; CHECK-NEXT:    ld.param.u32 %r2, [out_v4i8_param_0];
68; CHECK-NEXT:    ld.param.u32 %r3, [out_v4i8_param_2];
69; CHECK-NEXT:    and.b32 %r4, %r2, %r3;
70; CHECK-NEXT:    xor.b32 %r5, %r3, -1;
71; CHECK-NEXT:    and.b32 %r6, %r1, %r5;
72; CHECK-NEXT:    or.b32 %r7, %r4, %r6;
73; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
74; CHECK-NEXT:    ret;
75  %mx = and <4 x i8> %x, %mask
76  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
77  %my = and <4 x i8> %y, %notmask
78  %r = or <4 x i8> %mx, %my
79  ret <4 x i8> %r
80}
81
; Same and/andn/or masked merge as out_v4i8, except that element 2 of the
; all-ones constant used to invert the mask is undef:
;   r = (x & mask) | (y & (mask ^ <-1, -1, undef, -1>))
; The autogenerated assertions below expect the xor.b32 immediate to be
; -16711681 (0xFF00FFFF) instead of -1.
82define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
83; CHECK-LABEL: out_v4i8_undef(
84; CHECK:       {
85; CHECK-NEXT:    .reg .b32 %r<8>;
86; CHECK-EMPTY:
87; CHECK-NEXT:  // %bb.0:
88; CHECK-NEXT:    ld.param.u32 %r1, [out_v4i8_undef_param_1];
89; CHECK-NEXT:    ld.param.u32 %r2, [out_v4i8_undef_param_0];
90; CHECK-NEXT:    ld.param.u32 %r3, [out_v4i8_undef_param_2];
91; CHECK-NEXT:    and.b32 %r4, %r2, %r3;
92; CHECK-NEXT:    xor.b32 %r5, %r3, -16711681;
93; CHECK-NEXT:    and.b32 %r6, %r1, %r5;
94; CHECK-NEXT:    or.b32 %r7, %r4, %r6;
95; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
96; CHECK-NEXT:    ret;
97  %mx = and <4 x i8> %x, %mask
98  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
99  %my = and <4 x i8> %y, %notmask
100  %r = or <4 x i8> %mx, %my
101  ret <4 x i8> %r
102}
103
; Masked merge in the and/andn/or ("out") form on <2 x i16>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as one
; 32-bit register and the mask to be inverted with xor.b32 against -1:
; and.b32, xor.b32, and.b32, or.b32.
104define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
105; CHECK-LABEL: out_v2i16(
106; CHECK:       {
107; CHECK-NEXT:    .reg .b32 %r<8>;
108; CHECK-EMPTY:
109; CHECK-NEXT:  // %bb.0:
110; CHECK-NEXT:    ld.param.u32 %r1, [out_v2i16_param_1];
111; CHECK-NEXT:    ld.param.u32 %r2, [out_v2i16_param_0];
112; CHECK-NEXT:    ld.param.u32 %r3, [out_v2i16_param_2];
113; CHECK-NEXT:    and.b32 %r4, %r2, %r3;
114; CHECK-NEXT:    xor.b32 %r5, %r3, -1;
115; CHECK-NEXT:    and.b32 %r6, %r1, %r5;
116; CHECK-NEXT:    or.b32 %r7, %r4, %r6;
117; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
118; CHECK-NEXT:    ret;
119  %mx = and <2 x i16> %x, %mask
120  %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
121  %my = and <2 x i16> %y, %notmask
122  %r = or <2 x i16> %mx, %my
123  ret <2 x i16> %r
124}
125
; Masked merge in the and/andn/or ("out") form on <1 x i32>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 32-bit registers: and.b32, not.b32, and.b32, or.b32.
126define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
127; CHECK-LABEL: out_v1i32(
128; CHECK:       {
129; CHECK-NEXT:    .reg .b32 %r<8>;
130; CHECK-EMPTY:
131; CHECK-NEXT:  // %bb.0:
132; CHECK-NEXT:    ld.param.u32 %r1, [out_v1i32_param_0];
133; CHECK-NEXT:    ld.param.u32 %r2, [out_v1i32_param_2];
134; CHECK-NEXT:    and.b32 %r3, %r1, %r2;
135; CHECK-NEXT:    ld.param.u32 %r4, [out_v1i32_param_1];
136; CHECK-NEXT:    not.b32 %r5, %r2;
137; CHECK-NEXT:    and.b32 %r6, %r4, %r5;
138; CHECK-NEXT:    or.b32 %r7, %r3, %r6;
139; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
140; CHECK-NEXT:    ret;
141  %mx = and <1 x i32> %x, %mask
142  %notmask = xor <1 x i32> %mask, <i32 -1>
143  %my = and <1 x i32> %y, %notmask
144  %r = or <1 x i32> %mx, %my
145  ret <1 x i32> %r
146}
147
148; ============================================================================ ;
149; 64-bit vector width
150; ============================================================================ ;
151
; Masked merge in the and/andn/or ("out") form on <8 x i8>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as two
; 32-bit registers (ld.param.v2.u32), with and.b32 / xor.b32 against -1 /
; and.b32 / or.b32 emitted once per register.
152define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
153; CHECK-LABEL: out_v8i8(
154; CHECK:       {
155; CHECK-NEXT:    .reg .b32 %r<15>;
156; CHECK-EMPTY:
157; CHECK-NEXT:  // %bb.0:
158; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v8i8_param_0];
159; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v8i8_param_2];
160; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
161; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
162; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v8i8_param_1];
163; CHECK-NEXT:    xor.b32 %r9, %r4, -1;
164; CHECK-NEXT:    xor.b32 %r10, %r3, -1;
165; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
166; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
167; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
168; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
169; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
170; CHECK-NEXT:    ret;
171  %mx = and <8 x i8> %x, %mask
172  %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
173  %my = and <8 x i8> %y, %notmask
174  %r = or <8 x i8> %mx, %my
175  ret <8 x i8> %r
176}
177
; Masked merge in the and/andn/or ("out") form on <4 x i16>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as two
; 32-bit registers (ld.param.v2.u32), with and.b32 / xor.b32 against -1 /
; and.b32 / or.b32 emitted once per register.
178define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
179; CHECK-LABEL: out_v4i16(
180; CHECK:       {
181; CHECK-NEXT:    .reg .b32 %r<15>;
182; CHECK-EMPTY:
183; CHECK-NEXT:  // %bb.0:
184; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v4i16_param_0];
185; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v4i16_param_2];
186; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
187; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
188; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v4i16_param_1];
189; CHECK-NEXT:    xor.b32 %r9, %r4, -1;
190; CHECK-NEXT:    xor.b32 %r10, %r3, -1;
191; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
192; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
193; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
194; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
195; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
196; CHECK-NEXT:    ret;
197  %mx = and <4 x i16> %x, %mask
198  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1>
199  %my = and <4 x i16> %y, %notmask
200  %r = or <4 x i16> %mx, %my
201  ret <4 x i16> %r
202}
203
; Same and/andn/or masked merge as out_v4i16, except that element 2 of the
; all-ones constant used to invert the mask is undef:
;   r = (x & mask) | (y & (mask ^ <-1, -1, undef, -1>))
; The autogenerated assertions below expect the second 32-bit register of the
; mask to be xor'ed with -65536 (0xFFFF0000) instead of -1; the first register
; is still xor'ed with -1.
204define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
205; CHECK-LABEL: out_v4i16_undef(
206; CHECK:       {
207; CHECK-NEXT:    .reg .b32 %r<15>;
208; CHECK-EMPTY:
209; CHECK-NEXT:  // %bb.0:
210; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v4i16_undef_param_0];
211; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v4i16_undef_param_2];
212; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
213; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
214; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v4i16_undef_param_1];
215; CHECK-NEXT:    xor.b32 %r9, %r4, -65536;
216; CHECK-NEXT:    xor.b32 %r10, %r3, -1;
217; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
218; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
219; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
220; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
221; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
222; CHECK-NEXT:    ret;
223  %mx = and <4 x i16> %x, %mask
224  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1>
225  %my = and <4 x i16> %y, %notmask
226  %r = or <4 x i16> %mx, %my
227  ret <4 x i16> %r
228}
229
; Masked merge in the and/andn/or ("out") form on <2 x i32>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as two
; 32-bit registers (ld.param.v2.u32), with and.b32 / not.b32 / and.b32 /
; or.b32 emitted once per element.
230define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
231; CHECK-LABEL: out_v2i32(
232; CHECK:       {
233; CHECK-NEXT:    .reg .b32 %r<15>;
234; CHECK-EMPTY:
235; CHECK-NEXT:  // %bb.0:
236; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v2i32_param_0];
237; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v2i32_param_2];
238; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
239; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
240; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v2i32_param_1];
241; CHECK-NEXT:    not.b32 %r9, %r4;
242; CHECK-NEXT:    not.b32 %r10, %r3;
243; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
244; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
245; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
246; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
247; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
248; CHECK-NEXT:    ret;
249  %mx = and <2 x i32> %x, %mask
250  %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
251  %my = and <2 x i32> %y, %notmask
252  %r = or <2 x i32> %mx, %my
253  ret <2 x i32> %r
254}
255
; Masked merge in the and/andn/or ("out") form on <1 x i64>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 64-bit registers: and.b64, not.b64, and.b64, or.b64.
256define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
257; CHECK-LABEL: out_v1i64(
258; CHECK:       {
259; CHECK-NEXT:    .reg .b64 %rd<8>;
260; CHECK-EMPTY:
261; CHECK-NEXT:  // %bb.0:
262; CHECK-NEXT:    ld.param.u64 %rd1, [out_v1i64_param_0];
263; CHECK-NEXT:    ld.param.u64 %rd2, [out_v1i64_param_2];
264; CHECK-NEXT:    and.b64 %rd3, %rd1, %rd2;
265; CHECK-NEXT:    ld.param.u64 %rd4, [out_v1i64_param_1];
266; CHECK-NEXT:    not.b64 %rd5, %rd2;
267; CHECK-NEXT:    and.b64 %rd6, %rd4, %rd5;
268; CHECK-NEXT:    or.b64 %rd7, %rd3, %rd6;
269; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
270; CHECK-NEXT:    ret;
271  %mx = and <1 x i64> %x, %mask
272  %notmask = xor <1 x i64> %mask, <i64 -1>
273  %my = and <1 x i64> %y, %notmask
274  %r = or <1 x i64> %mx, %my
275  ret <1 x i64> %r
276}
277
278; ============================================================================ ;
279; 128-bit vector width
280; ============================================================================ ;
281
; Masked merge in the and/andn/or ("out") form on <16 x i8>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as four
; 32-bit registers (ld.param.v4.u32), with and.b32 / xor.b32 against -1 /
; and.b32 / or.b32 emitted once per register.
282define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
283; CHECK-LABEL: out_v16i8(
284; CHECK:       {
285; CHECK-NEXT:    .reg .b32 %r<29>;
286; CHECK-EMPTY:
287; CHECK-NEXT:  // %bb.0:
288; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v16i8_param_0];
289; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v16i8_param_2];
290; CHECK-NEXT:    and.b32 %r9, %r1, %r5;
291; CHECK-NEXT:    and.b32 %r10, %r2, %r6;
292; CHECK-NEXT:    and.b32 %r11, %r3, %r7;
293; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
294; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v16i8_param_1];
295; CHECK-NEXT:    xor.b32 %r17, %r8, -1;
296; CHECK-NEXT:    xor.b32 %r18, %r7, -1;
297; CHECK-NEXT:    xor.b32 %r19, %r6, -1;
298; CHECK-NEXT:    xor.b32 %r20, %r5, -1;
299; CHECK-NEXT:    and.b32 %r21, %r13, %r20;
300; CHECK-NEXT:    and.b32 %r22, %r14, %r19;
301; CHECK-NEXT:    and.b32 %r23, %r15, %r18;
302; CHECK-NEXT:    and.b32 %r24, %r16, %r17;
303; CHECK-NEXT:    or.b32 %r25, %r12, %r24;
304; CHECK-NEXT:    or.b32 %r26, %r11, %r23;
305; CHECK-NEXT:    or.b32 %r27, %r10, %r22;
306; CHECK-NEXT:    or.b32 %r28, %r9, %r21;
307; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
308; CHECK-NEXT:    ret;
309  %mx = and <16 x i8> %x, %mask
310  %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
311  %my = and <16 x i8> %y, %notmask
312  %r = or <16 x i8> %mx, %my
313  ret <16 x i8> %r
314}
315
; Masked merge in the and/andn/or ("out") form on <8 x i16>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as four
; 32-bit registers (ld.param.v4.u32), with and.b32 / xor.b32 against -1 /
; and.b32 / or.b32 emitted once per register.
316define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
317; CHECK-LABEL: out_v8i16(
318; CHECK:       {
319; CHECK-NEXT:    .reg .b32 %r<29>;
320; CHECK-EMPTY:
321; CHECK-NEXT:  // %bb.0:
322; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v8i16_param_0];
323; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v8i16_param_2];
324; CHECK-NEXT:    and.b32 %r9, %r1, %r5;
325; CHECK-NEXT:    and.b32 %r10, %r2, %r6;
326; CHECK-NEXT:    and.b32 %r11, %r3, %r7;
327; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
328; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v8i16_param_1];
329; CHECK-NEXT:    xor.b32 %r17, %r8, -1;
330; CHECK-NEXT:    xor.b32 %r18, %r7, -1;
331; CHECK-NEXT:    xor.b32 %r19, %r6, -1;
332; CHECK-NEXT:    xor.b32 %r20, %r5, -1;
333; CHECK-NEXT:    and.b32 %r21, %r13, %r20;
334; CHECK-NEXT:    and.b32 %r22, %r14, %r19;
335; CHECK-NEXT:    and.b32 %r23, %r15, %r18;
336; CHECK-NEXT:    and.b32 %r24, %r16, %r17;
337; CHECK-NEXT:    or.b32 %r25, %r12, %r24;
338; CHECK-NEXT:    or.b32 %r26, %r11, %r23;
339; CHECK-NEXT:    or.b32 %r27, %r10, %r22;
340; CHECK-NEXT:    or.b32 %r28, %r9, %r21;
341; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
342; CHECK-NEXT:    ret;
343  %mx = and <8 x i16> %x, %mask
344  %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
345  %my = and <8 x i16> %y, %notmask
346  %r = or <8 x i16> %mx, %my
347  ret <8 x i16> %r
348}
349
; Masked merge in the and/andn/or ("out") form on <4 x i32>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as four
; 32-bit registers (ld.param.v4.u32), with and.b32 / not.b32 / and.b32 /
; or.b32 emitted once per element.
350define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
351; CHECK-LABEL: out_v4i32(
352; CHECK:       {
353; CHECK-NEXT:    .reg .b32 %r<29>;
354; CHECK-EMPTY:
355; CHECK-NEXT:  // %bb.0:
356; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v4i32_param_0];
357; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v4i32_param_2];
358; CHECK-NEXT:    and.b32 %r9, %r1, %r5;
359; CHECK-NEXT:    and.b32 %r10, %r2, %r6;
360; CHECK-NEXT:    and.b32 %r11, %r3, %r7;
361; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
362; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v4i32_param_1];
363; CHECK-NEXT:    not.b32 %r17, %r8;
364; CHECK-NEXT:    not.b32 %r18, %r7;
365; CHECK-NEXT:    not.b32 %r19, %r6;
366; CHECK-NEXT:    not.b32 %r20, %r5;
367; CHECK-NEXT:    and.b32 %r21, %r13, %r20;
368; CHECK-NEXT:    and.b32 %r22, %r14, %r19;
369; CHECK-NEXT:    and.b32 %r23, %r15, %r18;
370; CHECK-NEXT:    and.b32 %r24, %r16, %r17;
371; CHECK-NEXT:    or.b32 %r25, %r12, %r24;
372; CHECK-NEXT:    or.b32 %r26, %r11, %r23;
373; CHECK-NEXT:    or.b32 %r27, %r10, %r22;
374; CHECK-NEXT:    or.b32 %r28, %r9, %r21;
375; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
376; CHECK-NEXT:    ret;
377  %mx = and <4 x i32> %x, %mask
378  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
379  %my = and <4 x i32> %y, %notmask
380  %r = or <4 x i32> %mx, %my
381  ret <4 x i32> %r
382}
383
; Same and/andn/or masked merge as out_v4i32, except that element 2 of the
; all-ones constant used to invert the mask is undef:
;   r = (x & mask) | (y & (mask ^ <-1, -1, undef, -1>))
; The autogenerated assertions below expect only three not.b32 / and.b32 /
; or.b32 chains (elements 0, 1 and 3). The third stored element is %r9, the
; plain and.b32 of the third registers of x and mask, with no contribution
; from y.
384define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
385; CHECK-LABEL: out_v4i32_undef(
386; CHECK:       {
387; CHECK-NEXT:    .reg .b32 %r<26>;
388; CHECK-EMPTY:
389; CHECK-NEXT:  // %bb.0:
390; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v4i32_undef_param_0];
391; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v4i32_undef_param_2];
392; CHECK-NEXT:    and.b32 %r9, %r3, %r7;
393; CHECK-NEXT:    and.b32 %r10, %r1, %r5;
394; CHECK-NEXT:    and.b32 %r11, %r2, %r6;
395; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
396; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v4i32_undef_param_1];
397; CHECK-NEXT:    not.b32 %r17, %r8;
398; CHECK-NEXT:    not.b32 %r18, %r6;
399; CHECK-NEXT:    not.b32 %r19, %r5;
400; CHECK-NEXT:    and.b32 %r20, %r13, %r19;
401; CHECK-NEXT:    and.b32 %r21, %r14, %r18;
402; CHECK-NEXT:    and.b32 %r22, %r16, %r17;
403; CHECK-NEXT:    or.b32 %r23, %r12, %r22;
404; CHECK-NEXT:    or.b32 %r24, %r11, %r21;
405; CHECK-NEXT:    or.b32 %r25, %r10, %r20;
406; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r25, %r24, %r9, %r23};
407; CHECK-NEXT:    ret;
408  %mx = and <4 x i32> %x, %mask
409  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
410  %my = and <4 x i32> %y, %notmask
411  %r = or <4 x i32> %mx, %my
412  ret <4 x i32> %r
413}
414
; Masked merge in the and/andn/or ("out") form on <2 x i64>:
;   r = (x & mask) | (y & ~mask)
; The autogenerated assertions below expect each operand to be loaded as two
; 64-bit registers (ld.param.v2.u64), with and.b64 / not.b64 / and.b64 /
; or.b64 emitted once per element.
415define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
416; CHECK-LABEL: out_v2i64(
417; CHECK:       {
418; CHECK-NEXT:    .reg .b64 %rd<15>;
419; CHECK-EMPTY:
420; CHECK-NEXT:  // %bb.0:
421; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [out_v2i64_param_0];
422; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [out_v2i64_param_2];
423; CHECK-NEXT:    and.b64 %rd5, %rd1, %rd3;
424; CHECK-NEXT:    and.b64 %rd6, %rd2, %rd4;
425; CHECK-NEXT:    ld.param.v2.u64 {%rd7, %rd8}, [out_v2i64_param_1];
426; CHECK-NEXT:    not.b64 %rd9, %rd4;
427; CHECK-NEXT:    not.b64 %rd10, %rd3;
428; CHECK-NEXT:    and.b64 %rd11, %rd7, %rd10;
429; CHECK-NEXT:    and.b64 %rd12, %rd8, %rd9;
430; CHECK-NEXT:    or.b64 %rd13, %rd6, %rd12;
431; CHECK-NEXT:    or.b64 %rd14, %rd5, %rd11;
432; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd14, %rd13};
433; CHECK-NEXT:    ret;
434  %mx = and <2 x i64> %x, %mask
435  %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1>
436  %my = and <2 x i64> %y, %notmask
437  %r = or <2 x i64> %mx, %my
438  ret <2 x i64> %r
439}
440
441;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
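442; Should be the same as the previous ones: the in_* functions below compute the same masked merge as the out_* functions above, written as ((x ^ y) & mask) ^ y.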
443;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
444
445; ============================================================================ ;
446; 8-bit vector width
447; ============================================================================ ;
448
; Masked merge in the xor/and/xor ("in") form on <1 x i8>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 16-bit registers: xor.b16, and.b16, xor.b16.
449define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
450; CHECK-LABEL: in_v1i8(
451; CHECK:       {
452; CHECK-NEXT:    .reg .b16 %rs<7>;
453; CHECK-EMPTY:
454; CHECK-NEXT:  // %bb.0:
455; CHECK-NEXT:    ld.param.u8 %rs1, [in_v1i8_param_0];
456; CHECK-NEXT:    ld.param.u8 %rs2, [in_v1i8_param_1];
457; CHECK-NEXT:    xor.b16 %rs3, %rs1, %rs2;
458; CHECK-NEXT:    ld.param.u8 %rs4, [in_v1i8_param_2];
459; CHECK-NEXT:    and.b16 %rs5, %rs3, %rs4;
460; CHECK-NEXT:    xor.b16 %rs6, %rs5, %rs2;
461; CHECK-NEXT:    st.param.b8 [func_retval0], %rs6;
462; CHECK-NEXT:    ret;
463  %n0 = xor <1 x i8> %x, %y
464  %n1 = and <1 x i8> %n0, %mask
465  %r = xor <1 x i8> %n1, %y
466  ret <1 x i8> %r
467}
468
469; ============================================================================ ;
470; 16-bit vector width
471; ============================================================================ ;
472
; Masked merge in the xor/and/xor ("in") form on <1 x i16>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 16-bit registers: xor.b16, and.b16, xor.b16.
473define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
474; CHECK-LABEL: in_v1i16(
475; CHECK:       {
476; CHECK-NEXT:    .reg .b16 %rs<7>;
477; CHECK-EMPTY:
478; CHECK-NEXT:  // %bb.0:
479; CHECK-NEXT:    ld.param.u16 %rs1, [in_v1i16_param_0];
480; CHECK-NEXT:    ld.param.u16 %rs2, [in_v1i16_param_1];
481; CHECK-NEXT:    xor.b16 %rs3, %rs1, %rs2;
482; CHECK-NEXT:    ld.param.u16 %rs4, [in_v1i16_param_2];
483; CHECK-NEXT:    and.b16 %rs5, %rs3, %rs4;
484; CHECK-NEXT:    xor.b16 %rs6, %rs5, %rs2;
485; CHECK-NEXT:    st.param.b16 [func_retval0], %rs6;
486; CHECK-NEXT:    ret;
487  %n0 = xor <1 x i16> %x, %y
488  %n1 = and <1 x i16> %n0, %mask
489  %r = xor <1 x i16> %n1, %y
490  ret <1 x i16> %r
491}
492
493; ============================================================================ ;
494; 32-bit vector width
495; ============================================================================ ;
496
; Masked merge in the xor/and/xor ("in") form on <4 x i8>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as one
; 32-bit register and the pattern to be emitted as written:
; xor.b32, and.b32, xor.b32.
497define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
498; CHECK-LABEL: in_v4i8(
499; CHECK:       {
500; CHECK-NEXT:    .reg .b32 %r<7>;
501; CHECK-EMPTY:
502; CHECK-NEXT:  // %bb.0:
503; CHECK-NEXT:    ld.param.u32 %r1, [in_v4i8_param_0];
504; CHECK-NEXT:    ld.param.u32 %r2, [in_v4i8_param_1];
505; CHECK-NEXT:    xor.b32 %r3, %r1, %r2;
506; CHECK-NEXT:    ld.param.u32 %r4, [in_v4i8_param_2];
507; CHECK-NEXT:    and.b32 %r5, %r3, %r4;
508; CHECK-NEXT:    xor.b32 %r6, %r5, %r2;
509; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
510; CHECK-NEXT:    ret;
511  %n0 = xor <4 x i8> %x, %y
512  %n1 = and <4 x i8> %n0, %mask
513  %r = xor <4 x i8> %n1, %y
514  ret <4 x i8> %r
515}
516
; Masked merge in the xor/and/xor ("in") form on <2 x i16>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as one
; 32-bit register and the pattern to be emitted as written:
; xor.b32, and.b32, xor.b32.
517define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
518; CHECK-LABEL: in_v2i16(
519; CHECK:       {
520; CHECK-NEXT:    .reg .b32 %r<7>;
521; CHECK-EMPTY:
522; CHECK-NEXT:  // %bb.0:
523; CHECK-NEXT:    ld.param.u32 %r1, [in_v2i16_param_0];
524; CHECK-NEXT:    ld.param.u32 %r2, [in_v2i16_param_1];
525; CHECK-NEXT:    xor.b32 %r3, %r1, %r2;
526; CHECK-NEXT:    ld.param.u32 %r4, [in_v2i16_param_2];
527; CHECK-NEXT:    and.b32 %r5, %r3, %r4;
528; CHECK-NEXT:    xor.b32 %r6, %r5, %r2;
529; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
530; CHECK-NEXT:    ret;
531  %n0 = xor <2 x i16> %x, %y
532  %n1 = and <2 x i16> %n0, %mask
533  %r = xor <2 x i16> %n1, %y
534  ret <2 x i16> %r
535}
536
; Masked merge in the xor/and/xor ("in") form on <1 x i32>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 32-bit registers: xor.b32, and.b32, xor.b32.
537define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
538; CHECK-LABEL: in_v1i32(
539; CHECK:       {
540; CHECK-NEXT:    .reg .b32 %r<7>;
541; CHECK-EMPTY:
542; CHECK-NEXT:  // %bb.0:
543; CHECK-NEXT:    ld.param.u32 %r1, [in_v1i32_param_0];
544; CHECK-NEXT:    ld.param.u32 %r2, [in_v1i32_param_1];
545; CHECK-NEXT:    xor.b32 %r3, %r1, %r2;
546; CHECK-NEXT:    ld.param.u32 %r4, [in_v1i32_param_2];
547; CHECK-NEXT:    and.b32 %r5, %r3, %r4;
548; CHECK-NEXT:    xor.b32 %r6, %r5, %r2;
549; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
550; CHECK-NEXT:    ret;
551  %n0 = xor <1 x i32> %x, %y
552  %n1 = and <1 x i32> %n0, %mask
553  %r = xor <1 x i32> %n1, %y
554  ret <1 x i32> %r
555}
556
557; ============================================================================ ;
558; 64-bit vector width
559; ============================================================================ ;
560
; Masked merge in the xor/and/xor ("in") form on <8 x i8>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as two
; 32-bit registers (ld.param.v2.u32), with one xor.b32 / and.b32 / xor.b32
; chain per register.
561define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
562; CHECK-LABEL: in_v8i8(
563; CHECK:       {
564; CHECK-NEXT:    .reg .b32 %r<13>;
565; CHECK-EMPTY:
566; CHECK-NEXT:  // %bb.0:
567; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [in_v8i8_param_0];
568; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [in_v8i8_param_1];
569; CHECK-NEXT:    ld.param.v2.u32 {%r5, %r6}, [in_v8i8_param_2];
570; CHECK-NEXT:    xor.b32 %r7, %r2, %r4;
571; CHECK-NEXT:    and.b32 %r8, %r7, %r6;
572; CHECK-NEXT:    xor.b32 %r9, %r8, %r4;
573; CHECK-NEXT:    xor.b32 %r10, %r1, %r3;
574; CHECK-NEXT:    and.b32 %r11, %r10, %r5;
575; CHECK-NEXT:    xor.b32 %r12, %r11, %r3;
576; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r12, %r9};
577; CHECK-NEXT:    ret;
578  %n0 = xor <8 x i8> %x, %y
579  %n1 = and <8 x i8> %n0, %mask
580  %r = xor <8 x i8> %n1, %y
581  ret <8 x i8> %r
582}
583
; Masked merge in the xor/and/xor ("in") form on <4 x i16>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as two
; 32-bit registers (ld.param.v2.u32), with one xor.b32 / and.b32 / xor.b32
; chain per register.
584define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
585; CHECK-LABEL: in_v4i16(
586; CHECK:       {
587; CHECK-NEXT:    .reg .b32 %r<13>;
588; CHECK-EMPTY:
589; CHECK-NEXT:  // %bb.0:
590; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [in_v4i16_param_0];
591; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [in_v4i16_param_1];
592; CHECK-NEXT:    ld.param.v2.u32 {%r5, %r6}, [in_v4i16_param_2];
593; CHECK-NEXT:    xor.b32 %r7, %r2, %r4;
594; CHECK-NEXT:    and.b32 %r8, %r7, %r6;
595; CHECK-NEXT:    xor.b32 %r9, %r8, %r4;
596; CHECK-NEXT:    xor.b32 %r10, %r1, %r3;
597; CHECK-NEXT:    and.b32 %r11, %r10, %r5;
598; CHECK-NEXT:    xor.b32 %r12, %r11, %r3;
599; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r12, %r9};
600; CHECK-NEXT:    ret;
601  %n0 = xor <4 x i16> %x, %y
602  %n1 = and <4 x i16> %n0, %mask
603  %r = xor <4 x i16> %n1, %y
604  ret <4 x i16> %r
605}
606
; Masked merge in the xor/and/xor ("in") form on <2 x i32>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as two
; 32-bit registers (ld.param.v2.u32), with xor.b32 / and.b32 / xor.b32
; emitted once per element.
607define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
608; CHECK-LABEL: in_v2i32(
609; CHECK:       {
610; CHECK-NEXT:    .reg .b32 %r<13>;
611; CHECK-EMPTY:
612; CHECK-NEXT:  // %bb.0:
613; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [in_v2i32_param_0];
614; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [in_v2i32_param_1];
615; CHECK-NEXT:    xor.b32 %r5, %r2, %r4;
616; CHECK-NEXT:    xor.b32 %r6, %r1, %r3;
617; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [in_v2i32_param_2];
618; CHECK-NEXT:    and.b32 %r9, %r6, %r7;
619; CHECK-NEXT:    and.b32 %r10, %r5, %r8;
620; CHECK-NEXT:    xor.b32 %r11, %r10, %r4;
621; CHECK-NEXT:    xor.b32 %r12, %r9, %r3;
622; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r12, %r11};
623; CHECK-NEXT:    ret;
624  %n0 = xor <2 x i32> %x, %y
625  %n1 = and <2 x i32> %n0, %mask
626  %r = xor <2 x i32> %n1, %y
627  ret <2 x i32> %r
628}
629
; Masked merge in the xor/and/xor ("in") form on <1 x i64>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect the pattern to be emitted as
; written, on 64-bit registers: xor.b64, and.b64, xor.b64.
630define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
631; CHECK-LABEL: in_v1i64(
632; CHECK:       {
633; CHECK-NEXT:    .reg .b64 %rd<7>;
634; CHECK-EMPTY:
635; CHECK-NEXT:  // %bb.0:
636; CHECK-NEXT:    ld.param.u64 %rd1, [in_v1i64_param_0];
637; CHECK-NEXT:    ld.param.u64 %rd2, [in_v1i64_param_1];
638; CHECK-NEXT:    xor.b64 %rd3, %rd1, %rd2;
639; CHECK-NEXT:    ld.param.u64 %rd4, [in_v1i64_param_2];
640; CHECK-NEXT:    and.b64 %rd5, %rd3, %rd4;
641; CHECK-NEXT:    xor.b64 %rd6, %rd5, %rd2;
642; CHECK-NEXT:    st.param.b64 [func_retval0], %rd6;
643; CHECK-NEXT:    ret;
644  %n0 = xor <1 x i64> %x, %y
645  %n1 = and <1 x i64> %n0, %mask
646  %r = xor <1 x i64> %n1, %y
647  ret <1 x i64> %r
648}
649
650; ============================================================================ ;
651; 128-bit vector width
652; ============================================================================ ;
653
; Masked merge in the xor/and/xor ("in") form on <16 x i8>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as four
; 32-bit registers (ld.param.v4.u32), with xor.b32 / and.b32 / xor.b32
; emitted once per register.
654define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
655; CHECK-LABEL: in_v16i8(
656; CHECK:       {
657; CHECK-NEXT:    .reg .b32 %r<25>;
658; CHECK-EMPTY:
659; CHECK-NEXT:  // %bb.0:
660; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v16i8_param_0];
661; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v16i8_param_1];
662; CHECK-NEXT:    xor.b32 %r9, %r4, %r8;
663; CHECK-NEXT:    xor.b32 %r10, %r3, %r7;
664; CHECK-NEXT:    xor.b32 %r11, %r2, %r6;
665; CHECK-NEXT:    xor.b32 %r12, %r1, %r5;
666; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v16i8_param_2];
667; CHECK-NEXT:    and.b32 %r17, %r12, %r13;
668; CHECK-NEXT:    and.b32 %r18, %r11, %r14;
669; CHECK-NEXT:    and.b32 %r19, %r10, %r15;
670; CHECK-NEXT:    and.b32 %r20, %r9, %r16;
671; CHECK-NEXT:    xor.b32 %r21, %r20, %r8;
672; CHECK-NEXT:    xor.b32 %r22, %r19, %r7;
673; CHECK-NEXT:    xor.b32 %r23, %r18, %r6;
674; CHECK-NEXT:    xor.b32 %r24, %r17, %r5;
675; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r24, %r23, %r22, %r21};
676; CHECK-NEXT:    ret;
677  %n0 = xor <16 x i8> %x, %y
678  %n1 = and <16 x i8> %n0, %mask
679  %r = xor <16 x i8> %n1, %y
680  ret <16 x i8> %r
681}
682
; Masked merge in the xor/and/xor ("in") form on <8 x i16>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as four
; 32-bit registers (ld.param.v4.u32), with xor.b32 / and.b32 / xor.b32
; emitted once per register.
683define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
684; CHECK-LABEL: in_v8i16(
685; CHECK:       {
686; CHECK-NEXT:    .reg .b32 %r<25>;
687; CHECK-EMPTY:
688; CHECK-NEXT:  // %bb.0:
689; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v8i16_param_0];
690; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v8i16_param_1];
691; CHECK-NEXT:    xor.b32 %r9, %r4, %r8;
692; CHECK-NEXT:    xor.b32 %r10, %r3, %r7;
693; CHECK-NEXT:    xor.b32 %r11, %r2, %r6;
694; CHECK-NEXT:    xor.b32 %r12, %r1, %r5;
695; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v8i16_param_2];
696; CHECK-NEXT:    and.b32 %r17, %r12, %r13;
697; CHECK-NEXT:    and.b32 %r18, %r11, %r14;
698; CHECK-NEXT:    and.b32 %r19, %r10, %r15;
699; CHECK-NEXT:    and.b32 %r20, %r9, %r16;
700; CHECK-NEXT:    xor.b32 %r21, %r20, %r8;
701; CHECK-NEXT:    xor.b32 %r22, %r19, %r7;
702; CHECK-NEXT:    xor.b32 %r23, %r18, %r6;
703; CHECK-NEXT:    xor.b32 %r24, %r17, %r5;
704; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r24, %r23, %r22, %r21};
705; CHECK-NEXT:    ret;
706  %n0 = xor <8 x i16> %x, %y
707  %n1 = and <8 x i16> %n0, %mask
708  %r = xor <8 x i16> %n1, %y
709  ret <8 x i16> %r
710}
711
; Masked merge in the xor/and/xor ("in") form on <4 x i32>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as four
; 32-bit registers (ld.param.v4.u32), with xor.b32 / and.b32 / xor.b32
; emitted once per element.
712define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
713; CHECK-LABEL: in_v4i32(
714; CHECK:       {
715; CHECK-NEXT:    .reg .b32 %r<25>;
716; CHECK-EMPTY:
717; CHECK-NEXT:  // %bb.0:
718; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v4i32_param_0];
719; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v4i32_param_1];
720; CHECK-NEXT:    xor.b32 %r9, %r4, %r8;
721; CHECK-NEXT:    xor.b32 %r10, %r3, %r7;
722; CHECK-NEXT:    xor.b32 %r11, %r2, %r6;
723; CHECK-NEXT:    xor.b32 %r12, %r1, %r5;
724; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v4i32_param_2];
725; CHECK-NEXT:    and.b32 %r17, %r12, %r13;
726; CHECK-NEXT:    and.b32 %r18, %r11, %r14;
727; CHECK-NEXT:    and.b32 %r19, %r10, %r15;
728; CHECK-NEXT:    and.b32 %r20, %r9, %r16;
729; CHECK-NEXT:    xor.b32 %r21, %r20, %r8;
730; CHECK-NEXT:    xor.b32 %r22, %r19, %r7;
731; CHECK-NEXT:    xor.b32 %r23, %r18, %r6;
732; CHECK-NEXT:    xor.b32 %r24, %r17, %r5;
733; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r24, %r23, %r22, %r21};
734; CHECK-NEXT:    ret;
735  %n0 = xor <4 x i32> %x, %y
736  %n1 = and <4 x i32> %n0, %mask
737  %r = xor <4 x i32> %n1, %y
738  ret <4 x i32> %r
739}
740
; Masked merge in the xor/and/xor ("in") form on <2 x i64>:
;   r = ((x ^ y) & mask) ^ y
; The autogenerated assertions below expect each operand to be loaded as two
; 64-bit registers (ld.param.v2.u64), with xor.b64 / and.b64 / xor.b64
; emitted once per element.
741define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
742; CHECK-LABEL: in_v2i64(
743; CHECK:       {
744; CHECK-NEXT:    .reg .b64 %rd<13>;
745; CHECK-EMPTY:
746; CHECK-NEXT:  // %bb.0:
747; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [in_v2i64_param_0];
748; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [in_v2i64_param_1];
749; CHECK-NEXT:    xor.b64 %rd5, %rd2, %rd4;
750; CHECK-NEXT:    xor.b64 %rd6, %rd1, %rd3;
751; CHECK-NEXT:    ld.param.v2.u64 {%rd7, %rd8}, [in_v2i64_param_2];
752; CHECK-NEXT:    and.b64 %rd9, %rd6, %rd7;
753; CHECK-NEXT:    and.b64 %rd10, %rd5, %rd8;
754; CHECK-NEXT:    xor.b64 %rd11, %rd10, %rd4;
755; CHECK-NEXT:    xor.b64 %rd12, %rd9, %rd3;
756; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd12, %rd11};
757; CHECK-NEXT:    ret;
758  %n0 = xor <2 x i64> %x, %y
759  %n1 = and <2 x i64> %n0, %mask
760  %r = xor <2 x i64> %n1, %y
761  ret <2 x i64> %r
762}
763