xref: /llvm-project/llvm/test/CodeGen/Mips/msa/bitwise.ll (revision ae26f50aea4ef1a6c7058019f0db11a91bbcdade)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s --check-prefixes=CHECK,MIPS
3; RUN: llc -mtriple=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s --check-prefixes=CHECK,MIPSEL
4
; and_v16i8: element-wise AND of the <16 x i8> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.b loads feeding one and.v, then st.b.
5define void @and_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
6; CHECK-LABEL: and_v16i8:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    ld.b $w0, 0($6)
9; CHECK-NEXT:    ld.b $w1, 0($5)
10; CHECK-NEXT:    and.v $w0, $w1, $w0
11; CHECK-NEXT:    jr $ra
12; CHECK-NEXT:    st.b $w0, 0($4)
13  %1 = load <16 x i8>, ptr %a
14  %2 = load <16 x i8>, ptr %b
15  %3 = and <16 x i8> %1, %2
16  store <16 x i8> %3, ptr %c
17  ret void
18}
19
; and_v8i16: element-wise AND of the <8 x i16> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.h loads feeding one and.v, then st.h.
20define void @and_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
21; CHECK-LABEL: and_v8i16:
22; CHECK:       # %bb.0:
23; CHECK-NEXT:    ld.h $w0, 0($6)
24; CHECK-NEXT:    ld.h $w1, 0($5)
25; CHECK-NEXT:    and.v $w0, $w1, $w0
26; CHECK-NEXT:    jr $ra
27; CHECK-NEXT:    st.h $w0, 0($4)
28  %1 = load <8 x i16>, ptr %a
29  %2 = load <8 x i16>, ptr %b
30  %3 = and <8 x i16> %1, %2
31  store <8 x i16> %3, ptr %c
32  ret void
33}
34
; and_v4i32: element-wise AND of the <4 x i32> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.w loads feeding one and.v, then st.w.
35define void @and_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
36; CHECK-LABEL: and_v4i32:
37; CHECK:       # %bb.0:
38; CHECK-NEXT:    ld.w $w0, 0($6)
39; CHECK-NEXT:    ld.w $w1, 0($5)
40; CHECK-NEXT:    and.v $w0, $w1, $w0
41; CHECK-NEXT:    jr $ra
42; CHECK-NEXT:    st.w $w0, 0($4)
43  %1 = load <4 x i32>, ptr %a
44  %2 = load <4 x i32>, ptr %b
45  %3 = and <4 x i32> %1, %2
46  store <4 x i32> %3, ptr %c
47  ret void
48}
49
; and_v2i64: element-wise AND of the <2 x i64> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.d loads feeding one and.v, then st.d.
50define void @and_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
51; CHECK-LABEL: and_v2i64:
52; CHECK:       # %bb.0:
53; CHECK-NEXT:    ld.d $w0, 0($6)
54; CHECK-NEXT:    ld.d $w1, 0($5)
55; CHECK-NEXT:    and.v $w0, $w1, $w0
56; CHECK-NEXT:    jr $ra
57; CHECK-NEXT:    st.d $w0, 0($4)
58  %1 = load <2 x i64>, ptr %a
59  %2 = load <2 x i64>, ptr %b
60  %3 = and <2 x i64> %1, %2
61  store <2 x i64> %3, ptr %c
62  ret void
63}
64
; and_v16i8_i: ANDs the <16 x i8> vector at %a with a constant whose lanes are
; all 1. The checks expect the constant to be folded into andi.b as immediate 1.
65define void @and_v16i8_i(ptr %c, ptr %a) nounwind {
66; CHECK-LABEL: and_v16i8_i:
67; CHECK:       # %bb.0:
68; CHECK-NEXT:    ld.b $w0, 0($5)
69; CHECK-NEXT:    andi.b $w0, $w0, 1
70; CHECK-NEXT:    jr $ra
71; CHECK-NEXT:    st.b $w0, 0($4)
72  %1 = load <16 x i8>, ptr %a
73  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
74  store <16 x i8> %2, ptr %c
75  ret void
76}
77
; and_v8i16_i: ANDs the <8 x i16> vector at %a with a constant whose lanes are
; all 1. The checks expect the constant to be built with ldi.h and combined by
; and.v (no immediate-form AND appears in the expected output).
78define void @and_v8i16_i(ptr %c, ptr %a) nounwind {
79; CHECK-LABEL: and_v8i16_i:
80; CHECK:       # %bb.0:
81; CHECK-NEXT:    ld.h $w0, 0($5)
82; CHECK-NEXT:    ldi.h $w1, 1
83; CHECK-NEXT:    and.v $w0, $w0, $w1
84; CHECK-NEXT:    jr $ra
85; CHECK-NEXT:    st.h $w0, 0($4)
86  %1 = load <8 x i16>, ptr %a
87  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
88  store <8 x i16> %2, ptr %c
89  ret void
90}
91
; and_v4i32_i: ANDs the <4 x i32> vector at %a with a constant whose lanes are
; all 1. The checks expect the constant to be built with ldi.w and combined by
; and.v (no immediate-form AND appears in the expected output).
92define void @and_v4i32_i(ptr %c, ptr %a) nounwind {
93; CHECK-LABEL: and_v4i32_i:
94; CHECK:       # %bb.0:
95; CHECK-NEXT:    ld.w $w0, 0($5)
96; CHECK-NEXT:    ldi.w $w1, 1
97; CHECK-NEXT:    and.v $w0, $w0, $w1
98; CHECK-NEXT:    jr $ra
99; CHECK-NEXT:    st.w $w0, 0($4)
100  %1 = load <4 x i32>, ptr %a
101  %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
102  store <4 x i32> %2, ptr %c
103  ret void
104}
105
; and_v2i64_i: ANDs the <2 x i64> vector at %a with the constant <1, 1>.
; Expected output differs per RUN line: the mips run (MIPS prefix) has an extra
; shf.w 177 after ldi.d, while the mipsel run (MIPSEL prefix) uses ldi.d alone.
106define void @and_v2i64_i(ptr %c, ptr %a) nounwind {
107; MIPS-LABEL: and_v2i64_i:
108; MIPS:       # %bb.0:
109; MIPS-NEXT:    ldi.d $w0, 1
110; MIPS-NEXT:    shf.w $w0, $w0, 177
111; MIPS-NEXT:    ld.d $w1, 0($5)
112; MIPS-NEXT:    and.v $w0, $w1, $w0
113; MIPS-NEXT:    jr $ra
114; MIPS-NEXT:    st.d $w0, 0($4)
115;
116; MIPSEL-LABEL: and_v2i64_i:
117; MIPSEL:       # %bb.0:
118; MIPSEL-NEXT:    ldi.d $w0, 1
119; MIPSEL-NEXT:    ld.d $w1, 0($5)
120; MIPSEL-NEXT:    and.v $w0, $w1, $w0
121; MIPSEL-NEXT:    jr $ra
122; MIPSEL-NEXT:    st.d $w0, 0($4)
123  %1 = load <2 x i64>, ptr %a
124  %2 = and <2 x i64> %1, <i64 1, i64 1>
125  store <2 x i64> %2, ptr %c
126  ret void
127}
128
; or_v16i8: element-wise OR of the <16 x i8> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.b loads feeding one or.v, then st.b.
129define void @or_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
130; CHECK-LABEL: or_v16i8:
131; CHECK:       # %bb.0:
132; CHECK-NEXT:    ld.b $w0, 0($6)
133; CHECK-NEXT:    ld.b $w1, 0($5)
134; CHECK-NEXT:    or.v $w0, $w1, $w0
135; CHECK-NEXT:    jr $ra
136; CHECK-NEXT:    st.b $w0, 0($4)
137  %1 = load <16 x i8>, ptr %a
138  %2 = load <16 x i8>, ptr %b
139  %3 = or <16 x i8> %1, %2
140  store <16 x i8> %3, ptr %c
141  ret void
142}
143
; or_v8i16: element-wise OR of the <8 x i16> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.h loads feeding one or.v, then st.h.
144define void @or_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
145; CHECK-LABEL: or_v8i16:
146; CHECK:       # %bb.0:
147; CHECK-NEXT:    ld.h $w0, 0($6)
148; CHECK-NEXT:    ld.h $w1, 0($5)
149; CHECK-NEXT:    or.v $w0, $w1, $w0
150; CHECK-NEXT:    jr $ra
151; CHECK-NEXT:    st.h $w0, 0($4)
152  %1 = load <8 x i16>, ptr %a
153  %2 = load <8 x i16>, ptr %b
154  %3 = or <8 x i16> %1, %2
155  store <8 x i16> %3, ptr %c
156  ret void
157}
158
; or_v4i32: element-wise OR of the <4 x i32> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.w loads feeding one or.v, then st.w.
159define void @or_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
160; CHECK-LABEL: or_v4i32:
161; CHECK:       # %bb.0:
162; CHECK-NEXT:    ld.w $w0, 0($6)
163; CHECK-NEXT:    ld.w $w1, 0($5)
164; CHECK-NEXT:    or.v $w0, $w1, $w0
165; CHECK-NEXT:    jr $ra
166; CHECK-NEXT:    st.w $w0, 0($4)
167  %1 = load <4 x i32>, ptr %a
168  %2 = load <4 x i32>, ptr %b
169  %3 = or <4 x i32> %1, %2
170  store <4 x i32> %3, ptr %c
171  ret void
172}
173
; or_v2i64: element-wise OR of the <2 x i64> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.d loads feeding one or.v, then st.d.
174define void @or_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
175; CHECK-LABEL: or_v2i64:
176; CHECK:       # %bb.0:
177; CHECK-NEXT:    ld.d $w0, 0($6)
178; CHECK-NEXT:    ld.d $w1, 0($5)
179; CHECK-NEXT:    or.v $w0, $w1, $w0
180; CHECK-NEXT:    jr $ra
181; CHECK-NEXT:    st.d $w0, 0($4)
182  %1 = load <2 x i64>, ptr %a
183  %2 = load <2 x i64>, ptr %b
184  %3 = or <2 x i64> %1, %2
185  store <2 x i64> %3, ptr %c
186  ret void
187}
188
; or_v16i8_i: ORs the <16 x i8> vector at %a with a constant whose lanes are
; all 3. The checks expect the constant to be folded into ori.b as immediate 3.
189define void @or_v16i8_i(ptr %c, ptr %a) nounwind {
190; CHECK-LABEL: or_v16i8_i:
191; CHECK:       # %bb.0:
192; CHECK-NEXT:    ld.b $w0, 0($5)
193; CHECK-NEXT:    ori.b $w0, $w0, 3
194; CHECK-NEXT:    jr $ra
195; CHECK-NEXT:    st.b $w0, 0($4)
196  %1 = load <16 x i8>, ptr %a
197  %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
198  store <16 x i8> %2, ptr %c
199  ret void
200}
201
; or_v8i16_i: ORs the <8 x i16> vector at %a with a constant whose lanes are
; all 3. The checks expect the constant to be built with ldi.h and combined by
; or.v (no immediate-form OR appears in the expected output).
202define void @or_v8i16_i(ptr %c, ptr %a) nounwind {
203; CHECK-LABEL: or_v8i16_i:
204; CHECK:       # %bb.0:
205; CHECK-NEXT:    ld.h $w0, 0($5)
206; CHECK-NEXT:    ldi.h $w1, 3
207; CHECK-NEXT:    or.v $w0, $w0, $w1
208; CHECK-NEXT:    jr $ra
209; CHECK-NEXT:    st.h $w0, 0($4)
210  %1 = load <8 x i16>, ptr %a
211  %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
212  store <8 x i16> %2, ptr %c
213  ret void
214}
215
; or_v4i32_i: ORs the <4 x i32> vector at %a with a constant whose lanes are
; all 3. The checks expect the constant to be built with ldi.w and combined by
; or.v (no immediate-form OR appears in the expected output).
216define void @or_v4i32_i(ptr %c, ptr %a) nounwind {
217; CHECK-LABEL: or_v4i32_i:
218; CHECK:       # %bb.0:
219; CHECK-NEXT:    ld.w $w0, 0($5)
220; CHECK-NEXT:    ldi.w $w1, 3
221; CHECK-NEXT:    or.v $w0, $w0, $w1
222; CHECK-NEXT:    jr $ra
223; CHECK-NEXT:    st.w $w0, 0($4)
224  %1 = load <4 x i32>, ptr %a
225  %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
226  store <4 x i32> %2, ptr %c
227  ret void
228}
229
; or_v2i64_i: ORs the <2 x i64> vector at %a with the constant <3, 3>.
; Expected output differs per RUN line: the mips run (MIPS prefix) has an extra
; shf.w 177 after ldi.d, while the mipsel run (MIPSEL prefix) uses ldi.d alone.
230define void @or_v2i64_i(ptr %c, ptr %a) nounwind {
231; MIPS-LABEL: or_v2i64_i:
232; MIPS:       # %bb.0:
233; MIPS-NEXT:    ldi.d $w0, 3
234; MIPS-NEXT:    shf.w $w0, $w0, 177
235; MIPS-NEXT:    ld.d $w1, 0($5)
236; MIPS-NEXT:    or.v $w0, $w1, $w0
237; MIPS-NEXT:    jr $ra
238; MIPS-NEXT:    st.d $w0, 0($4)
239;
240; MIPSEL-LABEL: or_v2i64_i:
241; MIPSEL:       # %bb.0:
242; MIPSEL-NEXT:    ldi.d $w0, 3
243; MIPSEL-NEXT:    ld.d $w1, 0($5)
244; MIPSEL-NEXT:    or.v $w0, $w1, $w0
245; MIPSEL-NEXT:    jr $ra
246; MIPSEL-NEXT:    st.d $w0, 0($4)
247  %1 = load <2 x i64>, ptr %a
248  %2 = or <2 x i64> %1, <i64 3, i64 3>
249  store <2 x i64> %2, ptr %c
250  ret void
251}
252
; nor_v16i8: ORs the <16 x i8> vectors from %a and %b, then XORs the result
; with -1 in every lane. The checks expect the or+xor pair to become one nor.v.
253define void @nor_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
254; CHECK-LABEL: nor_v16i8:
255; CHECK:       # %bb.0:
256; CHECK-NEXT:    ld.b $w0, 0($6)
257; CHECK-NEXT:    ld.b $w1, 0($5)
258; CHECK-NEXT:    nor.v $w0, $w1, $w0
259; CHECK-NEXT:    jr $ra
260; CHECK-NEXT:    st.b $w0, 0($4)
261  %1 = load <16 x i8>, ptr %a
262  %2 = load <16 x i8>, ptr %b
263  %3 = or <16 x i8> %1, %2
264  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
265  store <16 x i8> %4, ptr %c
266  ret void
267}
268
; nor_v8i16: ORs the <8 x i16> vectors from %a and %b, then XORs the result
; with -1 in every lane. The checks expect the or+xor pair to become one nor.v.
269define void @nor_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
270; CHECK-LABEL: nor_v8i16:
271; CHECK:       # %bb.0:
272; CHECK-NEXT:    ld.h $w0, 0($6)
273; CHECK-NEXT:    ld.h $w1, 0($5)
274; CHECK-NEXT:    nor.v $w0, $w1, $w0
275; CHECK-NEXT:    jr $ra
276; CHECK-NEXT:    st.h $w0, 0($4)
277  %1 = load <8 x i16>, ptr %a
278  %2 = load <8 x i16>, ptr %b
279  %3 = or <8 x i16> %1, %2
280  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
281  store <8 x i16> %4, ptr %c
282  ret void
283}
284
; nor_v4i32: ORs the <4 x i32> vectors from %a and %b, then XORs the result
; with -1 in every lane. The checks expect the or+xor pair to become one nor.v.
285define void @nor_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
286; CHECK-LABEL: nor_v4i32:
287; CHECK:       # %bb.0:
288; CHECK-NEXT:    ld.w $w0, 0($6)
289; CHECK-NEXT:    ld.w $w1, 0($5)
290; CHECK-NEXT:    nor.v $w0, $w1, $w0
291; CHECK-NEXT:    jr $ra
292; CHECK-NEXT:    st.w $w0, 0($4)
293  %1 = load <4 x i32>, ptr %a
294  %2 = load <4 x i32>, ptr %b
295  %3 = or <4 x i32> %1, %2
296  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
297  store <4 x i32> %4, ptr %c
298  ret void
299}
300
; nor_v2i64: ORs the <2 x i64> vectors from %a and %b, then XORs the result
; with -1 in every lane. The checks expect the or+xor pair to become one nor.v.
301define void @nor_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
302; CHECK-LABEL: nor_v2i64:
303; CHECK:       # %bb.0:
304; CHECK-NEXT:    ld.d $w0, 0($6)
305; CHECK-NEXT:    ld.d $w1, 0($5)
306; CHECK-NEXT:    nor.v $w0, $w1, $w0
307; CHECK-NEXT:    jr $ra
308; CHECK-NEXT:    st.d $w0, 0($4)
309  %1 = load <2 x i64>, ptr %a
310  %2 = load <2 x i64>, ptr %b
311  %3 = or <2 x i64> %1, %2
312  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
313  store <2 x i64> %4, ptr %c
314  ret void
315}
316
; nor_v16i8_i: ORs the <16 x i8> vector at %a with a constant of all 1 lanes,
; then XORs with -1 in every lane. The checks expect nori.b with immediate 1.
317define void @nor_v16i8_i(ptr %c, ptr %a) nounwind {
318; CHECK-LABEL: nor_v16i8_i:
319; CHECK:       # %bb.0:
320; CHECK-NEXT:    ld.b $w0, 0($5)
321; CHECK-NEXT:    nori.b $w0, $w0, 1
322; CHECK-NEXT:    jr $ra
323; CHECK-NEXT:    st.b $w0, 0($4)
324  %1 = load <16 x i8>, ptr %a
325  %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
326  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
327  store <16 x i8> %3, ptr %c
328  ret void
329}
330
; nor_v8i16_i: ORs the <8 x i16> vector at %a with a constant of all 1 lanes,
; then XORs with -1 in every lane. The checks expect the constant to be built
; with ldi.h and the or+xor pair to become one nor.v.
331define void @nor_v8i16_i(ptr %c, ptr %a) nounwind {
332; CHECK-LABEL: nor_v8i16_i:
333; CHECK:       # %bb.0:
334; CHECK-NEXT:    ld.h $w0, 0($5)
335; CHECK-NEXT:    ldi.h $w1, 1
336; CHECK-NEXT:    nor.v $w0, $w0, $w1
337; CHECK-NEXT:    jr $ra
338; CHECK-NEXT:    st.h $w0, 0($4)
339  %1 = load <8 x i16>, ptr %a
340  %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
341  %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
342  store <8 x i16> %3, ptr %c
343  ret void
344}
345
; nor_v4i32_i: ORs the <4 x i32> vector at %a with a constant of all 1 lanes,
; then XORs with -1 in every lane. The checks expect the constant to be built
; with ldi.w and the or+xor pair to become one nor.v.
346define void @nor_v4i32_i(ptr %c, ptr %a) nounwind {
347; CHECK-LABEL: nor_v4i32_i:
348; CHECK:       # %bb.0:
349; CHECK-NEXT:    ld.w $w0, 0($5)
350; CHECK-NEXT:    ldi.w $w1, 1
351; CHECK-NEXT:    nor.v $w0, $w0, $w1
352; CHECK-NEXT:    jr $ra
353; CHECK-NEXT:    st.w $w0, 0($4)
354  %1 = load <4 x i32>, ptr %a
355  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
356  %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
357  store <4 x i32> %3, ptr %c
358  ret void
359}
360
; nor_v2i64_i: ORs the <2 x i64> vector at %a with <1, 1>, then XORs with
; <-1, -1>; both runs expect a nor.v. The mips run (MIPS prefix) has an extra
; shf.w 177 after ldi.d, while the mipsel run (MIPSEL prefix) uses ldi.d alone.
361define void @nor_v2i64_i(ptr %c, ptr %a) nounwind {
362; MIPS-LABEL: nor_v2i64_i:
363; MIPS:       # %bb.0:
364; MIPS-NEXT:    ldi.d $w0, 1
365; MIPS-NEXT:    shf.w $w0, $w0, 177
366; MIPS-NEXT:    ld.d $w1, 0($5)
367; MIPS-NEXT:    nor.v $w0, $w1, $w0
368; MIPS-NEXT:    jr $ra
369; MIPS-NEXT:    st.d $w0, 0($4)
370;
371; MIPSEL-LABEL: nor_v2i64_i:
372; MIPSEL:       # %bb.0:
373; MIPSEL-NEXT:    ldi.d $w0, 1
374; MIPSEL-NEXT:    ld.d $w1, 0($5)
375; MIPSEL-NEXT:    nor.v $w0, $w1, $w0
376; MIPSEL-NEXT:    jr $ra
377; MIPSEL-NEXT:    st.d $w0, 0($4)
378  %1 = load <2 x i64>, ptr %a
379  %2 = or <2 x i64> %1, <i64 1, i64 1>
380  %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
381  store <2 x i64> %3, ptr %c
382  ret void
383}
384
; xor_v16i8: element-wise XOR of the <16 x i8> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.b loads feeding one xor.v, then st.b.
385define void @xor_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
386; CHECK-LABEL: xor_v16i8:
387; CHECK:       # %bb.0:
388; CHECK-NEXT:    ld.b $w0, 0($6)
389; CHECK-NEXT:    ld.b $w1, 0($5)
390; CHECK-NEXT:    xor.v $w0, $w1, $w0
391; CHECK-NEXT:    jr $ra
392; CHECK-NEXT:    st.b $w0, 0($4)
393  %1 = load <16 x i8>, ptr %a
394  %2 = load <16 x i8>, ptr %b
395  %3 = xor <16 x i8> %1, %2
396  store <16 x i8> %3, ptr %c
397  ret void
398}
399
; xor_v8i16: element-wise XOR of the <8 x i16> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.h loads feeding one xor.v, then st.h.
400define void @xor_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
401; CHECK-LABEL: xor_v8i16:
402; CHECK:       # %bb.0:
403; CHECK-NEXT:    ld.h $w0, 0($6)
404; CHECK-NEXT:    ld.h $w1, 0($5)
405; CHECK-NEXT:    xor.v $w0, $w1, $w0
406; CHECK-NEXT:    jr $ra
407; CHECK-NEXT:    st.h $w0, 0($4)
408  %1 = load <8 x i16>, ptr %a
409  %2 = load <8 x i16>, ptr %b
410  %3 = xor <8 x i16> %1, %2
411  store <8 x i16> %3, ptr %c
412  ret void
413}
414
; xor_v4i32: element-wise XOR of the <4 x i32> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.w loads feeding one xor.v, then st.w.
415define void @xor_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
416; CHECK-LABEL: xor_v4i32:
417; CHECK:       # %bb.0:
418; CHECK-NEXT:    ld.w $w0, 0($6)
419; CHECK-NEXT:    ld.w $w1, 0($5)
420; CHECK-NEXT:    xor.v $w0, $w1, $w0
421; CHECK-NEXT:    jr $ra
422; CHECK-NEXT:    st.w $w0, 0($4)
423  %1 = load <4 x i32>, ptr %a
424  %2 = load <4 x i32>, ptr %b
425  %3 = xor <4 x i32> %1, %2
426  store <4 x i32> %3, ptr %c
427  ret void
428}
429
; xor_v2i64: element-wise XOR of the <2 x i64> vectors loaded from %a and %b,
; stored to %c. The checks expect two ld.d loads feeding one xor.v, then st.d.
430define void @xor_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
431; CHECK-LABEL: xor_v2i64:
432; CHECK:       # %bb.0:
433; CHECK-NEXT:    ld.d $w0, 0($6)
434; CHECK-NEXT:    ld.d $w1, 0($5)
435; CHECK-NEXT:    xor.v $w0, $w1, $w0
436; CHECK-NEXT:    jr $ra
437; CHECK-NEXT:    st.d $w0, 0($4)
438  %1 = load <2 x i64>, ptr %a
439  %2 = load <2 x i64>, ptr %b
440  %3 = xor <2 x i64> %1, %2
441  store <2 x i64> %3, ptr %c
442  ret void
443}
444
; xor_v16i8_i: XORs the <16 x i8> vector at %a with a constant whose lanes are
; all 3. The checks expect the constant to be folded into xori.b as immediate 3.
445define void @xor_v16i8_i(ptr %c, ptr %a) nounwind {
446; CHECK-LABEL: xor_v16i8_i:
447; CHECK:       # %bb.0:
448; CHECK-NEXT:    ld.b $w0, 0($5)
449; CHECK-NEXT:    xori.b $w0, $w0, 3
450; CHECK-NEXT:    jr $ra
451; CHECK-NEXT:    st.b $w0, 0($4)
452  %1 = load <16 x i8>, ptr %a
453  %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
454  store <16 x i8> %2, ptr %c
455  ret void
456}
457
; xor_v8i16_i: XORs the <8 x i16> vector at %a with a constant whose lanes are
; all 3. The checks expect the constant to be built with ldi.h and combined by
; xor.v (no immediate-form XOR appears in the expected output).
458define void @xor_v8i16_i(ptr %c, ptr %a) nounwind {
459; CHECK-LABEL: xor_v8i16_i:
460; CHECK:       # %bb.0:
461; CHECK-NEXT:    ld.h $w0, 0($5)
462; CHECK-NEXT:    ldi.h $w1, 3
463; CHECK-NEXT:    xor.v $w0, $w0, $w1
464; CHECK-NEXT:    jr $ra
465; CHECK-NEXT:    st.h $w0, 0($4)
466  %1 = load <8 x i16>, ptr %a
467  %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
468  store <8 x i16> %2, ptr %c
469  ret void
470}
471
; xor_v4i32_i: XORs the <4 x i32> vector at %a with a constant whose lanes are
; all 3. The checks expect the constant to be built with ldi.w and combined by
; xor.v (no immediate-form XOR appears in the expected output).
472define void @xor_v4i32_i(ptr %c, ptr %a) nounwind {
473; CHECK-LABEL: xor_v4i32_i:
474; CHECK:       # %bb.0:
475; CHECK-NEXT:    ld.w $w0, 0($5)
476; CHECK-NEXT:    ldi.w $w1, 3
477; CHECK-NEXT:    xor.v $w0, $w0, $w1
478; CHECK-NEXT:    jr $ra
479; CHECK-NEXT:    st.w $w0, 0($4)
480  %1 = load <4 x i32>, ptr %a
481  %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
482  store <4 x i32> %2, ptr %c
483  ret void
484}
485
; xor_v2i64_i: XORs the <2 x i64> vector at %a with the constant <3, 3>.
; Expected output differs per RUN line: the mips run (MIPS prefix) has an extra
; shf.w 177 after ldi.d, while the mipsel run (MIPSEL prefix) uses ldi.d alone.
486define void @xor_v2i64_i(ptr %c, ptr %a) nounwind {
487; MIPS-LABEL: xor_v2i64_i:
488; MIPS:       # %bb.0:
489; MIPS-NEXT:    ldi.d $w0, 3
490; MIPS-NEXT:    shf.w $w0, $w0, 177
491; MIPS-NEXT:    ld.d $w1, 0($5)
492; MIPS-NEXT:    xor.v $w0, $w1, $w0
493; MIPS-NEXT:    jr $ra
494; MIPS-NEXT:    st.d $w0, 0($4)
495;
496; MIPSEL-LABEL: xor_v2i64_i:
497; MIPSEL:       # %bb.0:
498; MIPSEL-NEXT:    ldi.d $w0, 3
499; MIPSEL-NEXT:    ld.d $w1, 0($5)
500; MIPSEL-NEXT:    xor.v $w0, $w1, $w0
501; MIPSEL-NEXT:    jr $ra
502; MIPSEL-NEXT:    st.d $w0, 0($4)
503  %1 = load <2 x i64>, ptr %a
504  %2 = xor <2 x i64> %1, <i64 3, i64 3>
505  store <2 x i64> %2, ptr %c
506  ret void
507}
508
; sll_v16i8: shifts each lane of the <16 x i8> vector at %a left (shl) by the
; matching lane of the vector at %b. The checks expect a single sll.b.
509define void @sll_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
510; CHECK-LABEL: sll_v16i8:
511; CHECK:       # %bb.0:
512; CHECK-NEXT:    ld.b $w0, 0($6)
513; CHECK-NEXT:    ld.b $w1, 0($5)
514; CHECK-NEXT:    sll.b $w0, $w1, $w0
515; CHECK-NEXT:    jr $ra
516; CHECK-NEXT:    st.b $w0, 0($4)
517  %1 = load <16 x i8>, ptr %a
518  %2 = load <16 x i8>, ptr %b
519  %3 = shl <16 x i8> %1, %2
520  store <16 x i8> %3, ptr %c
521  ret void
522}
523
; sll_v8i16: shifts each lane of the <8 x i16> vector at %a left (shl) by the
; matching lane of the vector at %b. The checks expect a single sll.h.
524define void @sll_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
525; CHECK-LABEL: sll_v8i16:
526; CHECK:       # %bb.0:
527; CHECK-NEXT:    ld.h $w0, 0($6)
528; CHECK-NEXT:    ld.h $w1, 0($5)
529; CHECK-NEXT:    sll.h $w0, $w1, $w0
530; CHECK-NEXT:    jr $ra
531; CHECK-NEXT:    st.h $w0, 0($4)
532  %1 = load <8 x i16>, ptr %a
533  %2 = load <8 x i16>, ptr %b
534  %3 = shl <8 x i16> %1, %2
535  store <8 x i16> %3, ptr %c
536  ret void
537}
538
; sll_v4i32: shifts each lane of the <4 x i32> vector at %a left (shl) by the
; matching lane of the vector at %b. The checks expect a single sll.w.
539define void @sll_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
540; CHECK-LABEL: sll_v4i32:
541; CHECK:       # %bb.0:
542; CHECK-NEXT:    ld.w $w0, 0($6)
543; CHECK-NEXT:    ld.w $w1, 0($5)
544; CHECK-NEXT:    sll.w $w0, $w1, $w0
545; CHECK-NEXT:    jr $ra
546; CHECK-NEXT:    st.w $w0, 0($4)
547  %1 = load <4 x i32>, ptr %a
548  %2 = load <4 x i32>, ptr %b
549  %3 = shl <4 x i32> %1, %2
550  store <4 x i32> %3, ptr %c
551  ret void
552}
553
; sll_v2i64: shifts each lane of the <2 x i64> vector at %a left (shl) by the
; matching lane of the vector at %b. The checks expect a single sll.d.
554define void @sll_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
555; CHECK-LABEL: sll_v2i64:
556; CHECK:       # %bb.0:
557; CHECK-NEXT:    ld.d $w0, 0($6)
558; CHECK-NEXT:    ld.d $w1, 0($5)
559; CHECK-NEXT:    sll.d $w0, $w1, $w0
560; CHECK-NEXT:    jr $ra
561; CHECK-NEXT:    st.d $w0, 0($4)
562  %1 = load <2 x i64>, ptr %a
563  %2 = load <2 x i64>, ptr %b
564  %3 = shl <2 x i64> %1, %2
565  store <2 x i64> %3, ptr %c
566  ret void
567}
568
; sll_v16i8_i: shifts every lane of the <16 x i8> vector at %a left (shl) by
; the constant 1. The checks expect slli.b with immediate 1.
569define void @sll_v16i8_i(ptr %c, ptr %a) nounwind {
570; CHECK-LABEL: sll_v16i8_i:
571; CHECK:       # %bb.0:
572; CHECK-NEXT:    ld.b $w0, 0($5)
573; CHECK-NEXT:    slli.b $w0, $w0, 1
574; CHECK-NEXT:    jr $ra
575; CHECK-NEXT:    st.b $w0, 0($4)
576  %1 = load <16 x i8>, ptr %a
577  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
578  store <16 x i8> %2, ptr %c
579  ret void
580}
581
; sll_v8i16_i: shifts every lane of the <8 x i16> vector at %a left (shl) by
; the constant 1. The checks expect slli.h with immediate 1.
582define void @sll_v8i16_i(ptr %c, ptr %a) nounwind {
583; CHECK-LABEL: sll_v8i16_i:
584; CHECK:       # %bb.0:
585; CHECK-NEXT:    ld.h $w0, 0($5)
586; CHECK-NEXT:    slli.h $w0, $w0, 1
587; CHECK-NEXT:    jr $ra
588; CHECK-NEXT:    st.h $w0, 0($4)
589  %1 = load <8 x i16>, ptr %a
590  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
591  store <8 x i16> %2, ptr %c
592  ret void
593}
594
; sll_v4i32_i: shifts every lane of the <4 x i32> vector at %a left (shl) by
; the constant 1. The checks expect slli.w with immediate 1.
595define void @sll_v4i32_i(ptr %c, ptr %a) nounwind {
596; CHECK-LABEL: sll_v4i32_i:
597; CHECK:       # %bb.0:
598; CHECK-NEXT:    ld.w $w0, 0($5)
599; CHECK-NEXT:    slli.w $w0, $w0, 1
600; CHECK-NEXT:    jr $ra
601; CHECK-NEXT:    st.w $w0, 0($4)
602  %1 = load <4 x i32>, ptr %a
603  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
604  store <4 x i32> %2, ptr %c
605  ret void
606}
607
; sll_v2i64_i: shifts every lane of the <2 x i64> vector at %a left (shl) by
; the constant 1. The checks expect slli.d with immediate 1 under both RUN lines.
608define void @sll_v2i64_i(ptr %c, ptr %a) nounwind {
609; CHECK-LABEL: sll_v2i64_i:
610; CHECK:       # %bb.0:
611; CHECK-NEXT:    ld.d $w0, 0($5)
612; CHECK-NEXT:    slli.d $w0, $w0, 1
613; CHECK-NEXT:    jr $ra
614; CHECK-NEXT:    st.d $w0, 0($4)
615  %1 = load <2 x i64>, ptr %a
616  %2 = shl <2 x i64> %1, <i64 1, i64 1>
617  store <2 x i64> %2, ptr %c
618  ret void
619}
620
; sra_v16i8: arithmetic right shift (ashr) of each lane of the <16 x i8> vector
; at %a by the matching lane of the vector at %b. The checks expect one sra.b.
621define void @sra_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
622; CHECK-LABEL: sra_v16i8:
623; CHECK:       # %bb.0:
624; CHECK-NEXT:    ld.b $w0, 0($6)
625; CHECK-NEXT:    ld.b $w1, 0($5)
626; CHECK-NEXT:    sra.b $w0, $w1, $w0
627; CHECK-NEXT:    jr $ra
628; CHECK-NEXT:    st.b $w0, 0($4)
629  %1 = load <16 x i8>, ptr %a
630  %2 = load <16 x i8>, ptr %b
631  %3 = ashr <16 x i8> %1, %2
632  store <16 x i8> %3, ptr %c
633  ret void
634}
635
; sra_v8i16: arithmetic right shift (ashr) of each lane of the <8 x i16> vector
; at %a by the matching lane of the vector at %b. The checks expect one sra.h.
636define void @sra_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
637; CHECK-LABEL: sra_v8i16:
638; CHECK:       # %bb.0:
639; CHECK-NEXT:    ld.h $w0, 0($6)
640; CHECK-NEXT:    ld.h $w1, 0($5)
641; CHECK-NEXT:    sra.h $w0, $w1, $w0
642; CHECK-NEXT:    jr $ra
643; CHECK-NEXT:    st.h $w0, 0($4)
644  %1 = load <8 x i16>, ptr %a
645  %2 = load <8 x i16>, ptr %b
646  %3 = ashr <8 x i16> %1, %2
647  store <8 x i16> %3, ptr %c
648  ret void
649}
650
; sra_v4i32: arithmetic right shift (ashr) of each lane of the <4 x i32> vector
; at %a by the matching lane of the vector at %b. The checks expect one sra.w.
651define void @sra_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
652; CHECK-LABEL: sra_v4i32:
653; CHECK:       # %bb.0:
654; CHECK-NEXT:    ld.w $w0, 0($6)
655; CHECK-NEXT:    ld.w $w1, 0($5)
656; CHECK-NEXT:    sra.w $w0, $w1, $w0
657; CHECK-NEXT:    jr $ra
658; CHECK-NEXT:    st.w $w0, 0($4)
659  %1 = load <4 x i32>, ptr %a
660  %2 = load <4 x i32>, ptr %b
661  %3 = ashr <4 x i32> %1, %2
662  store <4 x i32> %3, ptr %c
663  ret void
664}
665
; sra_v2i64: arithmetic right shift (ashr) of each lane of the <2 x i64> vector
; at %a by the matching lane of the vector at %b. The checks expect one sra.d.
666define void @sra_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
667; CHECK-LABEL: sra_v2i64:
668; CHECK:       # %bb.0:
669; CHECK-NEXT:    ld.d $w0, 0($6)
670; CHECK-NEXT:    ld.d $w1, 0($5)
671; CHECK-NEXT:    sra.d $w0, $w1, $w0
672; CHECK-NEXT:    jr $ra
673; CHECK-NEXT:    st.d $w0, 0($4)
674  %1 = load <2 x i64>, ptr %a
675  %2 = load <2 x i64>, ptr %b
676  %3 = ashr <2 x i64> %1, %2
677  store <2 x i64> %3, ptr %c
678  ret void
679}
680
; sra_v16i8_i: arithmetic right shift (ashr) of every lane of the <16 x i8>
; vector at %a by the constant 1. The checks expect srai.b with immediate 1.
681define void @sra_v16i8_i(ptr %c, ptr %a) nounwind {
682; CHECK-LABEL: sra_v16i8_i:
683; CHECK:       # %bb.0:
684; CHECK-NEXT:    ld.b $w0, 0($5)
685; CHECK-NEXT:    srai.b $w0, $w0, 1
686; CHECK-NEXT:    jr $ra
687; CHECK-NEXT:    st.b $w0, 0($4)
688  %1 = load <16 x i8>, ptr %a
689  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
690  store <16 x i8> %2, ptr %c
691  ret void
692}
693
; sra_v8i16_i: arithmetic right shift (ashr) of every lane of the <8 x i16>
; vector at %a by the constant 1. The checks expect srai.h with immediate 1.
694define void @sra_v8i16_i(ptr %c, ptr %a) nounwind {
695; CHECK-LABEL: sra_v8i16_i:
696; CHECK:       # %bb.0:
697; CHECK-NEXT:    ld.h $w0, 0($5)
698; CHECK-NEXT:    srai.h $w0, $w0, 1
699; CHECK-NEXT:    jr $ra
700; CHECK-NEXT:    st.h $w0, 0($4)
701  %1 = load <8 x i16>, ptr %a
702  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
703  store <8 x i16> %2, ptr %c
704  ret void
705}
706
; sra_v4i32_i: arithmetic right shift (ashr) of every lane of the <4 x i32>
; vector at %a by the constant 1. The checks expect srai.w with immediate 1.
707define void @sra_v4i32_i(ptr %c, ptr %a) nounwind {
708; CHECK-LABEL: sra_v4i32_i:
709; CHECK:       # %bb.0:
710; CHECK-NEXT:    ld.w $w0, 0($5)
711; CHECK-NEXT:    srai.w $w0, $w0, 1
712; CHECK-NEXT:    jr $ra
713; CHECK-NEXT:    st.w $w0, 0($4)
714  %1 = load <4 x i32>, ptr %a
715  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
716  store <4 x i32> %2, ptr %c
717  ret void
718}
719
; sra_v2i64_i: arithmetic right shift (ashr) of every lane of the <2 x i64>
; vector at %a by the constant 1. The checks expect srai.d with immediate 1
; under both RUN lines.
720define void @sra_v2i64_i(ptr %c, ptr %a) nounwind {
721; CHECK-LABEL: sra_v2i64_i:
722; CHECK:       # %bb.0:
723; CHECK-NEXT:    ld.d $w0, 0($5)
724; CHECK-NEXT:    srai.d $w0, $w0, 1
725; CHECK-NEXT:    jr $ra
726; CHECK-NEXT:    st.d $w0, 0($4)
727  %1 = load <2 x i64>, ptr %a
728  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
729  store <2 x i64> %2, ptr %c
730  ret void
731}
732
; srl_v16i8: logical right shift (lshr) of each lane of the <16 x i8> vector
; at %a by the matching lane of the vector at %b. The checks expect one srl.b.
733define void @srl_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
734; CHECK-LABEL: srl_v16i8:
735; CHECK:       # %bb.0:
736; CHECK-NEXT:    ld.b $w0, 0($6)
737; CHECK-NEXT:    ld.b $w1, 0($5)
738; CHECK-NEXT:    srl.b $w0, $w1, $w0
739; CHECK-NEXT:    jr $ra
740; CHECK-NEXT:    st.b $w0, 0($4)
741  %1 = load <16 x i8>, ptr %a
742  %2 = load <16 x i8>, ptr %b
743  %3 = lshr <16 x i8> %1, %2
744  store <16 x i8> %3, ptr %c
745  ret void
746}
747
; srl_v8i16: logical right shift (lshr) of each lane of the <8 x i16> vector
; at %a by the matching lane of the vector at %b. The checks expect one srl.h.
748define void @srl_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
749; CHECK-LABEL: srl_v8i16:
750; CHECK:       # %bb.0:
751; CHECK-NEXT:    ld.h $w0, 0($6)
752; CHECK-NEXT:    ld.h $w1, 0($5)
753; CHECK-NEXT:    srl.h $w0, $w1, $w0
754; CHECK-NEXT:    jr $ra
755; CHECK-NEXT:    st.h $w0, 0($4)
756  %1 = load <8 x i16>, ptr %a
757  %2 = load <8 x i16>, ptr %b
758  %3 = lshr <8 x i16> %1, %2
759  store <8 x i16> %3, ptr %c
760  ret void
761}
762
; srl_v4i32: logical right shift (lshr) of each lane of the <4 x i32> vector
; at %a by the matching lane of the vector at %b. The checks expect one srl.w.
763define void @srl_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
764; CHECK-LABEL: srl_v4i32:
765; CHECK:       # %bb.0:
766; CHECK-NEXT:    ld.w $w0, 0($6)
767; CHECK-NEXT:    ld.w $w1, 0($5)
768; CHECK-NEXT:    srl.w $w0, $w1, $w0
769; CHECK-NEXT:    jr $ra
770; CHECK-NEXT:    st.w $w0, 0($4)
771  %1 = load <4 x i32>, ptr %a
772  %2 = load <4 x i32>, ptr %b
773  %3 = lshr <4 x i32> %1, %2
774  store <4 x i32> %3, ptr %c
775  ret void
776}
777
; srl_v2i64: logical right shift (lshr) of each lane of the <2 x i64> vector
; at %a by the matching lane of the vector at %b. The checks expect one srl.d.
778define void @srl_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
779; CHECK-LABEL: srl_v2i64:
780; CHECK:       # %bb.0:
781; CHECK-NEXT:    ld.d $w0, 0($6)
782; CHECK-NEXT:    ld.d $w1, 0($5)
783; CHECK-NEXT:    srl.d $w0, $w1, $w0
784; CHECK-NEXT:    jr $ra
785; CHECK-NEXT:    st.d $w0, 0($4)
786  %1 = load <2 x i64>, ptr %a
787  %2 = load <2 x i64>, ptr %b
788  %3 = lshr <2 x i64> %1, %2
789  store <2 x i64> %3, ptr %c
790  ret void
791}
792
; srl_v16i8_i: logical right shift (lshr) of every lane of the <16 x i8>
; vector at %a by the constant 1. The checks expect srli.b with immediate 1.
793define void @srl_v16i8_i(ptr %c, ptr %a) nounwind {
794; CHECK-LABEL: srl_v16i8_i:
795; CHECK:       # %bb.0:
796; CHECK-NEXT:    ld.b $w0, 0($5)
797; CHECK-NEXT:    srli.b $w0, $w0, 1
798; CHECK-NEXT:    jr $ra
799; CHECK-NEXT:    st.b $w0, 0($4)
800  %1 = load <16 x i8>, ptr %a
801  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
802  store <16 x i8> %2, ptr %c
803  ret void
804}
805
; srl_v8i16_i: logical right shift (lshr) of every lane of the <8 x i16>
; vector at %a by the constant 1. The checks expect srli.h with immediate 1.
806define void @srl_v8i16_i(ptr %c, ptr %a) nounwind {
807; CHECK-LABEL: srl_v8i16_i:
808; CHECK:       # %bb.0:
809; CHECK-NEXT:    ld.h $w0, 0($5)
810; CHECK-NEXT:    srli.h $w0, $w0, 1
811; CHECK-NEXT:    jr $ra
812; CHECK-NEXT:    st.h $w0, 0($4)
813  %1 = load <8 x i16>, ptr %a
814  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
815  store <8 x i16> %2, ptr %c
816  ret void
817}
818
; srl_v4i32_i: logical right shift (lshr) of every lane of the <4 x i32>
; vector at %a by the constant 1. The checks expect srli.w with immediate 1.
819define void @srl_v4i32_i(ptr %c, ptr %a) nounwind {
820; CHECK-LABEL: srl_v4i32_i:
821; CHECK:       # %bb.0:
822; CHECK-NEXT:    ld.w $w0, 0($5)
823; CHECK-NEXT:    srli.w $w0, $w0, 1
824; CHECK-NEXT:    jr $ra
825; CHECK-NEXT:    st.w $w0, 0($4)
826  %1 = load <4 x i32>, ptr %a
827  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
828  store <4 x i32> %2, ptr %c
829  ret void
830}
831
; srl_v2i64_i: logical right shift (lshr) of every lane of the <2 x i64>
; vector at %a by the constant 1. The checks expect srli.d with immediate 1
; under both RUN lines.
832define void @srl_v2i64_i(ptr %c, ptr %a) nounwind {
833; CHECK-LABEL: srl_v2i64_i:
834; CHECK:       # %bb.0:
835; CHECK-NEXT:    ld.d $w0, 0($5)
836; CHECK-NEXT:    srli.d $w0, $w0, 1
837; CHECK-NEXT:    jr $ra
838; CHECK-NEXT:    st.d $w0, 0($4)
839  %1 = load <2 x i64>, ptr %a
840  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
841  store <2 x i64> %2, ptr %c
842  ret void
843}
844
; ctpop_v16i8: calls @llvm.ctpop.v16i8 on the <16 x i8> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to pcnt.b.
845define void @ctpop_v16i8(ptr %c, ptr %a) nounwind {
846; CHECK-LABEL: ctpop_v16i8:
847; CHECK:       # %bb.0:
848; CHECK-NEXT:    ld.b $w0, 0($5)
849; CHECK-NEXT:    pcnt.b $w0, $w0
850; CHECK-NEXT:    jr $ra
851; CHECK-NEXT:    st.b $w0, 0($4)
852  %1 = load <16 x i8>, ptr %a
853  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
854  store <16 x i8> %2, ptr %c
855  ret void
856}
857
; ctpop_v8i16: calls @llvm.ctpop.v8i16 on the <8 x i16> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to pcnt.h.
858define void @ctpop_v8i16(ptr %c, ptr %a) nounwind {
859; CHECK-LABEL: ctpop_v8i16:
860; CHECK:       # %bb.0:
861; CHECK-NEXT:    ld.h $w0, 0($5)
862; CHECK-NEXT:    pcnt.h $w0, $w0
863; CHECK-NEXT:    jr $ra
864; CHECK-NEXT:    st.h $w0, 0($4)
865  %1 = load <8 x i16>, ptr %a
866  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
867  store <8 x i16> %2, ptr %c
868  ret void
869}
870
; ctpop_v4i32: calls @llvm.ctpop.v4i32 on the <4 x i32> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to pcnt.w.
871define void @ctpop_v4i32(ptr %c, ptr %a) nounwind {
872; CHECK-LABEL: ctpop_v4i32:
873; CHECK:       # %bb.0:
874; CHECK-NEXT:    ld.w $w0, 0($5)
875; CHECK-NEXT:    pcnt.w $w0, $w0
876; CHECK-NEXT:    jr $ra
877; CHECK-NEXT:    st.w $w0, 0($4)
878  %1 = load <4 x i32>, ptr %a
879  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
880  store <4 x i32> %2, ptr %c
881  ret void
882}
883
; ctpop_v2i64: calls @llvm.ctpop.v2i64 on the <2 x i64> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to pcnt.d.
884define void @ctpop_v2i64(ptr %c, ptr %a) nounwind {
885; CHECK-LABEL: ctpop_v2i64:
886; CHECK:       # %bb.0:
887; CHECK-NEXT:    ld.d $w0, 0($5)
888; CHECK-NEXT:    pcnt.d $w0, $w0
889; CHECK-NEXT:    jr $ra
890; CHECK-NEXT:    st.d $w0, 0($4)
891  %1 = load <2 x i64>, ptr %a
892  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
893  store <2 x i64> %2, ptr %c
894  ret void
895}
896
; ctlz_v16i8: calls @llvm.ctlz.v16i8 on the <16 x i8> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to nlzc.b.
897define void @ctlz_v16i8(ptr %c, ptr %a) nounwind {
898; CHECK-LABEL: ctlz_v16i8:
899; CHECK:       # %bb.0:
900; CHECK-NEXT:    ld.b $w0, 0($5)
901; CHECK-NEXT:    nlzc.b $w0, $w0
902; CHECK-NEXT:    jr $ra
903; CHECK-NEXT:    st.b $w0, 0($4)
904  %1 = load <16 x i8>, ptr %a
905  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
906  store <16 x i8> %2, ptr %c
907  ret void
908}
909
; ctlz_v8i16: calls @llvm.ctlz.v8i16 on the <8 x i16> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to nlzc.h.
910define void @ctlz_v8i16(ptr %c, ptr %a) nounwind {
911; CHECK-LABEL: ctlz_v8i16:
912; CHECK:       # %bb.0:
913; CHECK-NEXT:    ld.h $w0, 0($5)
914; CHECK-NEXT:    nlzc.h $w0, $w0
915; CHECK-NEXT:    jr $ra
916; CHECK-NEXT:    st.h $w0, 0($4)
917  %1 = load <8 x i16>, ptr %a
918  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
919  store <8 x i16> %2, ptr %c
920  ret void
921}
922
; ctlz_v4i32: calls @llvm.ctlz.v4i32 on the <4 x i32> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to nlzc.w.
923define void @ctlz_v4i32(ptr %c, ptr %a) nounwind {
924; CHECK-LABEL: ctlz_v4i32:
925; CHECK:       # %bb.0:
926; CHECK-NEXT:    ld.w $w0, 0($5)
927; CHECK-NEXT:    nlzc.w $w0, $w0
928; CHECK-NEXT:    jr $ra
929; CHECK-NEXT:    st.w $w0, 0($4)
930  %1 = load <4 x i32>, ptr %a
931  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
932  store <4 x i32> %2, ptr %c
933  ret void
934}
935
; ctlz_v2i64: calls @llvm.ctlz.v2i64 on the <2 x i64> vector at %a and stores
; the result to %c. The checks expect the call to be lowered to nlzc.d.
936define void @ctlz_v2i64(ptr %c, ptr %a) nounwind {
937; CHECK-LABEL: ctlz_v2i64:
938; CHECK:       # %bb.0:
939; CHECK-NEXT:    ld.d $w0, 0($5)
940; CHECK-NEXT:    nlzc.d $w0, $w0
941; CHECK-NEXT:    jr $ra
942; CHECK-NEXT:    st.d $w0, 0($4)
943  %1 = load <2 x i64>, ptr %a
944  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
945  store <2 x i64> %2, ptr %c
946  ret void
947}
948
; bsel_v16i8: computes (%a & %m) | (%b & ~%m) on <16 x i8> vectors, i.e. keeps
; the bits of %a where the mask %m is set and the bits of %b where it is clear.
; The checks expect three ld.b loads combined by a single bmnz.v.
949define void @bsel_v16i8(ptr %c, ptr %a, ptr %b, ptr %m) nounwind {
950; CHECK-LABEL: bsel_v16i8:
951; CHECK:       # %bb.0:
952; CHECK-NEXT:    ld.b $w0, 0($7)
953; CHECK-NEXT:    ld.b $w1, 0($5)
954; CHECK-NEXT:    ld.b $w2, 0($6)
955; CHECK-NEXT:    bmnz.v $w2, $w1, $w0
956; CHECK-NEXT:    jr $ra
957; CHECK-NEXT:    st.b $w2, 0($4)
958  %1 = load <16 x i8>, ptr %a
959  %2 = load <16 x i8>, ptr %b
960  %3 = load <16 x i8>, ptr %m
961  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
962                          i8 -1, i8 -1, i8 -1, i8 -1,
963                          i8 -1, i8 -1, i8 -1, i8 -1,
964                          i8 -1, i8 -1, i8 -1, i8 -1>
965  %5 = and <16 x i8> %1, %3
966  %6 = and <16 x i8> %2, %4
967  %7 = or <16 x i8> %5, %6
968  ; bmnz is the same operation
969  ; (vselect Mask, IfSet, IfClr) -> (BMNZ IfClr, IfSet, Mask)
970  store <16 x i8> %7, ptr %c
971  ret void
972}
973
; bsel_v16i8_i: computes (%a & ~%m) | (6 & %m) on <16 x i8> vectors, where 6 is
; a constant with the value 6 in every lane and %m is the mask loaded from
; memory. The checks expect two ld.b loads and a bseli.b with immediate 6.
974define void @bsel_v16i8_i(ptr %c, ptr %a, ptr %m) nounwind {
975; CHECK-LABEL: bsel_v16i8_i:
976; CHECK:       # %bb.0:
977; CHECK-NEXT:    ld.b $w0, 0($5)
978; CHECK-NEXT:    ld.b $w1, 0($6)
979; CHECK-NEXT:    bseli.b $w1, $w0, 6
980; CHECK-NEXT:    jr $ra
981; CHECK-NEXT:    st.b $w1, 0($4)
982  %1 = load <16 x i8>, ptr %a
983  %2 = load <16 x i8>, ptr %m
984  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
985                          i8 -1, i8 -1, i8 -1, i8 -1,
986                          i8 -1, i8 -1, i8 -1, i8 -1,
987                          i8 -1, i8 -1, i8 -1, i8 -1>
988  %4 = and <16 x i8> %1, %3
989  %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
990                      i8 6, i8 6, i8 6, i8 6,
991                      i8 6, i8 6, i8 6, i8 6,
992                      i8 6, i8 6, i8 6, i8 6>, %2
993  %6 = or <16 x i8> %4, %5
994  store <16 x i8> %6, ptr %c
995  ret void
996}
997
; bsel_v8i16: computes (%a & 6) | (%b & 65529) on <8 x i16> vectors; 65529 is
; the 16-bit complement of 6, so the two constant masks are disjoint and cover
; every bit. The checks expect the mask 6 to be built with ldi.h and the whole
; expression to become a single bsel.v.
998define void @bsel_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
999; CHECK-LABEL: bsel_v8i16:
1000; CHECK:       # %bb.0:
1001; CHECK-NEXT:    ld.h $w0, 0($5)
1002; CHECK-NEXT:    ld.h $w1, 0($6)
1003; CHECK-NEXT:    ldi.h $w2, 6
1004; CHECK-NEXT:    bsel.v $w2, $w1, $w0
1005; CHECK-NEXT:    jr $ra
1006; CHECK-NEXT:    st.h $w2, 0($4)
1007  %1 = load <8 x i16>, ptr %a
1008  %2 = load <8 x i16>, ptr %b
1009  %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
1010                          i16 6, i16 6, i16 6, i16 6>
1011  %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529,
1012                          i16 65529, i16 65529, i16 65529, i16 65529>
1013  %5 = or <8 x i16> %3, %4
1014  store <8 x i16> %5, ptr %c
1015  ret void
1016}
1017
; bsel_v4i32: per-i32-lane merge of two loaded vectors under a constant mask:
;   result = (%a & 6) | (%b & 4294967289), stored to %c.
; 4294967289 is 0xFFFFFFF9, the 32-bit complement of 6.
; The assertions expect the constant 6 to be loaded with ldi.w and the merge
; to be done by one bsel.v.
1018define void @bsel_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
1019; CHECK-LABEL: bsel_v4i32:
1020; CHECK:       # %bb.0:
1021; CHECK-NEXT:    ld.w $w0, 0($5)
1022; CHECK-NEXT:    ld.w $w1, 0($6)
1023; CHECK-NEXT:    ldi.w $w2, 6
1024; CHECK-NEXT:    bsel.v $w2, $w1, $w0
1025; CHECK-NEXT:    jr $ra
1026; CHECK-NEXT:    st.w $w2, 0($4)
1027  %1 = load <4 x i32>, ptr %a
1028  %2 = load <4 x i32>, ptr %b
1029  %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
1030  %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
1031  %5 = or <4 x i32> %3, %4
1032  store <4 x i32> %5, ptr %c
1033  ret void
1034}
1035
; bsel_v2i64: per-i64-lane merge of two loaded vectors under a constant mask:
;   result = (%a & 6) | (%b & 18446744073709551609), stored to %c.
; 18446744073709551609 is 2^64 - 7, the 64-bit complement of 6.
; The two RUN configurations have separate expectations: the MIPS prefix
; (mtriple=mips) expects an extra shf.w on the ldi.d result before the
; loads, while the MIPSEL prefix (mtriple=mipsel) does not. Both end in one
; bsel.v.
1036define void @bsel_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
1037; MIPS-LABEL: bsel_v2i64:
1038; MIPS:       # %bb.0:
1039; MIPS-NEXT:    ldi.d $w0, 6
1040; MIPS-NEXT:    shf.w $w0, $w0, 177
1041; MIPS-NEXT:    ld.d $w1, 0($5)
1042; MIPS-NEXT:    ld.d $w2, 0($6)
1043; MIPS-NEXT:    bsel.v $w0, $w2, $w1
1044; MIPS-NEXT:    jr $ra
1045; MIPS-NEXT:    st.d $w0, 0($4)
1046;
1047; MIPSEL-LABEL: bsel_v2i64:
1048; MIPSEL:       # %bb.0:
1049; MIPSEL-NEXT:    ldi.d $w0, 6
1050; MIPSEL-NEXT:    ld.d $w1, 0($5)
1051; MIPSEL-NEXT:    ld.d $w2, 0($6)
1052; MIPSEL-NEXT:    bsel.v $w0, $w2, $w1
1053; MIPSEL-NEXT:    jr $ra
1054; MIPSEL-NEXT:    st.d $w0, 0($4)
1055  %1 = load <2 x i64>, ptr %a
1056  %2 = load <2 x i64>, ptr %b
1057  %3 = and <2 x i64> %1, <i64 6, i64 6>
1058  %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
1059  %5 = or <2 x i64> %3, %4
1060  store <2 x i64> %5, ptr %c
1061  ret void
1062}
1063
; binsl_v16i8_i: per-i8-lane merge that takes the two most significant bits
; from %a and the remaining six bits from %b:
;   result = (%a & 192) | (%b & 63), stored to %c   (192 = 0xC0, 63 = 0x3F).
; The assertions expect a single binsli.b with immediate 1.
1064define void @binsl_v16i8_i(ptr %c, ptr %a, ptr %b) nounwind {
1065; CHECK-LABEL: binsl_v16i8_i:
1066; CHECK:       # %bb.0:
1067; CHECK-NEXT:    ld.b $w0, 0($5)
1068; CHECK-NEXT:    ld.b $w1, 0($6)
1069; CHECK-NEXT:    binsli.b $w1, $w0, 1
1070; CHECK-NEXT:    jr $ra
1071; CHECK-NEXT:    st.b $w1, 0($4)
1072  %1 = load <16 x i8>, ptr %a
1073  %2 = load <16 x i8>, ptr %b
1074  %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
1075                          i8 192, i8 192, i8 192, i8 192,
1076                          i8 192, i8 192, i8 192, i8 192,
1077                          i8 192, i8 192, i8 192, i8 192>
1078  %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63,
1079                          i8 63, i8 63, i8 63, i8 63,
1080                          i8 63, i8 63, i8 63, i8 63,
1081                          i8 63, i8 63, i8 63, i8 63>
1082  %5 = or <16 x i8> %3, %4
1083  store <16 x i8> %5, ptr %c
1084  ret void
1085}
1086
; binsl_v8i16_i: per-i16-lane merge that takes the two most significant bits
; from %a and the remaining fourteen bits from %b:
;   result = (%a & 49152) | (%b & 16383), stored to %c
;   (49152 = 0xC000, 16383 = 0x3FFF).
; The assertions expect a single binsli.h with immediate 1.
1087define void @binsl_v8i16_i(ptr %c, ptr %a, ptr %b) nounwind {
1088; CHECK-LABEL: binsl_v8i16_i:
1089; CHECK:       # %bb.0:
1090; CHECK-NEXT:    ld.h $w0, 0($5)
1091; CHECK-NEXT:    ld.h $w1, 0($6)
1092; CHECK-NEXT:    binsli.h $w1, $w0, 1
1093; CHECK-NEXT:    jr $ra
1094; CHECK-NEXT:    st.h $w1, 0($4)
1095  %1 = load <8 x i16>, ptr %a
1096  %2 = load <8 x i16>, ptr %b
1097  %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
1098                          i16 49152, i16 49152, i16 49152, i16 49152>
1099  %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383,
1100                          i16 16383, i16 16383, i16 16383, i16 16383>
1101  %5 = or <8 x i16> %3, %4
1102  store <8 x i16> %5, ptr %c
1103  ret void
1104}
1105
; binsl_v4i32_i: per-i32-lane merge that takes the two most significant bits
; from %a and the remaining thirty bits from %b:
;   result = (%a & 3221225472) | (%b & 1073741823), stored to %c
;   (3221225472 = 0xC0000000, 1073741823 = 0x3FFFFFFF).
; The assertions expect a single binsli.w with immediate 1.
1106define void @binsl_v4i32_i(ptr %c, ptr %a, ptr %b) nounwind {
1107; CHECK-LABEL: binsl_v4i32_i:
1108; CHECK:       # %bb.0:
1109; CHECK-NEXT:    ld.w $w0, 0($5)
1110; CHECK-NEXT:    ld.w $w1, 0($6)
1111; CHECK-NEXT:    binsli.w $w1, $w0, 1
1112; CHECK-NEXT:    jr $ra
1113; CHECK-NEXT:    st.w $w1, 0($4)
1114  %1 = load <4 x i32>, ptr %a
1115  %2 = load <4 x i32>, ptr %b
1116  %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
1117  %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
1118  %5 = or <4 x i32> %3, %4
1119  store <4 x i32> %5, ptr %c
1120  ret void
1121}
1122
; binsl_v2i64_i: per-i64-lane merge that takes every bit except the low three
; from %a and the low three bits from %b:
;   result = (%a & 18446744073709551608) | (%b & 7), stored to %c
;   (18446744073709551608 = 2^64 - 8 = 0xFFFFFFFFFFFFFFF8, i.e. the top 61
;   bits).
; The assertions expect a single binsli.d with immediate 60. Unlike the
; narrower binsl tests, the mask here is much wider than two bits; the TODO
; in the body explains why.
1123define void @binsl_v2i64_i(ptr %c, ptr %a, ptr %b) nounwind {
1124; CHECK-LABEL: binsl_v2i64_i:
1125; CHECK:       # %bb.0:
1126; CHECK-NEXT:    ld.d $w0, 0($5)
1127; CHECK-NEXT:    ld.d $w1, 0($6)
1128; CHECK-NEXT:    binsli.d $w1, $w0, 60
1129; CHECK-NEXT:    jr $ra
1130; CHECK-NEXT:    st.d $w1, 0($4)
1131  %1 = load <2 x i64>, ptr %a
1132  %2 = load <2 x i64>, ptr %b
1133  %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
1134  %4 = and <2 x i64> %2, <i64 7, i64 7>
1135  %5 = or <2 x i64> %3, %4
1136  ; TODO: We use a particularly wide mask here to work around a legalization
1137  ;       issue. If the mask doesn't fit within a 10-bit immediate, it gets
1138  ;       legalized into a constant pool. We should add a test to cover the
1139  ;       other cases once they correctly select binsli.d.
1140  store <2 x i64> %5, ptr %c
1141  ret void
1142}
1143
; binsr_v16i8_i: per-i8-lane merge that takes the two least significant bits
; from %a and the remaining six bits from %b:
;   result = (%a & 3) | (%b & 252), stored to %c   (252 = 0xFC).
; The assertions expect a single binsri.b with immediate 1.
1144define void @binsr_v16i8_i(ptr %c, ptr %a, ptr %b) nounwind {
1145; CHECK-LABEL: binsr_v16i8_i:
1146; CHECK:       # %bb.0:
1147; CHECK-NEXT:    ld.b $w0, 0($5)
1148; CHECK-NEXT:    ld.b $w1, 0($6)
1149; CHECK-NEXT:    binsri.b $w1, $w0, 1
1150; CHECK-NEXT:    jr $ra
1151; CHECK-NEXT:    st.b $w1, 0($4)
1152  %1 = load <16 x i8>, ptr %a
1153  %2 = load <16 x i8>, ptr %b
1154  %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
1155                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
1156  %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252,
1157                          i8 252, i8 252, i8 252, i8 252,
1158                          i8 252, i8 252, i8 252, i8 252,
1159                          i8 252, i8 252, i8 252, i8 252>
1160  %5 = or <16 x i8> %3, %4
1161  store <16 x i8> %5, ptr %c
1162  ret void
1163}
1164
; binsr_v8i16_i: per-i16-lane merge that takes the two least significant bits
; from %a and the remaining fourteen bits from %b:
;   result = (%a & 3) | (%b & 65532), stored to %c   (65532 = 0xFFFC).
; The assertions expect a single binsri.h with immediate 1.
1165define void @binsr_v8i16_i(ptr %c, ptr %a, ptr %b) nounwind {
1166; CHECK-LABEL: binsr_v8i16_i:
1167; CHECK:       # %bb.0:
1168; CHECK-NEXT:    ld.h $w0, 0($5)
1169; CHECK-NEXT:    ld.h $w1, 0($6)
1170; CHECK-NEXT:    binsri.h $w1, $w0, 1
1171; CHECK-NEXT:    jr $ra
1172; CHECK-NEXT:    st.h $w1, 0($4)
1173  %1 = load <8 x i16>, ptr %a
1174  %2 = load <8 x i16>, ptr %b
1175  %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
1176                          i16 3, i16 3, i16 3, i16 3>
1177  %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532,
1178                          i16 65532, i16 65532, i16 65532, i16 65532>
1179  %5 = or <8 x i16> %3, %4
1180  store <8 x i16> %5, ptr %c
1181  ret void
1182}
1183
; binsr_v4i32_i: per-i32-lane merge that takes the two least significant bits
; from %a and the remaining thirty bits from %b:
;   result = (%a & 3) | (%b & 4294967292), stored to %c
;   (4294967292 = 0xFFFFFFFC).
; The assertions expect a single binsri.w with immediate 1.
1184define void @binsr_v4i32_i(ptr %c, ptr %a, ptr %b) nounwind {
1185; CHECK-LABEL: binsr_v4i32_i:
1186; CHECK:       # %bb.0:
1187; CHECK-NEXT:    ld.w $w0, 0($5)
1188; CHECK-NEXT:    ld.w $w1, 0($6)
1189; CHECK-NEXT:    binsri.w $w1, $w0, 1
1190; CHECK-NEXT:    jr $ra
1191; CHECK-NEXT:    st.w $w1, 0($4)
1192  %1 = load <4 x i32>, ptr %a
1193  %2 = load <4 x i32>, ptr %b
1194  %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
1195  %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
1196  %5 = or <4 x i32> %3, %4
1197  store <4 x i32> %5, ptr %c
1198  ret void
1199}
1200
; binsr_v2i64_i: per-i64-lane merge that takes the two least significant bits
; from %a and the remaining sixty-two bits from %b:
;   result = (%a & 3) | (%b & 18446744073709551612), stored to %c
;   (18446744073709551612 = 2^64 - 4).
; The assertions expect a single binsri.d with immediate 1.
1201define void @binsr_v2i64_i(ptr %c, ptr %a, ptr %b) nounwind {
1202; CHECK-LABEL: binsr_v2i64_i:
1203; CHECK:       # %bb.0:
1204; CHECK-NEXT:    ld.d $w0, 0($5)
1205; CHECK-NEXT:    ld.d $w1, 0($6)
1206; CHECK-NEXT:    binsri.d $w1, $w0, 1
1207; CHECK-NEXT:    jr $ra
1208; CHECK-NEXT:    st.d $w1, 0($4)
1209  %1 = load <2 x i64>, ptr %a
1210  %2 = load <2 x i64>, ptr %b
1211  %3 = and <2 x i64> %1, <i64 3, i64 3>
1212  %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612>
1213  %5 = or <2 x i64> %3, %4
1214  store <2 x i64> %5, ptr %c
1215  ret void
1216}
1217
; bclr_v16i8: per-i8-lane clear of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a & ~(1 << %b), stored to %c.
; The assertions expect the shl/xor/and sequence to become one bclr.b.
1218define void @bclr_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
1219; CHECK-LABEL: bclr_v16i8:
1220; CHECK:       # %bb.0:
1221; CHECK-NEXT:    ld.b $w0, 0($6)
1222; CHECK-NEXT:    ld.b $w1, 0($5)
1223; CHECK-NEXT:    bclr.b $w0, $w1, $w0
1224; CHECK-NEXT:    jr $ra
1225; CHECK-NEXT:    st.b $w0, 0($4)
1226  %1 = load <16 x i8>, ptr %a
1227  %2 = load <16 x i8>, ptr %b
1228  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
1229  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1230  %5 = and <16 x i8> %1, %4
1231  store <16 x i8> %5, ptr %c
1232  ret void
1233}
1234
; bclr_v8i16: per-i16-lane clear of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a & ~(1 << %b), stored to %c.
; The assertions expect the shl/xor/and sequence to become one bclr.h.
1235define void @bclr_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
1236; CHECK-LABEL: bclr_v8i16:
1237; CHECK:       # %bb.0:
1238; CHECK-NEXT:    ld.h $w0, 0($6)
1239; CHECK-NEXT:    ld.h $w1, 0($5)
1240; CHECK-NEXT:    bclr.h $w0, $w1, $w0
1241; CHECK-NEXT:    jr $ra
1242; CHECK-NEXT:    st.h $w0, 0($4)
1243  %1 = load <8 x i16>, ptr %a
1244  %2 = load <8 x i16>, ptr %b
1245  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
1246  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1247  %5 = and <8 x i16> %1, %4
1248  store <8 x i16> %5, ptr %c
1249  ret void
1250}
1251
; bclr_v4i32: per-i32-lane clear of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a & ~(1 << %b), stored to %c.
; The assertions expect the shl/xor/and sequence to become one bclr.w.
1252define void @bclr_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
1253; CHECK-LABEL: bclr_v4i32:
1254; CHECK:       # %bb.0:
1255; CHECK-NEXT:    ld.w $w0, 0($6)
1256; CHECK-NEXT:    ld.w $w1, 0($5)
1257; CHECK-NEXT:    bclr.w $w0, $w1, $w0
1258; CHECK-NEXT:    jr $ra
1259; CHECK-NEXT:    st.w $w0, 0($4)
1260  %1 = load <4 x i32>, ptr %a
1261  %2 = load <4 x i32>, ptr %b
1262  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
1263  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
1264  %5 = and <4 x i32> %1, %4
1265  store <4 x i32> %5, ptr %c
1266  ret void
1267}
1268
; bclr_v2i64: per-i64-lane clear of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a & ~(1 << %b), stored to %c.
; The assertions expect the shl/xor/and sequence to become one bclr.d.
1269define void @bclr_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
1270; CHECK-LABEL: bclr_v2i64:
1271; CHECK:       # %bb.0:
1272; CHECK-NEXT:    ld.d $w0, 0($6)
1273; CHECK-NEXT:    ld.d $w1, 0($5)
1274; CHECK-NEXT:    bclr.d $w0, $w1, $w0
1275; CHECK-NEXT:    jr $ra
1276; CHECK-NEXT:    st.d $w0, 0($4)
1277  %1 = load <2 x i64>, ptr %a
1278  %2 = load <2 x i64>, ptr %b
1279  %3 = shl <2 x i64> <i64 1, i64 1>, %2
1280  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
1281  %5 = and <2 x i64> %1, %4
1282  store <2 x i64> %5, ptr %c
1283  ret void
1284}
1285
; bset_v16i8: per-i8-lane set of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a | (1 << %b), stored to %c.
; The assertions expect the shl/or pair to become one bset.b.
1286define void @bset_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
1287; CHECK-LABEL: bset_v16i8:
1288; CHECK:       # %bb.0:
1289; CHECK-NEXT:    ld.b $w0, 0($6)
1290; CHECK-NEXT:    ld.b $w1, 0($5)
1291; CHECK-NEXT:    bset.b $w0, $w1, $w0
1292; CHECK-NEXT:    jr $ra
1293; CHECK-NEXT:    st.b $w0, 0($4)
1294  %1 = load <16 x i8>, ptr %a
1295  %2 = load <16 x i8>, ptr %b
1296  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
1297  %4 = or <16 x i8> %1, %3
1298  store <16 x i8> %4, ptr %c
1299  ret void
1300}
1301
; bset_v8i16: per-i16-lane set of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a | (1 << %b), stored to %c.
; The assertions expect the shl/or pair to become one bset.h.
1302define void @bset_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
1303; CHECK-LABEL: bset_v8i16:
1304; CHECK:       # %bb.0:
1305; CHECK-NEXT:    ld.h $w0, 0($6)
1306; CHECK-NEXT:    ld.h $w1, 0($5)
1307; CHECK-NEXT:    bset.h $w0, $w1, $w0
1308; CHECK-NEXT:    jr $ra
1309; CHECK-NEXT:    st.h $w0, 0($4)
1310  %1 = load <8 x i16>, ptr %a
1311  %2 = load <8 x i16>, ptr %b
1312  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
1313  %4 = or <8 x i16> %1, %3
1314  store <8 x i16> %4, ptr %c
1315  ret void
1316}
1317
; bset_v4i32: per-i32-lane set of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a | (1 << %b), stored to %c.
; The assertions expect the shl/or pair to become one bset.w.
1318define void @bset_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
1319; CHECK-LABEL: bset_v4i32:
1320; CHECK:       # %bb.0:
1321; CHECK-NEXT:    ld.w $w0, 0($6)
1322; CHECK-NEXT:    ld.w $w1, 0($5)
1323; CHECK-NEXT:    bset.w $w0, $w1, $w0
1324; CHECK-NEXT:    jr $ra
1325; CHECK-NEXT:    st.w $w0, 0($4)
1326  %1 = load <4 x i32>, ptr %a
1327  %2 = load <4 x i32>, ptr %b
1328  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
1329  %4 = or <4 x i32> %1, %3
1330  store <4 x i32> %4, ptr %c
1331  ret void
1332}
1333
; bset_v2i64: per-i64-lane set of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a | (1 << %b), stored to %c.
; The assertions expect the shl/or pair to become one bset.d.
1334define void @bset_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
1335; CHECK-LABEL: bset_v2i64:
1336; CHECK:       # %bb.0:
1337; CHECK-NEXT:    ld.d $w0, 0($6)
1338; CHECK-NEXT:    ld.d $w1, 0($5)
1339; CHECK-NEXT:    bset.d $w0, $w1, $w0
1340; CHECK-NEXT:    jr $ra
1341; CHECK-NEXT:    st.d $w0, 0($4)
1342  %1 = load <2 x i64>, ptr %a
1343  %2 = load <2 x i64>, ptr %b
1344  %3 = shl <2 x i64> <i64 1, i64 1>, %2
1345  %4 = or <2 x i64> %1, %3
1346  store <2 x i64> %4, ptr %c
1347  ret void
1348}
1349
; bneg_v16i8: per-i8-lane toggle of one bit, with the bit index taken from the
; matching lane of %b:
;   result = %a ^ (1 << %b), stored to %c.
; The assertions expect the shl/xor pair to become one bneg.b.
1350define void @bneg_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
1351; CHECK-LABEL: bneg_v16i8:
1352; CHECK:       # %bb.0:
1353; CHECK-NEXT:    ld.b $w0, 0($6)
1354; CHECK-NEXT:    ld.b $w1, 0($5)
1355; CHECK-NEXT:    bneg.b $w0, $w1, $w0
1356; CHECK-NEXT:    jr $ra
1357; CHECK-NEXT:    st.b $w0, 0($4)
1358  %1 = load <16 x i8>, ptr %a
1359  %2 = load <16 x i8>, ptr %b
1360  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
1361  %4 = xor <16 x i8> %1, %3
1362  store <16 x i8> %4, ptr %c
1363  ret void
1364}
1365
; bneg_v8i16: per-i16-lane toggle of one bit, with the bit index taken from
; the matching lane of %b:
;   result = %a ^ (1 << %b), stored to %c.
; The assertions expect the shl/xor pair to become one bneg.h.
1366define void @bneg_v8i16(ptr %c, ptr %a, ptr %b) nounwind {
1367; CHECK-LABEL: bneg_v8i16:
1368; CHECK:       # %bb.0:
1369; CHECK-NEXT:    ld.h $w0, 0($6)
1370; CHECK-NEXT:    ld.h $w1, 0($5)
1371; CHECK-NEXT:    bneg.h $w0, $w1, $w0
1372; CHECK-NEXT:    jr $ra
1373; CHECK-NEXT:    st.h $w0, 0($4)
1374  %1 = load <8 x i16>, ptr %a
1375  %2 = load <8 x i16>, ptr %b
1376  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
1377  %4 = xor <8 x i16> %1, %3
1378  store <8 x i16> %4, ptr %c
1379  ret void
1380}
1381
; bneg_v4i32: per-i32-lane toggle of one bit, with the bit index taken from
; the matching lane of %b:
;   result = %a ^ (1 << %b), stored to %c.
; The assertions expect the shl/xor pair to become one bneg.w.
1382define void @bneg_v4i32(ptr %c, ptr %a, ptr %b) nounwind {
1383; CHECK-LABEL: bneg_v4i32:
1384; CHECK:       # %bb.0:
1385; CHECK-NEXT:    ld.w $w0, 0($6)
1386; CHECK-NEXT:    ld.w $w1, 0($5)
1387; CHECK-NEXT:    bneg.w $w0, $w1, $w0
1388; CHECK-NEXT:    jr $ra
1389; CHECK-NEXT:    st.w $w0, 0($4)
1390  %1 = load <4 x i32>, ptr %a
1391  %2 = load <4 x i32>, ptr %b
1392  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
1393  %4 = xor <4 x i32> %1, %3
1394  store <4 x i32> %4, ptr %c
1395  ret void
1396}
1397
; bneg_v2i64: per-i64-lane toggle of one bit, with the bit index taken from
; the matching lane of %b:
;   result = %a ^ (1 << %b), stored to %c.
; The assertions expect the shl/xor pair to become one bneg.d.
1398define void @bneg_v2i64(ptr %c, ptr %a, ptr %b) nounwind {
1399; CHECK-LABEL: bneg_v2i64:
1400; CHECK:       # %bb.0:
1401; CHECK-NEXT:    ld.d $w0, 0($6)
1402; CHECK-NEXT:    ld.d $w1, 0($5)
1403; CHECK-NEXT:    bneg.d $w0, $w1, $w0
1404; CHECK-NEXT:    jr $ra
1405; CHECK-NEXT:    st.d $w0, 0($4)
1406  %1 = load <2 x i64>, ptr %a
1407  %2 = load <2 x i64>, ptr %b
1408  %3 = shl <2 x i64> <i64 1, i64 1>, %2
1409  %4 = xor <2 x i64> %1, %3
1410  store <2 x i64> %4, ptr %c
1411  ret void
1412}
1413
; bclri_v16i8: per-i8-lane clear of bit 3 using a constant mask:
;   result = %a & (8 ^ -1), stored to %c.
; The xor of the two constant splats is 247 (0xF7) in every lane. For this
; element width the assertions expect andi.b with immediate 247 rather than
; a bclri.b, as the note in the body points out.
1414define void @bclri_v16i8(ptr %c, ptr %a) nounwind {
1415; CHECK-LABEL: bclri_v16i8:
1416; CHECK:       # %bb.0:
1417; CHECK-NEXT:    ld.b $w0, 0($5)
1418; CHECK-NEXT:    andi.b $w0, $w0, 247
1419; CHECK-NEXT:    jr $ra
1420; CHECK-NEXT:    st.b $w0, 0($4)
1421  %1 = load <16 x i8>, ptr %a
1422  %2 = xor <16 x i8> <i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8>,
1423                     <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1424  %3 = and <16 x i8> %1, %2
1425  ; bclri.b and andi.b are exactly equivalent.
1426  store <16 x i8> %3, ptr %c
1427  ret void
1428}
1429
; bclri_v8i16: per-i16-lane clear of bit 3 using a constant mask:
;   result = %a & (8 ^ -1), stored to %c   (8 = 1 << 3).
; The assertions expect a single bclri.h with bit index 3.
1430define void @bclri_v8i16(ptr %c, ptr %a) nounwind {
1431; CHECK-LABEL: bclri_v8i16:
1432; CHECK:       # %bb.0:
1433; CHECK-NEXT:    ld.h $w0, 0($5)
1434; CHECK-NEXT:    bclri.h $w0, $w0, 3
1435; CHECK-NEXT:    jr $ra
1436; CHECK-NEXT:    st.h $w0, 0($4)
1437  %1 = load <8 x i16>, ptr %a
1438  %2 = xor <8 x i16> <i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8>,
1439                     <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1440  %3 = and <8 x i16> %1, %2
1441  store <8 x i16> %3, ptr %c
1442  ret void
1443}
1444
; bclri_v4i32: per-i32-lane clear of bit 3 using a constant mask:
;   result = %a & (8 ^ -1), stored to %c   (8 = 1 << 3).
; The assertions expect a single bclri.w with bit index 3.
1445define void @bclri_v4i32(ptr %c, ptr %a) nounwind {
1446; CHECK-LABEL: bclri_v4i32:
1447; CHECK:       # %bb.0:
1448; CHECK-NEXT:    ld.w $w0, 0($5)
1449; CHECK-NEXT:    bclri.w $w0, $w0, 3
1450; CHECK-NEXT:    jr $ra
1451; CHECK-NEXT:    st.w $w0, 0($4)
1452  %1 = load <4 x i32>, ptr %a
1453  %2 = xor <4 x i32> <i32  8, i32  8, i32  8, i32  8>,
1454                     <i32 -1, i32 -1, i32 -1, i32 -1>
1455  %3 = and <4 x i32> %1, %2
1456  store <4 x i32> %3, ptr %c
1457  ret void
1458}
1459
; bclri_v2i64: per-i64-lane clear of bit 3 using a constant mask:
;   result = %a & (8 ^ -1), stored to %c   (8 = 1 << 3).
; The assertions expect a single bclri.d with bit index 3.
1460define void @bclri_v2i64(ptr %c, ptr %a) nounwind {
1461; CHECK-LABEL: bclri_v2i64:
1462; CHECK:       # %bb.0:
1463; CHECK-NEXT:    ld.d $w0, 0($5)
1464; CHECK-NEXT:    bclri.d $w0, $w0, 3
1465; CHECK-NEXT:    jr $ra
1466; CHECK-NEXT:    st.d $w0, 0($4)
1467  %1 = load <2 x i64>, ptr %a
1468  %2 = xor <2 x i64> <i64  8, i64  8>,
1469                     <i64 -1, i64 -1>
1470  %3 = and <2 x i64> %1, %2
1471  store <2 x i64> %3, ptr %c
1472  ret void
1473}
1474
; bseti_v16i8: per-i8-lane set of bit 3 using a constant:
;   result = %a | 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bseti.b with bit index 3.
1475define void @bseti_v16i8(ptr %c, ptr %a) nounwind {
1476; CHECK-LABEL: bseti_v16i8:
1477; CHECK:       # %bb.0:
1478; CHECK-NEXT:    ld.b $w0, 0($5)
1479; CHECK-NEXT:    bseti.b $w0, $w0, 3
1480; CHECK-NEXT:    jr $ra
1481; CHECK-NEXT:    st.b $w0, 0($4)
1482  %1 = load <16 x i8>, ptr %a
1483  %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
1484  store <16 x i8> %2, ptr %c
1485  ret void
1486}
1487
; bseti_v8i16: per-i16-lane set of bit 3 using a constant:
;   result = %a | 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bseti.h with bit index 3.
1488define void @bseti_v8i16(ptr %c, ptr %a) nounwind {
1489; CHECK-LABEL: bseti_v8i16:
1490; CHECK:       # %bb.0:
1491; CHECK-NEXT:    ld.h $w0, 0($5)
1492; CHECK-NEXT:    bseti.h $w0, $w0, 3
1493; CHECK-NEXT:    jr $ra
1494; CHECK-NEXT:    st.h $w0, 0($4)
1495  %1 = load <8 x i16>, ptr %a
1496  %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1497  store <8 x i16> %2, ptr %c
1498  ret void
1499}
1500
; bseti_v4i32: per-i32-lane set of bit 3 using a constant:
;   result = %a | 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bseti.w with bit index 3.
1501define void @bseti_v4i32(ptr %c, ptr %a) nounwind {
1502; CHECK-LABEL: bseti_v4i32:
1503; CHECK:       # %bb.0:
1504; CHECK-NEXT:    ld.w $w0, 0($5)
1505; CHECK-NEXT:    bseti.w $w0, $w0, 3
1506; CHECK-NEXT:    jr $ra
1507; CHECK-NEXT:    st.w $w0, 0($4)
1508  %1 = load <4 x i32>, ptr %a
1509  %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
1510  store <4 x i32> %2, ptr %c
1511  ret void
1512}
1513
; bseti_v2i64: per-i64-lane set of bit 3 using a constant:
;   result = %a | 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bseti.d with bit index 3.
1514define void @bseti_v2i64(ptr %c, ptr %a) nounwind {
1515; CHECK-LABEL: bseti_v2i64:
1516; CHECK:       # %bb.0:
1517; CHECK-NEXT:    ld.d $w0, 0($5)
1518; CHECK-NEXT:    bseti.d $w0, $w0, 3
1519; CHECK-NEXT:    jr $ra
1520; CHECK-NEXT:    st.d $w0, 0($4)
1521  %1 = load <2 x i64>, ptr %a
1522  %2 = or <2 x i64> %1, <i64 8, i64 8>
1523  store <2 x i64> %2, ptr %c
1524  ret void
1525}
1526
; bnegi_v16i8: per-i8-lane toggle of bit 3 using a constant:
;   result = %a ^ 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bnegi.b with bit index 3.
1527define void @bnegi_v16i8(ptr %c, ptr %a) nounwind {
1528; CHECK-LABEL: bnegi_v16i8:
1529; CHECK:       # %bb.0:
1530; CHECK-NEXT:    ld.b $w0, 0($5)
1531; CHECK-NEXT:    bnegi.b $w0, $w0, 3
1532; CHECK-NEXT:    jr $ra
1533; CHECK-NEXT:    st.b $w0, 0($4)
1534  %1 = load <16 x i8>, ptr %a
1535  %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
1536  store <16 x i8> %2, ptr %c
1537  ret void
1538}
1539
; bnegi_v8i16: per-i16-lane toggle of bit 3 using a constant:
;   result = %a ^ 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bnegi.h with bit index 3.
1540define void @bnegi_v8i16(ptr %c, ptr %a) nounwind {
1541; CHECK-LABEL: bnegi_v8i16:
1542; CHECK:       # %bb.0:
1543; CHECK-NEXT:    ld.h $w0, 0($5)
1544; CHECK-NEXT:    bnegi.h $w0, $w0, 3
1545; CHECK-NEXT:    jr $ra
1546; CHECK-NEXT:    st.h $w0, 0($4)
1547  %1 = load <8 x i16>, ptr %a
1548  %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1549  store <8 x i16> %2, ptr %c
1550  ret void
1551}
1552
; bnegi_v4i32: per-i32-lane toggle of bit 3 using a constant:
;   result = %a ^ 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bnegi.w with bit index 3.
1553define void @bnegi_v4i32(ptr %c, ptr %a) nounwind {
1554; CHECK-LABEL: bnegi_v4i32:
1555; CHECK:       # %bb.0:
1556; CHECK-NEXT:    ld.w $w0, 0($5)
1557; CHECK-NEXT:    bnegi.w $w0, $w0, 3
1558; CHECK-NEXT:    jr $ra
1559; CHECK-NEXT:    st.w $w0, 0($4)
1560  %1 = load <4 x i32>, ptr %a
1561  %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
1562  store <4 x i32> %2, ptr %c
1563  ret void
1564}
1565
; bnegi_v2i64: per-i64-lane toggle of bit 3 using a constant:
;   result = %a ^ 8, stored to %c   (8 = 1 << 3).
; The assertions expect a single bnegi.d with bit index 3.
1566define void @bnegi_v2i64(ptr %c, ptr %a) nounwind {
1567; CHECK-LABEL: bnegi_v2i64:
1568; CHECK:       # %bb.0:
1569; CHECK-NEXT:    ld.d $w0, 0($5)
1570; CHECK-NEXT:    bnegi.d $w0, $w0, 3
1571; CHECK-NEXT:    jr $ra
1572; CHECK-NEXT:    st.d $w0, 0($4)
1573  %1 = load <2 x i64>, ptr %a
1574  %2 = xor <2 x i64> %1, <i64 8, i64 8>
1575  store <2 x i64> %2, ptr %c
1576  ret void
1577}
1578
; Declarations of the vector population-count (ctpop) and count-leading-zeros
; (ctlz) intrinsics for the four vector types v16i8, v8i16, v4i32 and v2i64.
; NOTE(review): none of the functions immediately above call these; the users
; are presumably earlier in the file - confirm before removing any of them.
1579declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
1580declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
1581declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
1582declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)
1583declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
1584declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
1585declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
1586declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)
1587