xref: /llvm-project/llvm/test/CodeGen/PowerPC/mma-outer-product.ll (revision 706e1975400b3f30bd406b694bb711a7c7dbe1c4)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \
4; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6; RUN:   -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \
7; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
8
9declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
10declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
; intrinsics1: assembles an accumulator from the four vector arguments (passed
; in the order %vc1, %vc3, %vc2, %vc4), chains xvi4ger8pp, xvf16ger2pp,
; pmxvf32gerpn and pmxvf64gernp on it, and stores the <512 x i1> result to %ptr.
; The pair operand of pmxvf64gernp is assembled from %vc4 and %vc1.
; In the expected code the store base r3 is loaded from 96(r1) for the
; little-endian run and from 112(r1) for the big-endian run.
11define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) {
12; CHECK-LABEL: intrinsics1:
13; CHECK:       # %bb.0:
14; CHECK-NEXT:    vmr v1, v4
15; CHECK-NEXT:    vmr v4, v3
16; CHECK-NEXT:    vmr v0, v2
17; CHECK-NEXT:    xxlor vs3, v5, v5
18; CHECK-NEXT:    ld r3, 96(r1)
19; CHECK-NEXT:    xxlor vs0, v0, v0
20; CHECK-NEXT:    xxlor vs1, v1, v1
21; CHECK-NEXT:    xxlor vs2, v4, v4
22; CHECK-NEXT:    xxmtacc acc0
23; CHECK-NEXT:    xvi4ger8pp acc0, v2, v3
24; CHECK-NEXT:    xvf16ger2pp acc0, v2, v1
25; CHECK-NEXT:    pmxvf32gerpn acc0, v3, v5, 0, 0
26; CHECK-NEXT:    vmr v3, v2
27; CHECK-NEXT:    vmr v2, v5
28; CHECK-NEXT:    pmxvf64gernp acc0, vsp34, v0, 0, 0
29; CHECK-NEXT:    xxmfacc acc0
30; CHECK-NEXT:    stxv vs0, 48(r3)
31; CHECK-NEXT:    stxv vs1, 32(r3)
32; CHECK-NEXT:    stxv vs2, 16(r3)
33; CHECK-NEXT:    stxv vs3, 0(r3)
34; CHECK-NEXT:    blr
35;
36; CHECK-BE-LABEL: intrinsics1:
37; CHECK-BE:       # %bb.0:
38; CHECK-BE-NEXT:    vmr v1, v4
39; CHECK-BE-NEXT:    vmr v4, v3
40; CHECK-BE-NEXT:    vmr v0, v2
41; CHECK-BE-NEXT:    xxlor vs3, v5, v5
42; CHECK-BE-NEXT:    ld r3, 112(r1)
43; CHECK-BE-NEXT:    xxlor vs0, v0, v0
44; CHECK-BE-NEXT:    xxlor vs1, v1, v1
45; CHECK-BE-NEXT:    xxlor vs2, v4, v4
46; CHECK-BE-NEXT:    xxmtacc acc0
47; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v3
48; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v1
49; CHECK-BE-NEXT:    pmxvf32gerpn acc0, v3, v5, 0, 0
50; CHECK-BE-NEXT:    vmr v3, v2
51; CHECK-BE-NEXT:    vmr v2, v5
52; CHECK-BE-NEXT:    pmxvf64gernp acc0, vsp34, v0, 0, 0
53; CHECK-BE-NEXT:    xxmfacc acc0
54; CHECK-BE-NEXT:    stxv vs1, 16(r3)
55; CHECK-BE-NEXT:    stxv vs0, 0(r3)
56; CHECK-BE-NEXT:    stxv vs3, 48(r3)
57; CHECK-BE-NEXT:    stxv vs2, 32(r3)
58; CHECK-BE-NEXT:    blr
59  %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc3, <16 x i8> %vc2, <16 x i8> %vc4)
60  %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
61  %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
62  %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
63  %5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
64  %6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
65  store <512 x i1> %6, ptr %ptr, align 64
66  ret void
67}
68
69declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
; intrinsics2: loads one vector from each of %ptr1..%ptr4, assembles them into
; an accumulator, chains xvi8ger4pp, xvf16ger2pn, pmxvf32gernn and
; pmxvf64gernn on it, then disassembles the result and stores elements 0..3
; back to %ptr1..%ptr4 respectively. %ptr is not referenced in the body.
; Expected stores through r3..r6: vs3, vs2, vs1, vs0 for the little-endian
; run and vs0, vs1, vs2, vs3 for the big-endian run.
70define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) {
71; CHECK-LABEL: intrinsics2:
72; CHECK:       # %bb.0:
73; CHECK-NEXT:    lxv v2, 0(r3)
74; CHECK-NEXT:    lxv v3, 0(r4)
75; CHECK-NEXT:    xxlor vs0, v2, v2
76; CHECK-NEXT:    lxv v4, 0(r5)
77; CHECK-NEXT:    lxv v5, 0(r6)
78; CHECK-NEXT:    xxlor vs1, v3, v3
79; CHECK-NEXT:    xxlor vs2, v4, v4
80; CHECK-NEXT:    xxlor vs3, v5, v5
81; CHECK-NEXT:    vmr v1, v2
82; CHECK-NEXT:    vmr v0, v5
83; CHECK-NEXT:    xxmtacc acc0
84; CHECK-NEXT:    xvi8ger4pp acc0, v2, v3
85; CHECK-NEXT:    xvf16ger2pn acc0, v2, v4
86; CHECK-NEXT:    pmxvf32gernn acc0, v3, v5, 0, 0
87; CHECK-NEXT:    pmxvf64gernn acc0, vsp32, v2, 0, 0
88; CHECK-NEXT:    xxmfacc acc0
89; CHECK-NEXT:    stxv vs3, 0(r3)
90; CHECK-NEXT:    stxv vs2, 0(r4)
91; CHECK-NEXT:    stxv vs1, 0(r5)
92; CHECK-NEXT:    stxv vs0, 0(r6)
93; CHECK-NEXT:    blr
94;
95; CHECK-BE-LABEL: intrinsics2:
96; CHECK-BE:       # %bb.0:
97; CHECK-BE-NEXT:    lxv v2, 0(r3)
98; CHECK-BE-NEXT:    lxv v3, 0(r4)
99; CHECK-BE-NEXT:    xxlor vs0, v2, v2
100; CHECK-BE-NEXT:    lxv v4, 0(r5)
101; CHECK-BE-NEXT:    lxv v5, 0(r6)
102; CHECK-BE-NEXT:    xxlor vs1, v3, v3
103; CHECK-BE-NEXT:    xxlor vs2, v4, v4
104; CHECK-BE-NEXT:    xxlor vs3, v5, v5
105; CHECK-BE-NEXT:    vmr v1, v2
106; CHECK-BE-NEXT:    vmr v0, v5
107; CHECK-BE-NEXT:    xxmtacc acc0
108; CHECK-BE-NEXT:    xvi8ger4pp acc0, v2, v3
109; CHECK-BE-NEXT:    xvf16ger2pn acc0, v2, v4
110; CHECK-BE-NEXT:    pmxvf32gernn acc0, v3, v5, 0, 0
111; CHECK-BE-NEXT:    pmxvf64gernn acc0, vsp32, v2, 0, 0
112; CHECK-BE-NEXT:    xxmfacc acc0
113; CHECK-BE-NEXT:    stxv vs0, 0(r3)
114; CHECK-BE-NEXT:    stxv vs1, 0(r4)
115; CHECK-BE-NEXT:    stxv vs2, 0(r5)
116; CHECK-BE-NEXT:    stxv vs3, 0(r6)
117; CHECK-BE-NEXT:    blr
118  %vc1 = load <16 x i8>, ptr %ptr1, align 16
119  %vc2 = load <16 x i8>, ptr %ptr2, align 16
120  %vc3 = load <16 x i8>, ptr %ptr3, align 16
121  %vc4 = load <16 x i8>, ptr %ptr4, align 16
122  %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4)
123  %2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
124  %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
125  %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
126  %5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
127  %6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
128  %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %6)
129  %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 0
130  %9 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 1
131  %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 2
132  %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 3
133  store <16 x i8> %8, ptr %ptr1, align 16
134  store <16 x i8> %9, ptr %ptr2, align 16
135  store <16 x i8> %10, ptr %ptr3, align 16
136  store <16 x i8> %11, ptr %ptr4, align 16
137  ret void
138}
139
; test1: calls @llvm.ppc.mma.xvi4ger8 with %vc as both vector operands and
; stores the <512 x i1> result to %resp; %vqp and %vpp are not referenced.
; Expected code: xvi4ger8, xxmfacc, then four stxv through r7. Offsets for
; vs0..vs3 are 48/32/16/0 little-endian and 0/16/32/48 big-endian.
140define void @test1(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
141; CHECK-LABEL: test1:
142; CHECK:       # %bb.0: # %entry
143; CHECK-NEXT:    xvi4ger8 acc0, v2, v2
144; CHECK-NEXT:    xxmfacc acc0
145; CHECK-NEXT:    stxv vs0, 48(r7)
146; CHECK-NEXT:    stxv vs1, 32(r7)
147; CHECK-NEXT:    stxv vs2, 16(r7)
148; CHECK-NEXT:    stxv vs3, 0(r7)
149; CHECK-NEXT:    blr
150;
151; CHECK-BE-LABEL: test1:
152; CHECK-BE:       # %bb.0: # %entry
153; CHECK-BE-NEXT:    xvi4ger8 acc0, v2, v2
154; CHECK-BE-NEXT:    xxmfacc acc0
155; CHECK-BE-NEXT:    stxv vs1, 16(r7)
156; CHECK-BE-NEXT:    stxv vs0, 0(r7)
157; CHECK-BE-NEXT:    stxv vs3, 48(r7)
158; CHECK-BE-NEXT:    stxv vs2, 32(r7)
159; CHECK-BE-NEXT:    blr
160entry:
161  %0 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %vc, <16 x i8> %vc)
162  store <512 x i1> %0, ptr %resp, align 64
163  ret void
164}
165
166
167declare <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8>, <16 x i8>)
168
; test2: loads a <512 x i1> value from %vqp, passes it together with %vc (as
; both vector operands) to @llvm.ppc.mma.xvi4ger8pp, and stores the result
; to %resp; %vpp is not referenced.
; Expected code: four lxv through r3 and xxmtacc before xvi4ger8pp, then
; xxmfacc and four stxv through r7. Offsets for vs0..vs3 are 48/32/16/0
; little-endian and 0/16/32/48 big-endian, for both the loads and the stores.
169define void @test2(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
170; CHECK-LABEL: test2:
171; CHECK:       # %bb.0: # %entry
172; CHECK-NEXT:    lxv vs1, 32(r3)
173; CHECK-NEXT:    lxv vs0, 48(r3)
174; CHECK-NEXT:    lxv vs3, 0(r3)
175; CHECK-NEXT:    lxv vs2, 16(r3)
176; CHECK-NEXT:    xxmtacc acc0
177; CHECK-NEXT:    xvi4ger8pp acc0, v2, v2
178; CHECK-NEXT:    xxmfacc acc0
179; CHECK-NEXT:    stxv vs0, 48(r7)
180; CHECK-NEXT:    stxv vs1, 32(r7)
181; CHECK-NEXT:    stxv vs2, 16(r7)
182; CHECK-NEXT:    stxv vs3, 0(r7)
183; CHECK-NEXT:    blr
184;
185; CHECK-BE-LABEL: test2:
186; CHECK-BE:       # %bb.0: # %entry
187; CHECK-BE-NEXT:    lxv vs1, 16(r3)
188; CHECK-BE-NEXT:    lxv vs0, 0(r3)
189; CHECK-BE-NEXT:    lxv vs3, 48(r3)
190; CHECK-BE-NEXT:    lxv vs2, 32(r3)
191; CHECK-BE-NEXT:    xxmtacc acc0
192; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v2
193; CHECK-BE-NEXT:    xxmfacc acc0
194; CHECK-BE-NEXT:    stxv vs1, 16(r7)
195; CHECK-BE-NEXT:    stxv vs0, 0(r7)
196; CHECK-BE-NEXT:    stxv vs3, 48(r7)
197; CHECK-BE-NEXT:    stxv vs2, 32(r7)
198; CHECK-BE-NEXT:    blr
199entry:
200  %0 = load <512 x i1>, ptr %vqp, align 64
201  %1 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
202  store <512 x i1> %1, ptr %resp, align 64
203  ret void
204}
205
206
207declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)
208
; test3: calls @llvm.ppc.mma.pmxvi4ger8 with %vc as both vector operands and
; three trailing i32 0 arguments, then stores the <512 x i1> result to %resp;
; %vqp and %vpp are not referenced.
; Expected code: pmxvi4ger8 with immediates 0, 0, 0, followed by xxmfacc and
; four stxv through r7 (vs0..vs3 at 48/32/16/0 little-endian, 0/16/32/48
; big-endian).
209define void @test3(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
210; CHECK-LABEL: test3:
211; CHECK:       # %bb.0: # %entry
212; CHECK-NEXT:    pmxvi4ger8 acc0, v2, v2, 0, 0, 0
213; CHECK-NEXT:    xxmfacc acc0
214; CHECK-NEXT:    stxv vs0, 48(r7)
215; CHECK-NEXT:    stxv vs1, 32(r7)
216; CHECK-NEXT:    stxv vs2, 16(r7)
217; CHECK-NEXT:    stxv vs3, 0(r7)
218; CHECK-NEXT:    blr
219;
220; CHECK-BE-LABEL: test3:
221; CHECK-BE:       # %bb.0: # %entry
222; CHECK-BE-NEXT:    pmxvi4ger8 acc0, v2, v2, 0, 0, 0
223; CHECK-BE-NEXT:    xxmfacc acc0
224; CHECK-BE-NEXT:    stxv vs1, 16(r7)
225; CHECK-BE-NEXT:    stxv vs0, 0(r7)
226; CHECK-BE-NEXT:    stxv vs3, 48(r7)
227; CHECK-BE-NEXT:    stxv vs2, 32(r7)
228; CHECK-BE-NEXT:    blr
229entry:
230  %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
231  store <512 x i1> %0, ptr %resp, align 64
232  ret void
233}
234
235
236declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8>, <16 x i8>, i32, i32, i32)
237
; test4: loads a <512 x i1> value from %vqp and passes it, %vc (twice) and
; three i32 0 arguments to @llvm.ppc.mma.pmxvi4ger8pp; the result is stored
; to %resp. %vpp is not referenced.
; Expected code: four lxv through r3, xxmtacc, pmxvi4ger8pp with immediates
; 0, 0, 0, xxmfacc, then four stxv through r7. vs0..vs3 use offsets
; 48/32/16/0 little-endian and 0/16/32/48 big-endian.
238define void @test4(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
239; CHECK-LABEL: test4:
240; CHECK:       # %bb.0: # %entry
241; CHECK-NEXT:    lxv vs1, 32(r3)
242; CHECK-NEXT:    lxv vs0, 48(r3)
243; CHECK-NEXT:    lxv vs3, 0(r3)
244; CHECK-NEXT:    lxv vs2, 16(r3)
245; CHECK-NEXT:    xxmtacc acc0
246; CHECK-NEXT:    pmxvi4ger8pp acc0, v2, v2, 0, 0, 0
247; CHECK-NEXT:    xxmfacc acc0
248; CHECK-NEXT:    stxv vs0, 48(r7)
249; CHECK-NEXT:    stxv vs1, 32(r7)
250; CHECK-NEXT:    stxv vs2, 16(r7)
251; CHECK-NEXT:    stxv vs3, 0(r7)
252; CHECK-NEXT:    blr
253;
254; CHECK-BE-LABEL: test4:
255; CHECK-BE:       # %bb.0: # %entry
256; CHECK-BE-NEXT:    lxv vs1, 16(r3)
257; CHECK-BE-NEXT:    lxv vs0, 0(r3)
258; CHECK-BE-NEXT:    lxv vs3, 48(r3)
259; CHECK-BE-NEXT:    lxv vs2, 32(r3)
260; CHECK-BE-NEXT:    xxmtacc acc0
261; CHECK-BE-NEXT:    pmxvi4ger8pp acc0, v2, v2, 0, 0, 0
262; CHECK-BE-NEXT:    xxmfacc acc0
263; CHECK-BE-NEXT:    stxv vs1, 16(r7)
264; CHECK-BE-NEXT:    stxv vs0, 0(r7)
265; CHECK-BE-NEXT:    stxv vs3, 48(r7)
266; CHECK-BE-NEXT:    stxv vs2, 32(r7)
267; CHECK-BE-NEXT:    blr
268entry:
269  %0 = load <512 x i1>, ptr %vqp, align 64
270  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
271  store <512 x i1> %1, ptr %resp, align 64
272  ret void
273}
274
275
276declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
277
; test5: calls @llvm.ppc.mma.xvi8ger4 with %vc as both vector operands and
; stores the <512 x i1> result to %resp; %vqp and %vpp are not referenced.
; Expected code: xvi8ger4, xxmfacc, then four stxv through r7. Offsets for
; vs0..vs3 are 48/32/16/0 little-endian and 0/16/32/48 big-endian.
278define void @test5(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
279; CHECK-LABEL: test5:
280; CHECK:       # %bb.0: # %entry
281; CHECK-NEXT:    xvi8ger4 acc0, v2, v2
282; CHECK-NEXT:    xxmfacc acc0
283; CHECK-NEXT:    stxv vs0, 48(r7)
284; CHECK-NEXT:    stxv vs1, 32(r7)
285; CHECK-NEXT:    stxv vs2, 16(r7)
286; CHECK-NEXT:    stxv vs3, 0(r7)
287; CHECK-NEXT:    blr
288;
289; CHECK-BE-LABEL: test5:
290; CHECK-BE:       # %bb.0: # %entry
291; CHECK-BE-NEXT:    xvi8ger4 acc0, v2, v2
292; CHECK-BE-NEXT:    xxmfacc acc0
293; CHECK-BE-NEXT:    stxv vs1, 16(r7)
294; CHECK-BE-NEXT:    stxv vs0, 0(r7)
295; CHECK-BE-NEXT:    stxv vs3, 48(r7)
296; CHECK-BE-NEXT:    stxv vs2, 32(r7)
297; CHECK-BE-NEXT:    blr
298entry:
299  %0 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %vc, <16 x i8> %vc)
300  store <512 x i1> %0, ptr %resp, align 64
301  ret void
302}
303
304
305declare <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8>, <16 x i8>)
306
; test6: loads a <512 x i1> value from %vqp, passes it together with %vc (as
; both vector operands) to @llvm.ppc.mma.xvi8ger4pp, and stores the result
; to %resp; %vpp is not referenced.
; Expected code: four lxv through r3 and xxmtacc before xvi8ger4pp, then
; xxmfacc and four stxv through r7. Offsets for vs0..vs3 are 48/32/16/0
; little-endian and 0/16/32/48 big-endian.
307define void @test6(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
308; CHECK-LABEL: test6:
309; CHECK:       # %bb.0: # %entry
310; CHECK-NEXT:    lxv vs1, 32(r3)
311; CHECK-NEXT:    lxv vs0, 48(r3)
312; CHECK-NEXT:    lxv vs3, 0(r3)
313; CHECK-NEXT:    lxv vs2, 16(r3)
314; CHECK-NEXT:    xxmtacc acc0
315; CHECK-NEXT:    xvi8ger4pp acc0, v2, v2
316; CHECK-NEXT:    xxmfacc acc0
317; CHECK-NEXT:    stxv vs0, 48(r7)
318; CHECK-NEXT:    stxv vs1, 32(r7)
319; CHECK-NEXT:    stxv vs2, 16(r7)
320; CHECK-NEXT:    stxv vs3, 0(r7)
321; CHECK-NEXT:    blr
322;
323; CHECK-BE-LABEL: test6:
324; CHECK-BE:       # %bb.0: # %entry
325; CHECK-BE-NEXT:    lxv vs1, 16(r3)
326; CHECK-BE-NEXT:    lxv vs0, 0(r3)
327; CHECK-BE-NEXT:    lxv vs3, 48(r3)
328; CHECK-BE-NEXT:    lxv vs2, 32(r3)
329; CHECK-BE-NEXT:    xxmtacc acc0
330; CHECK-BE-NEXT:    xvi8ger4pp acc0, v2, v2
331; CHECK-BE-NEXT:    xxmfacc acc0
332; CHECK-BE-NEXT:    stxv vs1, 16(r7)
333; CHECK-BE-NEXT:    stxv vs0, 0(r7)
334; CHECK-BE-NEXT:    stxv vs3, 48(r7)
335; CHECK-BE-NEXT:    stxv vs2, 32(r7)
336; CHECK-BE-NEXT:    blr
337entry:
338  %0 = load <512 x i1>, ptr %vqp, align 64
339  %1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
340  store <512 x i1> %1, ptr %resp, align 64
341  ret void
342}
343
344
345declare <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>)
346
; test7: calls @llvm.ppc.mma.pmxvi8ger4 with %vc as both vector operands and
; three trailing i32 0 arguments, then stores the <512 x i1> result to %resp;
; %vqp and %vpp are not referenced.
; Expected code: pmxvi8ger4 with immediates 0, 0, 0, followed by xxmfacc and
; four stxv through r7 (vs0..vs3 at 48/32/16/0 little-endian, 0/16/32/48
; big-endian).
347define void @test7(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
348; CHECK-LABEL: test7:
349; CHECK:       # %bb.0: # %entry
350; CHECK-NEXT:    pmxvi8ger4 acc0, v2, v2, 0, 0, 0
351; CHECK-NEXT:    xxmfacc acc0
352; CHECK-NEXT:    stxv vs0, 48(r7)
353; CHECK-NEXT:    stxv vs1, 32(r7)
354; CHECK-NEXT:    stxv vs2, 16(r7)
355; CHECK-NEXT:    stxv vs3, 0(r7)
356; CHECK-NEXT:    blr
357;
358; CHECK-BE-LABEL: test7:
359; CHECK-BE:       # %bb.0: # %entry
360; CHECK-BE-NEXT:    pmxvi8ger4 acc0, v2, v2, 0, 0, 0
361; CHECK-BE-NEXT:    xxmfacc acc0
362; CHECK-BE-NEXT:    stxv vs1, 16(r7)
363; CHECK-BE-NEXT:    stxv vs0, 0(r7)
364; CHECK-BE-NEXT:    stxv vs3, 48(r7)
365; CHECK-BE-NEXT:    stxv vs2, 32(r7)
366; CHECK-BE-NEXT:    blr
367entry:
368  %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
369  store <512 x i1> %0, ptr %resp, align 64
370  ret void
371}
372
373
374declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8>, <16 x i8>, i32, i32, i32)
375
; test8: loads a <512 x i1> value from %vqp and passes it, %vc (twice) and
; three i32 0 arguments to @llvm.ppc.mma.pmxvi8ger4pp; the result is stored
; to %resp. %vpp is not referenced.
; Expected code: four lxv through r3, xxmtacc, pmxvi8ger4pp with immediates
; 0, 0, 0, xxmfacc, then four stxv through r7. vs0..vs3 use offsets
; 48/32/16/0 little-endian and 0/16/32/48 big-endian.
376define void @test8(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
377; CHECK-LABEL: test8:
378; CHECK:       # %bb.0: # %entry
379; CHECK-NEXT:    lxv vs1, 32(r3)
380; CHECK-NEXT:    lxv vs0, 48(r3)
381; CHECK-NEXT:    lxv vs3, 0(r3)
382; CHECK-NEXT:    lxv vs2, 16(r3)
383; CHECK-NEXT:    xxmtacc acc0
384; CHECK-NEXT:    pmxvi8ger4pp acc0, v2, v2, 0, 0, 0
385; CHECK-NEXT:    xxmfacc acc0
386; CHECK-NEXT:    stxv vs0, 48(r7)
387; CHECK-NEXT:    stxv vs1, 32(r7)
388; CHECK-NEXT:    stxv vs2, 16(r7)
389; CHECK-NEXT:    stxv vs3, 0(r7)
390; CHECK-NEXT:    blr
391;
392; CHECK-BE-LABEL: test8:
393; CHECK-BE:       # %bb.0: # %entry
394; CHECK-BE-NEXT:    lxv vs1, 16(r3)
395; CHECK-BE-NEXT:    lxv vs0, 0(r3)
396; CHECK-BE-NEXT:    lxv vs3, 48(r3)
397; CHECK-BE-NEXT:    lxv vs2, 32(r3)
398; CHECK-BE-NEXT:    xxmtacc acc0
399; CHECK-BE-NEXT:    pmxvi8ger4pp acc0, v2, v2, 0, 0, 0
400; CHECK-BE-NEXT:    xxmfacc acc0
401; CHECK-BE-NEXT:    stxv vs1, 16(r7)
402; CHECK-BE-NEXT:    stxv vs0, 0(r7)
403; CHECK-BE-NEXT:    stxv vs3, 48(r7)
404; CHECK-BE-NEXT:    stxv vs2, 32(r7)
405; CHECK-BE-NEXT:    blr
406entry:
407  %0 = load <512 x i1>, ptr %vqp, align 64
408  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
409  store <512 x i1> %1, ptr %resp, align 64
410  ret void
411}
412
413
414declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
415
; test9: calls @llvm.ppc.mma.xvi16ger2s with %vc as both vector operands and
; stores the <512 x i1> result to %resp; %vqp and %vpp are not referenced.
; Expected code: xvi16ger2s, xxmfacc, then four stxv through r7. Offsets for
; vs0..vs3 are 48/32/16/0 little-endian and 0/16/32/48 big-endian.
416define void @test9(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
417; CHECK-LABEL: test9:
418; CHECK:       # %bb.0: # %entry
419; CHECK-NEXT:    xvi16ger2s acc0, v2, v2
420; CHECK-NEXT:    xxmfacc acc0
421; CHECK-NEXT:    stxv vs0, 48(r7)
422; CHECK-NEXT:    stxv vs1, 32(r7)
423; CHECK-NEXT:    stxv vs2, 16(r7)
424; CHECK-NEXT:    stxv vs3, 0(r7)
425; CHECK-NEXT:    blr
426;
427; CHECK-BE-LABEL: test9:
428; CHECK-BE:       # %bb.0: # %entry
429; CHECK-BE-NEXT:    xvi16ger2s acc0, v2, v2
430; CHECK-BE-NEXT:    xxmfacc acc0
431; CHECK-BE-NEXT:    stxv vs1, 16(r7)
432; CHECK-BE-NEXT:    stxv vs0, 0(r7)
433; CHECK-BE-NEXT:    stxv vs3, 48(r7)
434; CHECK-BE-NEXT:    stxv vs2, 32(r7)
435; CHECK-BE-NEXT:    blr
436entry:
437  %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %vc, <16 x i8> %vc)
438  store <512 x i1> %0, ptr %resp, align 64
439  ret void
440}
441
442
443declare <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8>, <16 x i8>)
444
; test10: loads a <512 x i1> value from %vqp, passes it together with %vc (as
; both vector operands) to @llvm.ppc.mma.xvi16ger2spp, and stores the result
; to %resp; %vpp is not referenced.
; Expected code: four lxv through r3 and xxmtacc before xvi16ger2spp, then
; xxmfacc and four stxv through r7. Offsets for vs0..vs3 are 48/32/16/0
; little-endian and 0/16/32/48 big-endian.
445define void @test10(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
446; CHECK-LABEL: test10:
447; CHECK:       # %bb.0: # %entry
448; CHECK-NEXT:    lxv vs1, 32(r3)
449; CHECK-NEXT:    lxv vs0, 48(r3)
450; CHECK-NEXT:    lxv vs3, 0(r3)
451; CHECK-NEXT:    lxv vs2, 16(r3)
452; CHECK-NEXT:    xxmtacc acc0
453; CHECK-NEXT:    xvi16ger2spp acc0, v2, v2
454; CHECK-NEXT:    xxmfacc acc0
455; CHECK-NEXT:    stxv vs0, 48(r7)
456; CHECK-NEXT:    stxv vs1, 32(r7)
457; CHECK-NEXT:    stxv vs2, 16(r7)
458; CHECK-NEXT:    stxv vs3, 0(r7)
459; CHECK-NEXT:    blr
460;
461; CHECK-BE-LABEL: test10:
462; CHECK-BE:       # %bb.0: # %entry
463; CHECK-BE-NEXT:    lxv vs1, 16(r3)
464; CHECK-BE-NEXT:    lxv vs0, 0(r3)
465; CHECK-BE-NEXT:    lxv vs3, 48(r3)
466; CHECK-BE-NEXT:    lxv vs2, 32(r3)
467; CHECK-BE-NEXT:    xxmtacc acc0
468; CHECK-BE-NEXT:    xvi16ger2spp acc0, v2, v2
469; CHECK-BE-NEXT:    xxmfacc acc0
470; CHECK-BE-NEXT:    stxv vs1, 16(r7)
471; CHECK-BE-NEXT:    stxv vs0, 0(r7)
472; CHECK-BE-NEXT:    stxv vs3, 48(r7)
473; CHECK-BE-NEXT:    stxv vs2, 32(r7)
474; CHECK-BE-NEXT:    blr
475entry:
476  %0 = load <512 x i1>, ptr %vqp, align 64
477  %1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
478  store <512 x i1> %1, ptr %resp, align 64
479  ret void
480}
481
482
483declare <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>)
484
; test11: calls @llvm.ppc.mma.pmxvi16ger2s with %vc as both vector operands
; and three trailing i32 0 arguments, then stores the <512 x i1> result to
; %resp; %vqp and %vpp are not referenced.
; Expected code: pmxvi16ger2s with immediates 0, 0, 0, followed by xxmfacc
; and four stxv through r7 (vs0..vs3 at 48/32/16/0 little-endian,
; 0/16/32/48 big-endian).
485define void @test11(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
486; CHECK-LABEL: test11:
487; CHECK:       # %bb.0: # %entry
488; CHECK-NEXT:    pmxvi16ger2s acc0, v2, v2, 0, 0, 0
489; CHECK-NEXT:    xxmfacc acc0
490; CHECK-NEXT:    stxv vs0, 48(r7)
491; CHECK-NEXT:    stxv vs1, 32(r7)
492; CHECK-NEXT:    stxv vs2, 16(r7)
493; CHECK-NEXT:    stxv vs3, 0(r7)
494; CHECK-NEXT:    blr
495;
496; CHECK-BE-LABEL: test11:
497; CHECK-BE:       # %bb.0: # %entry
498; CHECK-BE-NEXT:    pmxvi16ger2s acc0, v2, v2, 0, 0, 0
499; CHECK-BE-NEXT:    xxmfacc acc0
500; CHECK-BE-NEXT:    stxv vs1, 16(r7)
501; CHECK-BE-NEXT:    stxv vs0, 0(r7)
502; CHECK-BE-NEXT:    stxv vs3, 48(r7)
503; CHECK-BE-NEXT:    stxv vs2, 32(r7)
504; CHECK-BE-NEXT:    blr
505entry:
506  %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
507  store <512 x i1> %0, ptr %resp, align 64
508  ret void
509}
510
511
512declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8>, <16 x i8>, i32, i32, i32)
513
; test12: loads a <512 x i1> value from %vqp and passes it, %vc (twice) and
; three i32 0 arguments to @llvm.ppc.mma.pmxvi16ger2spp; the result is
; stored to %resp. %vpp is not referenced.
; Expected code: four lxv through r3, xxmtacc, pmxvi16ger2spp with
; immediates 0, 0, 0, xxmfacc, then four stxv through r7. vs0..vs3 use
; offsets 48/32/16/0 little-endian and 0/16/32/48 big-endian.
514define void @test12(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
515; CHECK-LABEL: test12:
516; CHECK:       # %bb.0: # %entry
517; CHECK-NEXT:    lxv vs1, 32(r3)
518; CHECK-NEXT:    lxv vs0, 48(r3)
519; CHECK-NEXT:    lxv vs3, 0(r3)
520; CHECK-NEXT:    lxv vs2, 16(r3)
521; CHECK-NEXT:    xxmtacc acc0
522; CHECK-NEXT:    pmxvi16ger2spp acc0, v2, v2, 0, 0, 0
523; CHECK-NEXT:    xxmfacc acc0
524; CHECK-NEXT:    stxv vs0, 48(r7)
525; CHECK-NEXT:    stxv vs1, 32(r7)
526; CHECK-NEXT:    stxv vs2, 16(r7)
527; CHECK-NEXT:    stxv vs3, 0(r7)
528; CHECK-NEXT:    blr
529;
530; CHECK-BE-LABEL: test12:
531; CHECK-BE:       # %bb.0: # %entry
532; CHECK-BE-NEXT:    lxv vs1, 16(r3)
533; CHECK-BE-NEXT:    lxv vs0, 0(r3)
534; CHECK-BE-NEXT:    lxv vs3, 48(r3)
535; CHECK-BE-NEXT:    lxv vs2, 32(r3)
536; CHECK-BE-NEXT:    xxmtacc acc0
537; CHECK-BE-NEXT:    pmxvi16ger2spp acc0, v2, v2, 0, 0, 0
538; CHECK-BE-NEXT:    xxmfacc acc0
539; CHECK-BE-NEXT:    stxv vs1, 16(r7)
540; CHECK-BE-NEXT:    stxv vs0, 0(r7)
541; CHECK-BE-NEXT:    stxv vs3, 48(r7)
542; CHECK-BE-NEXT:    stxv vs2, 32(r7)
543; CHECK-BE-NEXT:    blr
544entry:
545  %0 = load <512 x i1>, ptr %vqp, align 64
546  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
547  store <512 x i1> %1, ptr %resp, align 64
548  ret void
549}
550
551
552declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
553
; test13: calls @llvm.ppc.mma.xvf16ger2 with %vc as both vector operands and
; stores the <512 x i1> result to %resp; %vqp and %vpp are not referenced.
; Expected code: xvf16ger2, xxmfacc, then four stxv through r7. Offsets for
; vs0..vs3 are 48/32/16/0 little-endian and 0/16/32/48 big-endian.
554define void @test13(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
555; CHECK-LABEL: test13:
556; CHECK:       # %bb.0: # %entry
557; CHECK-NEXT:    xvf16ger2 acc0, v2, v2
558; CHECK-NEXT:    xxmfacc acc0
559; CHECK-NEXT:    stxv vs0, 48(r7)
560; CHECK-NEXT:    stxv vs1, 32(r7)
561; CHECK-NEXT:    stxv vs2, 16(r7)
562; CHECK-NEXT:    stxv vs3, 0(r7)
563; CHECK-NEXT:    blr
564;
565; CHECK-BE-LABEL: test13:
566; CHECK-BE:       # %bb.0: # %entry
567; CHECK-BE-NEXT:    xvf16ger2 acc0, v2, v2
568; CHECK-BE-NEXT:    xxmfacc acc0
569; CHECK-BE-NEXT:    stxv vs1, 16(r7)
570; CHECK-BE-NEXT:    stxv vs0, 0(r7)
571; CHECK-BE-NEXT:    stxv vs3, 48(r7)
572; CHECK-BE-NEXT:    stxv vs2, 32(r7)
573; CHECK-BE-NEXT:    blr
574entry:
575  %0 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %vc, <16 x i8> %vc)
576  store <512 x i1> %0, ptr %resp, align 64
577  ret void
578}
579
580
581declare <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8>, <16 x i8>)
582
; test14: loads a <512 x i1> value from %vqp, passes it together with %vc (as
; both vector operands) to @llvm.ppc.mma.xvf16ger2pp, and stores the result
; to %resp; %vpp is not referenced.
; Expected code: four lxv through r3 and xxmtacc before xvf16ger2pp, then
; xxmfacc and four stxv through r7. Offsets for vs0..vs3 are 48/32/16/0
; little-endian and 0/16/32/48 big-endian.
583define void @test14(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
584; CHECK-LABEL: test14:
585; CHECK:       # %bb.0: # %entry
586; CHECK-NEXT:    lxv vs1, 32(r3)
587; CHECK-NEXT:    lxv vs0, 48(r3)
588; CHECK-NEXT:    lxv vs3, 0(r3)
589; CHECK-NEXT:    lxv vs2, 16(r3)
590; CHECK-NEXT:    xxmtacc acc0
591; CHECK-NEXT:    xvf16ger2pp acc0, v2, v2
592; CHECK-NEXT:    xxmfacc acc0
593; CHECK-NEXT:    stxv vs0, 48(r7)
594; CHECK-NEXT:    stxv vs1, 32(r7)
595; CHECK-NEXT:    stxv vs2, 16(r7)
596; CHECK-NEXT:    stxv vs3, 0(r7)
597; CHECK-NEXT:    blr
598;
599; CHECK-BE-LABEL: test14:
600; CHECK-BE:       # %bb.0: # %entry
601; CHECK-BE-NEXT:    lxv vs1, 16(r3)
602; CHECK-BE-NEXT:    lxv vs0, 0(r3)
603; CHECK-BE-NEXT:    lxv vs3, 48(r3)
604; CHECK-BE-NEXT:    lxv vs2, 32(r3)
605; CHECK-BE-NEXT:    xxmtacc acc0
606; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v2
607; CHECK-BE-NEXT:    xxmfacc acc0
608; CHECK-BE-NEXT:    stxv vs1, 16(r7)
609; CHECK-BE-NEXT:    stxv vs0, 0(r7)
610; CHECK-BE-NEXT:    stxv vs3, 48(r7)
611; CHECK-BE-NEXT:    stxv vs2, 32(r7)
612; CHECK-BE-NEXT:    blr
613entry:
614  %0 = load <512 x i1>, ptr %vqp, align 64
615  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
616  store <512 x i1> %1, ptr %resp, align 64
617  ret void
618}
619
620
621declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
622
; test15: loads a <512 x i1> value from %vqp, passes it together with %vc (as
; both vector operands) to @llvm.ppc.mma.xvf16ger2pn, and stores the result
; to %resp; %vpp is not referenced.
; Expected code: four lxv through r3 and xxmtacc before xvf16ger2pn, then
; xxmfacc and four stxv through r7. Offsets for vs0..vs3 are 48/32/16/0
; little-endian and 0/16/32/48 big-endian.
623define void @test15(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
624; CHECK-LABEL: test15:
625; CHECK:       # %bb.0: # %entry
626; CHECK-NEXT:    lxv vs1, 32(r3)
627; CHECK-NEXT:    lxv vs0, 48(r3)
628; CHECK-NEXT:    lxv vs3, 0(r3)
629; CHECK-NEXT:    lxv vs2, 16(r3)
630; CHECK-NEXT:    xxmtacc acc0
631; CHECK-NEXT:    xvf16ger2pn acc0, v2, v2
632; CHECK-NEXT:    xxmfacc acc0
633; CHECK-NEXT:    stxv vs0, 48(r7)
634; CHECK-NEXT:    stxv vs1, 32(r7)
635; CHECK-NEXT:    stxv vs2, 16(r7)
636; CHECK-NEXT:    stxv vs3, 0(r7)
637; CHECK-NEXT:    blr
638;
639; CHECK-BE-LABEL: test15:
640; CHECK-BE:       # %bb.0: # %entry
641; CHECK-BE-NEXT:    lxv vs1, 16(r3)
642; CHECK-BE-NEXT:    lxv vs0, 0(r3)
643; CHECK-BE-NEXT:    lxv vs3, 48(r3)
644; CHECK-BE-NEXT:    lxv vs2, 32(r3)
645; CHECK-BE-NEXT:    xxmtacc acc0
646; CHECK-BE-NEXT:    xvf16ger2pn acc0, v2, v2
647; CHECK-BE-NEXT:    xxmfacc acc0
648; CHECK-BE-NEXT:    stxv vs1, 16(r7)
649; CHECK-BE-NEXT:    stxv vs0, 0(r7)
650; CHECK-BE-NEXT:    stxv vs3, 48(r7)
651; CHECK-BE-NEXT:    stxv vs2, 32(r7)
652; CHECK-BE-NEXT:    blr
653entry:
654  %0 = load <512 x i1>, ptr %vqp, align 64
655  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
656  store <512 x i1> %1, ptr %resp, align 64
657  ret void
658}
659
660
661declare <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>)
662
; test16: loads a <512 x i1> value from %vqp, passes it together with %vc (as
; both vector operands) to @llvm.ppc.mma.xvf16ger2np, and stores the result
; to %resp; %vpp is not referenced.
; Expected code: four lxv through r3 and xxmtacc before xvf16ger2np, then
; xxmfacc and four stxv through r7. Offsets for vs0..vs3 are 48/32/16/0
; little-endian and 0/16/32/48 big-endian.
663define void @test16(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
664; CHECK-LABEL: test16:
665; CHECK:       # %bb.0: # %entry
666; CHECK-NEXT:    lxv vs1, 32(r3)
667; CHECK-NEXT:    lxv vs0, 48(r3)
668; CHECK-NEXT:    lxv vs3, 0(r3)
669; CHECK-NEXT:    lxv vs2, 16(r3)
670; CHECK-NEXT:    xxmtacc acc0
671; CHECK-NEXT:    xvf16ger2np acc0, v2, v2
672; CHECK-NEXT:    xxmfacc acc0
673; CHECK-NEXT:    stxv vs0, 48(r7)
674; CHECK-NEXT:    stxv vs1, 32(r7)
675; CHECK-NEXT:    stxv vs2, 16(r7)
676; CHECK-NEXT:    stxv vs3, 0(r7)
677; CHECK-NEXT:    blr
678;
679; CHECK-BE-LABEL: test16:
680; CHECK-BE:       # %bb.0: # %entry
681; CHECK-BE-NEXT:    lxv vs1, 16(r3)
682; CHECK-BE-NEXT:    lxv vs0, 0(r3)
683; CHECK-BE-NEXT:    lxv vs3, 48(r3)
684; CHECK-BE-NEXT:    lxv vs2, 32(r3)
685; CHECK-BE-NEXT:    xxmtacc acc0
686; CHECK-BE-NEXT:    xvf16ger2np acc0, v2, v2
687; CHECK-BE-NEXT:    xxmfacc acc0
688; CHECK-BE-NEXT:    stxv vs1, 16(r7)
689; CHECK-BE-NEXT:    stxv vs0, 0(r7)
690; CHECK-BE-NEXT:    stxv vs3, 48(r7)
691; CHECK-BE-NEXT:    stxv vs2, 32(r7)
692; CHECK-BE-NEXT:    blr
693entry:
694  %0 = load <512 x i1>, ptr %vqp, align 64
695  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
696  store <512 x i1> %1, ptr %resp, align 64
697  ret void
698}
699
700
701declare <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>)
702
; test17: loads a <512 x i1> value from %vqp, passes it together with %vc (as
; both vector operands) to @llvm.ppc.mma.xvf16ger2nn, and stores the result
; to %resp; %vpp is not referenced.
; Expected code: four lxv through r3 and xxmtacc before xvf16ger2nn, then
; xxmfacc and four stxv through r7. Offsets for vs0..vs3 are 48/32/16/0
; little-endian and 0/16/32/48 big-endian.
703define void @test17(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
704; CHECK-LABEL: test17:
705; CHECK:       # %bb.0: # %entry
706; CHECK-NEXT:    lxv vs1, 32(r3)
707; CHECK-NEXT:    lxv vs0, 48(r3)
708; CHECK-NEXT:    lxv vs3, 0(r3)
709; CHECK-NEXT:    lxv vs2, 16(r3)
710; CHECK-NEXT:    xxmtacc acc0
711; CHECK-NEXT:    xvf16ger2nn acc0, v2, v2
712; CHECK-NEXT:    xxmfacc acc0
713; CHECK-NEXT:    stxv vs0, 48(r7)
714; CHECK-NEXT:    stxv vs1, 32(r7)
715; CHECK-NEXT:    stxv vs2, 16(r7)
716; CHECK-NEXT:    stxv vs3, 0(r7)
717; CHECK-NEXT:    blr
718;
719; CHECK-BE-LABEL: test17:
720; CHECK-BE:       # %bb.0: # %entry
721; CHECK-BE-NEXT:    lxv vs1, 16(r3)
722; CHECK-BE-NEXT:    lxv vs0, 0(r3)
723; CHECK-BE-NEXT:    lxv vs3, 48(r3)
724; CHECK-BE-NEXT:    lxv vs2, 32(r3)
725; CHECK-BE-NEXT:    xxmtacc acc0
726; CHECK-BE-NEXT:    xvf16ger2nn acc0, v2, v2
727; CHECK-BE-NEXT:    xxmfacc acc0
728; CHECK-BE-NEXT:    stxv vs1, 16(r7)
729; CHECK-BE-NEXT:    stxv vs0, 0(r7)
730; CHECK-BE-NEXT:    stxv vs3, 48(r7)
731; CHECK-BE-NEXT:    stxv vs2, 32(r7)
732; CHECK-BE-NEXT:    blr
733entry:
734  %0 = load <512 x i1>, ptr %vqp, align 64
735  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
736  store <512 x i1> %1, ptr %resp, align 64
737  ret void
738}
739
740
741declare <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>)
742
; test18: calls @llvm.ppc.mma.pmxvf16ger2 with %vc as both vector operands
; and three trailing i32 0 arguments, then stores the <512 x i1> result to
; %resp; %vqp and %vpp are not referenced.
; Expected code: pmxvf16ger2 with immediates 0, 0, 0, followed by xxmfacc
; and four stxv through r7 (vs0..vs3 at 48/32/16/0 little-endian,
; 0/16/32/48 big-endian).
743define void @test18(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
744; CHECK-LABEL: test18:
745; CHECK:       # %bb.0: # %entry
746; CHECK-NEXT:    pmxvf16ger2 acc0, v2, v2, 0, 0, 0
747; CHECK-NEXT:    xxmfacc acc0
748; CHECK-NEXT:    stxv vs0, 48(r7)
749; CHECK-NEXT:    stxv vs1, 32(r7)
750; CHECK-NEXT:    stxv vs2, 16(r7)
751; CHECK-NEXT:    stxv vs3, 0(r7)
752; CHECK-NEXT:    blr
753;
754; CHECK-BE-LABEL: test18:
755; CHECK-BE:       # %bb.0: # %entry
756; CHECK-BE-NEXT:    pmxvf16ger2 acc0, v2, v2, 0, 0, 0
757; CHECK-BE-NEXT:    xxmfacc acc0
758; CHECK-BE-NEXT:    stxv vs1, 16(r7)
759; CHECK-BE-NEXT:    stxv vs0, 0(r7)
760; CHECK-BE-NEXT:    stxv vs3, 48(r7)
761; CHECK-BE-NEXT:    stxv vs2, 32(r7)
762; CHECK-BE-NEXT:    blr
763entry:
764  %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
765  store <512 x i1> %0, ptr %resp, align 64
766  ret void
767}
768
769
770declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8>, <16 x i8>, i32, i32, i32)
771
; test19: loads a <512 x i1> value from %vqp and passes it, %vc (twice) and
; three i32 0 arguments to @llvm.ppc.mma.pmxvf16ger2pp; the result is stored
; to %resp. %vpp is not referenced.
; Expected code: four lxv through r3, xxmtacc, pmxvf16ger2pp with immediates
; 0, 0, 0, xxmfacc, then four stxv through r7. vs0..vs3 use offsets
; 48/32/16/0 little-endian and 0/16/32/48 big-endian.
772define void @test19(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
773; CHECK-LABEL: test19:
774; CHECK:       # %bb.0: # %entry
775; CHECK-NEXT:    lxv vs1, 32(r3)
776; CHECK-NEXT:    lxv vs0, 48(r3)
777; CHECK-NEXT:    lxv vs3, 0(r3)
778; CHECK-NEXT:    lxv vs2, 16(r3)
779; CHECK-NEXT:    xxmtacc acc0
780; CHECK-NEXT:    pmxvf16ger2pp acc0, v2, v2, 0, 0, 0
781; CHECK-NEXT:    xxmfacc acc0
782; CHECK-NEXT:    stxv vs0, 48(r7)
783; CHECK-NEXT:    stxv vs1, 32(r7)
784; CHECK-NEXT:    stxv vs2, 16(r7)
785; CHECK-NEXT:    stxv vs3, 0(r7)
786; CHECK-NEXT:    blr
787;
788; CHECK-BE-LABEL: test19:
789; CHECK-BE:       # %bb.0: # %entry
790; CHECK-BE-NEXT:    lxv vs1, 16(r3)
791; CHECK-BE-NEXT:    lxv vs0, 0(r3)
792; CHECK-BE-NEXT:    lxv vs3, 48(r3)
793; CHECK-BE-NEXT:    lxv vs2, 32(r3)
794; CHECK-BE-NEXT:    xxmtacc acc0
795; CHECK-BE-NEXT:    pmxvf16ger2pp acc0, v2, v2, 0, 0, 0
796; CHECK-BE-NEXT:    xxmfacc acc0
797; CHECK-BE-NEXT:    stxv vs1, 16(r7)
798; CHECK-BE-NEXT:    stxv vs0, 0(r7)
799; CHECK-BE-NEXT:    stxv vs3, 48(r7)
800; CHECK-BE-NEXT:    stxv vs2, 32(r7)
801; CHECK-BE-NEXT:    blr
802entry:
803  %0 = load <512 x i1>, ptr %vqp, align 64
804  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
805  store <512 x i1> %1, ptr %resp, align 64
806  ret void
807}
808
809
810declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
811
; test20: loads a <512 x i1> value from %vqp and passes it, %vc (twice) and
; three i32 0 arguments to @llvm.ppc.mma.pmxvf16ger2pn; the result is stored
; to %resp. %vpp is not referenced.
; Expected code: four lxv through r3, xxmtacc, pmxvf16ger2pn with immediates
; 0, 0, 0, xxmfacc, then four stxv through r7. vs0..vs3 use offsets
; 48/32/16/0 little-endian and 0/16/32/48 big-endian.
812define void @test20(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
813; CHECK-LABEL: test20:
814; CHECK:       # %bb.0: # %entry
815; CHECK-NEXT:    lxv vs1, 32(r3)
816; CHECK-NEXT:    lxv vs0, 48(r3)
817; CHECK-NEXT:    lxv vs3, 0(r3)
818; CHECK-NEXT:    lxv vs2, 16(r3)
819; CHECK-NEXT:    xxmtacc acc0
820; CHECK-NEXT:    pmxvf16ger2pn acc0, v2, v2, 0, 0, 0
821; CHECK-NEXT:    xxmfacc acc0
822; CHECK-NEXT:    stxv vs0, 48(r7)
823; CHECK-NEXT:    stxv vs1, 32(r7)
824; CHECK-NEXT:    stxv vs2, 16(r7)
825; CHECK-NEXT:    stxv vs3, 0(r7)
826; CHECK-NEXT:    blr
827;
828; CHECK-BE-LABEL: test20:
829; CHECK-BE:       # %bb.0: # %entry
830; CHECK-BE-NEXT:    lxv vs1, 16(r3)
831; CHECK-BE-NEXT:    lxv vs0, 0(r3)
832; CHECK-BE-NEXT:    lxv vs3, 48(r3)
833; CHECK-BE-NEXT:    lxv vs2, 32(r3)
834; CHECK-BE-NEXT:    xxmtacc acc0
835; CHECK-BE-NEXT:    pmxvf16ger2pn acc0, v2, v2, 0, 0, 0
836; CHECK-BE-NEXT:    xxmfacc acc0
837; CHECK-BE-NEXT:    stxv vs1, 16(r7)
838; CHECK-BE-NEXT:    stxv vs0, 0(r7)
839; CHECK-BE-NEXT:    stxv vs3, 48(r7)
840; CHECK-BE-NEXT:    stxv vs2, 32(r7)
841; CHECK-BE-NEXT:    blr
842entry:
843  %0 = load <512 x i1>, ptr %vqp, align 64
844  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
845  store <512 x i1> %1, ptr %resp, align 64
846  ret void
847}
848
849
850declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
851
; test21: loads a <512 x i1> value from %vqp and passes it, %vc (twice) and
; three i32 0 arguments to @llvm.ppc.mma.pmxvf16ger2np; the result is stored
; to %resp. %vpp is not referenced.
; Expected code: four lxv through r3, xxmtacc, pmxvf16ger2np with immediates
; 0, 0, 0, xxmfacc, then four stxv through r7. vs0..vs3 use offsets
; 48/32/16/0 little-endian and 0/16/32/48 big-endian.
852define void @test21(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
853; CHECK-LABEL: test21:
854; CHECK:       # %bb.0: # %entry
855; CHECK-NEXT:    lxv vs1, 32(r3)
856; CHECK-NEXT:    lxv vs0, 48(r3)
857; CHECK-NEXT:    lxv vs3, 0(r3)
858; CHECK-NEXT:    lxv vs2, 16(r3)
859; CHECK-NEXT:    xxmtacc acc0
860; CHECK-NEXT:    pmxvf16ger2np acc0, v2, v2, 0, 0, 0
861; CHECK-NEXT:    xxmfacc acc0
862; CHECK-NEXT:    stxv vs0, 48(r7)
863; CHECK-NEXT:    stxv vs1, 32(r7)
864; CHECK-NEXT:    stxv vs2, 16(r7)
865; CHECK-NEXT:    stxv vs3, 0(r7)
866; CHECK-NEXT:    blr
867;
868; CHECK-BE-LABEL: test21:
869; CHECK-BE:       # %bb.0: # %entry
870; CHECK-BE-NEXT:    lxv vs1, 16(r3)
871; CHECK-BE-NEXT:    lxv vs0, 0(r3)
872; CHECK-BE-NEXT:    lxv vs3, 48(r3)
873; CHECK-BE-NEXT:    lxv vs2, 32(r3)
874; CHECK-BE-NEXT:    xxmtacc acc0
875; CHECK-BE-NEXT:    pmxvf16ger2np acc0, v2, v2, 0, 0, 0
876; CHECK-BE-NEXT:    xxmfacc acc0
877; CHECK-BE-NEXT:    stxv vs1, 16(r7)
878; CHECK-BE-NEXT:    stxv vs0, 0(r7)
879; CHECK-BE-NEXT:    stxv vs3, 48(r7)
880; CHECK-BE-NEXT:    stxv vs2, 32(r7)
881; CHECK-BE-NEXT:    blr
882entry:
883  %0 = load <512 x i1>, ptr %vqp, align 64
884  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
885  store <512 x i1> %1, ptr %resp, align 64
886  ret void
887}
888
889
890declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
891
; test22 - codegen test for the @llvm.ppc.mma.pmxvf16ger2nn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands and all three i32 immediates set to 0, and
; stores the result to %resp. %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the pmxvf16ger2nn instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
892define void @test22(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
893; CHECK-LABEL: test22:
894; CHECK:       # %bb.0: # %entry
895; CHECK-NEXT:    lxv vs1, 32(r3)
896; CHECK-NEXT:    lxv vs0, 48(r3)
897; CHECK-NEXT:    lxv vs3, 0(r3)
898; CHECK-NEXT:    lxv vs2, 16(r3)
899; CHECK-NEXT:    xxmtacc acc0
900; CHECK-NEXT:    pmxvf16ger2nn acc0, v2, v2, 0, 0, 0
901; CHECK-NEXT:    xxmfacc acc0
902; CHECK-NEXT:    stxv vs0, 48(r7)
903; CHECK-NEXT:    stxv vs1, 32(r7)
904; CHECK-NEXT:    stxv vs2, 16(r7)
905; CHECK-NEXT:    stxv vs3, 0(r7)
906; CHECK-NEXT:    blr
907;
908; CHECK-BE-LABEL: test22:
909; CHECK-BE:       # %bb.0: # %entry
910; CHECK-BE-NEXT:    lxv vs1, 16(r3)
911; CHECK-BE-NEXT:    lxv vs0, 0(r3)
912; CHECK-BE-NEXT:    lxv vs3, 48(r3)
913; CHECK-BE-NEXT:    lxv vs2, 32(r3)
914; CHECK-BE-NEXT:    xxmtacc acc0
915; CHECK-BE-NEXT:    pmxvf16ger2nn acc0, v2, v2, 0, 0, 0
916; CHECK-BE-NEXT:    xxmfacc acc0
917; CHECK-BE-NEXT:    stxv vs1, 16(r7)
918; CHECK-BE-NEXT:    stxv vs0, 0(r7)
919; CHECK-BE-NEXT:    stxv vs3, 48(r7)
920; CHECK-BE-NEXT:    stxv vs2, 32(r7)
921; CHECK-BE-NEXT:    blr
922entry:
923  %0 = load <512 x i1>, ptr %vqp, align 64
924  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
925  store <512 x i1> %1, ptr %resp, align 64
926  ret void
927}
928
929
930declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
931
; test23 - codegen test for the @llvm.ppc.mma.xvf32ger intrinsic.
; The intrinsic takes no incoming accumulator here, so the IR calls it with
; %vc as both vector operands and stores the <512 x i1> result to %resp.
; %vqp and %vpp are not referenced by the body.
; The little-endian and big-endian expectations both require the xvf32ger
; instruction, xxmfacc and four stxv stores through r7, with no preceding
; loads or xxmtacc; the store offsets and order differ.
932define void @test23(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
933; CHECK-LABEL: test23:
934; CHECK:       # %bb.0: # %entry
935; CHECK-NEXT:    xvf32ger acc0, v2, v2
936; CHECK-NEXT:    xxmfacc acc0
937; CHECK-NEXT:    stxv vs0, 48(r7)
938; CHECK-NEXT:    stxv vs1, 32(r7)
939; CHECK-NEXT:    stxv vs2, 16(r7)
940; CHECK-NEXT:    stxv vs3, 0(r7)
941; CHECK-NEXT:    blr
942;
943; CHECK-BE-LABEL: test23:
944; CHECK-BE:       # %bb.0: # %entry
945; CHECK-BE-NEXT:    xvf32ger acc0, v2, v2
946; CHECK-BE-NEXT:    xxmfacc acc0
947; CHECK-BE-NEXT:    stxv vs1, 16(r7)
948; CHECK-BE-NEXT:    stxv vs0, 0(r7)
949; CHECK-BE-NEXT:    stxv vs3, 48(r7)
950; CHECK-BE-NEXT:    stxv vs2, 32(r7)
951; CHECK-BE-NEXT:    blr
952entry:
953  %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %vc, <16 x i8> %vc)
954  store <512 x i1> %0, ptr %resp, align 64
955  ret void
956}
957
958
959declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>)
960
; test24 - codegen test for the @llvm.ppc.mma.xvf32gerpp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands, and stores the result to %resp.
; %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the xvf32gerpp instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
961define void @test24(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
962; CHECK-LABEL: test24:
963; CHECK:       # %bb.0: # %entry
964; CHECK-NEXT:    lxv vs1, 32(r3)
965; CHECK-NEXT:    lxv vs0, 48(r3)
966; CHECK-NEXT:    lxv vs3, 0(r3)
967; CHECK-NEXT:    lxv vs2, 16(r3)
968; CHECK-NEXT:    xxmtacc acc0
969; CHECK-NEXT:    xvf32gerpp acc0, v2, v2
970; CHECK-NEXT:    xxmfacc acc0
971; CHECK-NEXT:    stxv vs0, 48(r7)
972; CHECK-NEXT:    stxv vs1, 32(r7)
973; CHECK-NEXT:    stxv vs2, 16(r7)
974; CHECK-NEXT:    stxv vs3, 0(r7)
975; CHECK-NEXT:    blr
976;
977; CHECK-BE-LABEL: test24:
978; CHECK-BE:       # %bb.0: # %entry
979; CHECK-BE-NEXT:    lxv vs1, 16(r3)
980; CHECK-BE-NEXT:    lxv vs0, 0(r3)
981; CHECK-BE-NEXT:    lxv vs3, 48(r3)
982; CHECK-BE-NEXT:    lxv vs2, 32(r3)
983; CHECK-BE-NEXT:    xxmtacc acc0
984; CHECK-BE-NEXT:    xvf32gerpp acc0, v2, v2
985; CHECK-BE-NEXT:    xxmfacc acc0
986; CHECK-BE-NEXT:    stxv vs1, 16(r7)
987; CHECK-BE-NEXT:    stxv vs0, 0(r7)
988; CHECK-BE-NEXT:    stxv vs3, 48(r7)
989; CHECK-BE-NEXT:    stxv vs2, 32(r7)
990; CHECK-BE-NEXT:    blr
991entry:
992  %0 = load <512 x i1>, ptr %vqp, align 64
993  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
994  store <512 x i1> %1, ptr %resp, align 64
995  ret void
996}
997
998
999declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
1000
; test25 - codegen test for the @llvm.ppc.mma.xvf32gerpn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands, and stores the result to %resp.
; %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the xvf32gerpn instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
1001define void @test25(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1002; CHECK-LABEL: test25:
1003; CHECK:       # %bb.0: # %entry
1004; CHECK-NEXT:    lxv vs1, 32(r3)
1005; CHECK-NEXT:    lxv vs0, 48(r3)
1006; CHECK-NEXT:    lxv vs3, 0(r3)
1007; CHECK-NEXT:    lxv vs2, 16(r3)
1008; CHECK-NEXT:    xxmtacc acc0
1009; CHECK-NEXT:    xvf32gerpn acc0, v2, v2
1010; CHECK-NEXT:    xxmfacc acc0
1011; CHECK-NEXT:    stxv vs0, 48(r7)
1012; CHECK-NEXT:    stxv vs1, 32(r7)
1013; CHECK-NEXT:    stxv vs2, 16(r7)
1014; CHECK-NEXT:    stxv vs3, 0(r7)
1015; CHECK-NEXT:    blr
1016;
1017; CHECK-BE-LABEL: test25:
1018; CHECK-BE:       # %bb.0: # %entry
1019; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1020; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1021; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1022; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1023; CHECK-BE-NEXT:    xxmtacc acc0
1024; CHECK-BE-NEXT:    xvf32gerpn acc0, v2, v2
1025; CHECK-BE-NEXT:    xxmfacc acc0
1026; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1027; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1028; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1029; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1030; CHECK-BE-NEXT:    blr
1031entry:
1032  %0 = load <512 x i1>, ptr %vqp, align 64
1033  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
1034  store <512 x i1> %1, ptr %resp, align 64
1035  ret void
1036}
1037
1038
1039declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>)
1040
; test26 - codegen test for the @llvm.ppc.mma.xvf32gernp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands, and stores the result to %resp.
; %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the xvf32gernp instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
1041define void @test26(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1042; CHECK-LABEL: test26:
1043; CHECK:       # %bb.0: # %entry
1044; CHECK-NEXT:    lxv vs1, 32(r3)
1045; CHECK-NEXT:    lxv vs0, 48(r3)
1046; CHECK-NEXT:    lxv vs3, 0(r3)
1047; CHECK-NEXT:    lxv vs2, 16(r3)
1048; CHECK-NEXT:    xxmtacc acc0
1049; CHECK-NEXT:    xvf32gernp acc0, v2, v2
1050; CHECK-NEXT:    xxmfacc acc0
1051; CHECK-NEXT:    stxv vs0, 48(r7)
1052; CHECK-NEXT:    stxv vs1, 32(r7)
1053; CHECK-NEXT:    stxv vs2, 16(r7)
1054; CHECK-NEXT:    stxv vs3, 0(r7)
1055; CHECK-NEXT:    blr
1056;
1057; CHECK-BE-LABEL: test26:
1058; CHECK-BE:       # %bb.0: # %entry
1059; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1060; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1061; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1062; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1063; CHECK-BE-NEXT:    xxmtacc acc0
1064; CHECK-BE-NEXT:    xvf32gernp acc0, v2, v2
1065; CHECK-BE-NEXT:    xxmfacc acc0
1066; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1067; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1068; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1069; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1070; CHECK-BE-NEXT:    blr
1071entry:
1072  %0 = load <512 x i1>, ptr %vqp, align 64
1073  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
1074  store <512 x i1> %1, ptr %resp, align 64
1075  ret void
1076}
1077
1078
1079declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)
1080
; test27 - codegen test for the @llvm.ppc.mma.xvf32gernn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands, and stores the result to %resp.
; %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the xvf32gernn instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
1081define void @test27(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1082; CHECK-LABEL: test27:
1083; CHECK:       # %bb.0: # %entry
1084; CHECK-NEXT:    lxv vs1, 32(r3)
1085; CHECK-NEXT:    lxv vs0, 48(r3)
1086; CHECK-NEXT:    lxv vs3, 0(r3)
1087; CHECK-NEXT:    lxv vs2, 16(r3)
1088; CHECK-NEXT:    xxmtacc acc0
1089; CHECK-NEXT:    xvf32gernn acc0, v2, v2
1090; CHECK-NEXT:    xxmfacc acc0
1091; CHECK-NEXT:    stxv vs0, 48(r7)
1092; CHECK-NEXT:    stxv vs1, 32(r7)
1093; CHECK-NEXT:    stxv vs2, 16(r7)
1094; CHECK-NEXT:    stxv vs3, 0(r7)
1095; CHECK-NEXT:    blr
1096;
1097; CHECK-BE-LABEL: test27:
1098; CHECK-BE:       # %bb.0: # %entry
1099; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1100; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1101; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1102; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1103; CHECK-BE-NEXT:    xxmtacc acc0
1104; CHECK-BE-NEXT:    xvf32gernn acc0, v2, v2
1105; CHECK-BE-NEXT:    xxmfacc acc0
1106; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1107; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1108; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1109; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1110; CHECK-BE-NEXT:    blr
1111entry:
1112  %0 = load <512 x i1>, ptr %vqp, align 64
1113  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
1114  store <512 x i1> %1, ptr %resp, align 64
1115  ret void
1116}
1117
1118
1119declare <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>)
1120
; test28 - codegen test for the @llvm.ppc.mma.pmxvf32ger intrinsic.
; The intrinsic takes no incoming accumulator here, so the IR calls it with
; %vc as both vector operands and both i32 immediates set to 0, then stores
; the <512 x i1> result to %resp. %vqp and %vpp are not referenced by the body.
; The little-endian and big-endian expectations both require the pmxvf32ger
; instruction, xxmfacc and four stxv stores through r7, with no preceding
; loads or xxmtacc; the store offsets and order differ.
1121define void @test28(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1122; CHECK-LABEL: test28:
1123; CHECK:       # %bb.0: # %entry
1124; CHECK-NEXT:    pmxvf32ger acc0, v2, v2, 0, 0
1125; CHECK-NEXT:    xxmfacc acc0
1126; CHECK-NEXT:    stxv vs0, 48(r7)
1127; CHECK-NEXT:    stxv vs1, 32(r7)
1128; CHECK-NEXT:    stxv vs2, 16(r7)
1129; CHECK-NEXT:    stxv vs3, 0(r7)
1130; CHECK-NEXT:    blr
1131;
1132; CHECK-BE-LABEL: test28:
1133; CHECK-BE:       # %bb.0: # %entry
1134; CHECK-BE-NEXT:    pmxvf32ger acc0, v2, v2, 0, 0
1135; CHECK-BE-NEXT:    xxmfacc acc0
1136; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1137; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1138; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1139; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1140; CHECK-BE-NEXT:    blr
1141entry:
1142  %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1143  store <512 x i1> %0, ptr %resp, align 64
1144  ret void
1145}
1146
1147
1148declare <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8>, <16 x i8>, i32, i32)
1149
; test29 - codegen test for the @llvm.ppc.mma.pmxvf32gerpp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands and both i32 immediates set to 0, and
; stores the result to %resp. %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the pmxvf32gerpp instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
1150define void @test29(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1151; CHECK-LABEL: test29:
1152; CHECK:       # %bb.0: # %entry
1153; CHECK-NEXT:    lxv vs1, 32(r3)
1154; CHECK-NEXT:    lxv vs0, 48(r3)
1155; CHECK-NEXT:    lxv vs3, 0(r3)
1156; CHECK-NEXT:    lxv vs2, 16(r3)
1157; CHECK-NEXT:    xxmtacc acc0
1158; CHECK-NEXT:    pmxvf32gerpp acc0, v2, v2, 0, 0
1159; CHECK-NEXT:    xxmfacc acc0
1160; CHECK-NEXT:    stxv vs0, 48(r7)
1161; CHECK-NEXT:    stxv vs1, 32(r7)
1162; CHECK-NEXT:    stxv vs2, 16(r7)
1163; CHECK-NEXT:    stxv vs3, 0(r7)
1164; CHECK-NEXT:    blr
1165;
1166; CHECK-BE-LABEL: test29:
1167; CHECK-BE:       # %bb.0: # %entry
1168; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1169; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1170; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1171; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1172; CHECK-BE-NEXT:    xxmtacc acc0
1173; CHECK-BE-NEXT:    pmxvf32gerpp acc0, v2, v2, 0, 0
1174; CHECK-BE-NEXT:    xxmfacc acc0
1175; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1176; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1177; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1178; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1179; CHECK-BE-NEXT:    blr
1180entry:
1181  %0 = load <512 x i1>, ptr %vqp, align 64
1182  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1183  store <512 x i1> %1, ptr %resp, align 64
1184  ret void
1185}
1186
1187
1188declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
1189
; test30 - codegen test for the @llvm.ppc.mma.pmxvf32gerpn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands and both i32 immediates set to 0, and
; stores the result to %resp. %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the pmxvf32gerpn instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
1190define void @test30(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1191; CHECK-LABEL: test30:
1192; CHECK:       # %bb.0: # %entry
1193; CHECK-NEXT:    lxv vs1, 32(r3)
1194; CHECK-NEXT:    lxv vs0, 48(r3)
1195; CHECK-NEXT:    lxv vs3, 0(r3)
1196; CHECK-NEXT:    lxv vs2, 16(r3)
1197; CHECK-NEXT:    xxmtacc acc0
1198; CHECK-NEXT:    pmxvf32gerpn acc0, v2, v2, 0, 0
1199; CHECK-NEXT:    xxmfacc acc0
1200; CHECK-NEXT:    stxv vs0, 48(r7)
1201; CHECK-NEXT:    stxv vs1, 32(r7)
1202; CHECK-NEXT:    stxv vs2, 16(r7)
1203; CHECK-NEXT:    stxv vs3, 0(r7)
1204; CHECK-NEXT:    blr
1205;
1206; CHECK-BE-LABEL: test30:
1207; CHECK-BE:       # %bb.0: # %entry
1208; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1209; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1210; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1211; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1212; CHECK-BE-NEXT:    xxmtacc acc0
1213; CHECK-BE-NEXT:    pmxvf32gerpn acc0, v2, v2, 0, 0
1214; CHECK-BE-NEXT:    xxmfacc acc0
1215; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1216; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1217; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1218; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1219; CHECK-BE-NEXT:    blr
1220entry:
1221  %0 = load <512 x i1>, ptr %vqp, align 64
1222  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1223  store <512 x i1> %1, ptr %resp, align 64
1224  ret void
1225}
1226
1227
1228declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
1229
; test31 - codegen test for the @llvm.ppc.mma.pmxvf32gernp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands and both i32 immediates set to 0, and
; stores the result to %resp. %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the pmxvf32gernp instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
1230define void @test31(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1231; CHECK-LABEL: test31:
1232; CHECK:       # %bb.0: # %entry
1233; CHECK-NEXT:    lxv vs1, 32(r3)
1234; CHECK-NEXT:    lxv vs0, 48(r3)
1235; CHECK-NEXT:    lxv vs3, 0(r3)
1236; CHECK-NEXT:    lxv vs2, 16(r3)
1237; CHECK-NEXT:    xxmtacc acc0
1238; CHECK-NEXT:    pmxvf32gernp acc0, v2, v2, 0, 0
1239; CHECK-NEXT:    xxmfacc acc0
1240; CHECK-NEXT:    stxv vs0, 48(r7)
1241; CHECK-NEXT:    stxv vs1, 32(r7)
1242; CHECK-NEXT:    stxv vs2, 16(r7)
1243; CHECK-NEXT:    stxv vs3, 0(r7)
1244; CHECK-NEXT:    blr
1245;
1246; CHECK-BE-LABEL: test31:
1247; CHECK-BE:       # %bb.0: # %entry
1248; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1249; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1250; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1251; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1252; CHECK-BE-NEXT:    xxmtacc acc0
1253; CHECK-BE-NEXT:    pmxvf32gernp acc0, v2, v2, 0, 0
1254; CHECK-BE-NEXT:    xxmfacc acc0
1255; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1256; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1257; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1258; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1259; CHECK-BE-NEXT:    blr
1260entry:
1261  %0 = load <512 x i1>, ptr %vqp, align 64
1262  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1263  store <512 x i1> %1, ptr %resp, align 64
1264  ret void
1265}
1266
1267
1268declare <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
1269
; test32 - codegen test for the @llvm.ppc.mma.pmxvf32gernn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp, passes it to the intrinsic
; with %vc as both vector operands and both i32 immediates set to 0, and
; stores the result to %resp. %vpp is not referenced by the body.
; The little-endian and big-endian expectations both require four lxv loads
; into vs0-vs3, xxmtacc, the pmxvf32gernn instruction, xxmfacc and four stxv
; stores through r7; the memory offsets and the store order differ.
1270define void @test32(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1271; CHECK-LABEL: test32:
1272; CHECK:       # %bb.0: # %entry
1273; CHECK-NEXT:    lxv vs1, 32(r3)
1274; CHECK-NEXT:    lxv vs0, 48(r3)
1275; CHECK-NEXT:    lxv vs3, 0(r3)
1276; CHECK-NEXT:    lxv vs2, 16(r3)
1277; CHECK-NEXT:    xxmtacc acc0
1278; CHECK-NEXT:    pmxvf32gernn acc0, v2, v2, 0, 0
1279; CHECK-NEXT:    xxmfacc acc0
1280; CHECK-NEXT:    stxv vs0, 48(r7)
1281; CHECK-NEXT:    stxv vs1, 32(r7)
1282; CHECK-NEXT:    stxv vs2, 16(r7)
1283; CHECK-NEXT:    stxv vs3, 0(r7)
1284; CHECK-NEXT:    blr
1285;
1286; CHECK-BE-LABEL: test32:
1287; CHECK-BE:       # %bb.0: # %entry
1288; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1289; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1290; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1291; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1292; CHECK-BE-NEXT:    xxmtacc acc0
1293; CHECK-BE-NEXT:    pmxvf32gernn acc0, v2, v2, 0, 0
1294; CHECK-BE-NEXT:    xxmfacc acc0
1295; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1296; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1297; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1298; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1299; CHECK-BE-NEXT:    blr
1300entry:
1301  %0 = load <512 x i1>, ptr %vqp, align 64
1302  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1303  store <512 x i1> %1, ptr %resp, align 64
1304  ret void
1305}
1306
1307
1308declare <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
1309
; test33 - codegen test for the @llvm.ppc.mma.xvf64ger intrinsic.
; The IR loads a <256 x i1> vector pair from %vpp, passes it to the intrinsic
; together with %vc, and stores the <512 x i1> result to %resp. There is no
; incoming accumulator and %vqp is not referenced by the body.
; The little-endian and big-endian expectations both require two lxv loads
; from r4 into v5/v4, the xvf64ger instruction taking vsp36 and v2, xxmfacc
; and four stxv stores through r7; the memory offsets and store order differ.
1310define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1311; CHECK-LABEL: test33:
1312; CHECK:       # %bb.0: # %entry
1313; CHECK-NEXT:    lxv v5, 0(r4)
1314; CHECK-NEXT:    lxv v4, 16(r4)
1315; CHECK-NEXT:    xvf64ger acc0, vsp36, v2
1316; CHECK-NEXT:    xxmfacc acc0
1317; CHECK-NEXT:    stxv vs0, 48(r7)
1318; CHECK-NEXT:    stxv vs1, 32(r7)
1319; CHECK-NEXT:    stxv vs2, 16(r7)
1320; CHECK-NEXT:    stxv vs3, 0(r7)
1321; CHECK-NEXT:    blr
1322;
1323; CHECK-BE-LABEL: test33:
1324; CHECK-BE:       # %bb.0: # %entry
1325; CHECK-BE-NEXT:    lxv v5, 16(r4)
1326; CHECK-BE-NEXT:    lxv v4, 0(r4)
1327; CHECK-BE-NEXT:    xvf64ger acc0, vsp36, v2
1328; CHECK-BE-NEXT:    xxmfacc acc0
1329; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1330; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1331; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1332; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1333; CHECK-BE-NEXT:    blr
1334entry:
1335  %0 = load <256 x i1>, ptr %vpp, align 32
1336  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %0, <16 x i8> %vc)
1337  store <512 x i1> %1, ptr %resp, align 64
1338  ret void
1339}
1340
1341
1342declare <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1>, <16 x i8>)
1343
; test34 - codegen test for the @llvm.ppc.mma.xvf64gerpp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc, and stores the
; result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the xvf64gerpp instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1344define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1345; CHECK-LABEL: test34:
1346; CHECK:       # %bb.0: # %entry
1347; CHECK-NEXT:    lxv vs1, 32(r3)
1348; CHECK-NEXT:    lxv vs0, 48(r3)
1349; CHECK-NEXT:    lxv vs3, 0(r3)
1350; CHECK-NEXT:    lxv vs2, 16(r3)
1351; CHECK-NEXT:    lxv v5, 0(r4)
1352; CHECK-NEXT:    xxmtacc acc0
1353; CHECK-NEXT:    lxv v4, 16(r4)
1354; CHECK-NEXT:    xvf64gerpp acc0, vsp36, v2
1355; CHECK-NEXT:    xxmfacc acc0
1356; CHECK-NEXT:    stxv vs0, 48(r7)
1357; CHECK-NEXT:    stxv vs1, 32(r7)
1358; CHECK-NEXT:    stxv vs2, 16(r7)
1359; CHECK-NEXT:    stxv vs3, 0(r7)
1360; CHECK-NEXT:    blr
1361;
1362; CHECK-BE-LABEL: test34:
1363; CHECK-BE:       # %bb.0: # %entry
1364; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1365; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1366; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1367; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1368; CHECK-BE-NEXT:    lxv v5, 16(r4)
1369; CHECK-BE-NEXT:    xxmtacc acc0
1370; CHECK-BE-NEXT:    lxv v4, 0(r4)
1371; CHECK-BE-NEXT:    xvf64gerpp acc0, vsp36, v2
1372; CHECK-BE-NEXT:    xxmfacc acc0
1373; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1374; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1375; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1376; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1377; CHECK-BE-NEXT:    blr
1378entry:
1379  %0 = load <512 x i1>, ptr %vqp, align 64
1380  %1 = load <256 x i1>, ptr %vpp, align 32
1381  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc)
1382  store <512 x i1> %2, ptr %resp, align 64
1383  ret void
1384}
1385
1386
1387declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
1388
; test35 - codegen test for the @llvm.ppc.mma.xvf64gerpn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc, and stores the
; result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the xvf64gerpn instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1389define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1390; CHECK-LABEL: test35:
1391; CHECK:       # %bb.0: # %entry
1392; CHECK-NEXT:    lxv vs1, 32(r3)
1393; CHECK-NEXT:    lxv vs0, 48(r3)
1394; CHECK-NEXT:    lxv vs3, 0(r3)
1395; CHECK-NEXT:    lxv vs2, 16(r3)
1396; CHECK-NEXT:    lxv v5, 0(r4)
1397; CHECK-NEXT:    xxmtacc acc0
1398; CHECK-NEXT:    lxv v4, 16(r4)
1399; CHECK-NEXT:    xvf64gerpn acc0, vsp36, v2
1400; CHECK-NEXT:    xxmfacc acc0
1401; CHECK-NEXT:    stxv vs0, 48(r7)
1402; CHECK-NEXT:    stxv vs1, 32(r7)
1403; CHECK-NEXT:    stxv vs2, 16(r7)
1404; CHECK-NEXT:    stxv vs3, 0(r7)
1405; CHECK-NEXT:    blr
1406;
1407; CHECK-BE-LABEL: test35:
1408; CHECK-BE:       # %bb.0: # %entry
1409; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1410; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1411; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1412; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1413; CHECK-BE-NEXT:    lxv v5, 16(r4)
1414; CHECK-BE-NEXT:    xxmtacc acc0
1415; CHECK-BE-NEXT:    lxv v4, 0(r4)
1416; CHECK-BE-NEXT:    xvf64gerpn acc0, vsp36, v2
1417; CHECK-BE-NEXT:    xxmfacc acc0
1418; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1419; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1420; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1421; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1422; CHECK-BE-NEXT:    blr
1423entry:
1424  %0 = load <512 x i1>, ptr %vqp, align 64
1425  %1 = load <256 x i1>, ptr %vpp, align 32
1426  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc)
1427  store <512 x i1> %2, ptr %resp, align 64
1428  ret void
1429}
1430
1431
1432declare <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>)
1433
; test36 - codegen test for the @llvm.ppc.mma.xvf64gernp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc, and stores the
; result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the xvf64gernp instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1434define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1435; CHECK-LABEL: test36:
1436; CHECK:       # %bb.0: # %entry
1437; CHECK-NEXT:    lxv vs1, 32(r3)
1438; CHECK-NEXT:    lxv vs0, 48(r3)
1439; CHECK-NEXT:    lxv vs3, 0(r3)
1440; CHECK-NEXT:    lxv vs2, 16(r3)
1441; CHECK-NEXT:    lxv v5, 0(r4)
1442; CHECK-NEXT:    xxmtacc acc0
1443; CHECK-NEXT:    lxv v4, 16(r4)
1444; CHECK-NEXT:    xvf64gernp acc0, vsp36, v2
1445; CHECK-NEXT:    xxmfacc acc0
1446; CHECK-NEXT:    stxv vs0, 48(r7)
1447; CHECK-NEXT:    stxv vs1, 32(r7)
1448; CHECK-NEXT:    stxv vs2, 16(r7)
1449; CHECK-NEXT:    stxv vs3, 0(r7)
1450; CHECK-NEXT:    blr
1451;
1452; CHECK-BE-LABEL: test36:
1453; CHECK-BE:       # %bb.0: # %entry
1454; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1455; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1456; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1457; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1458; CHECK-BE-NEXT:    lxv v5, 16(r4)
1459; CHECK-BE-NEXT:    xxmtacc acc0
1460; CHECK-BE-NEXT:    lxv v4, 0(r4)
1461; CHECK-BE-NEXT:    xvf64gernp acc0, vsp36, v2
1462; CHECK-BE-NEXT:    xxmfacc acc0
1463; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1464; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1465; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1466; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1467; CHECK-BE-NEXT:    blr
1468entry:
1469  %0 = load <512 x i1>, ptr %vqp, align 64
1470  %1 = load <256 x i1>, ptr %vpp, align 32
1471  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc)
1472  store <512 x i1> %2, ptr %resp, align 64
1473  ret void
1474}
1475
1476
1477declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
1478
; test37 - codegen test for the @llvm.ppc.mma.xvf64gernn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc, and stores the
; result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the xvf64gernn instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1479define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1480; CHECK-LABEL: test37:
1481; CHECK:       # %bb.0: # %entry
1482; CHECK-NEXT:    lxv vs1, 32(r3)
1483; CHECK-NEXT:    lxv vs0, 48(r3)
1484; CHECK-NEXT:    lxv vs3, 0(r3)
1485; CHECK-NEXT:    lxv vs2, 16(r3)
1486; CHECK-NEXT:    lxv v5, 0(r4)
1487; CHECK-NEXT:    xxmtacc acc0
1488; CHECK-NEXT:    lxv v4, 16(r4)
1489; CHECK-NEXT:    xvf64gernn acc0, vsp36, v2
1490; CHECK-NEXT:    xxmfacc acc0
1491; CHECK-NEXT:    stxv vs0, 48(r7)
1492; CHECK-NEXT:    stxv vs1, 32(r7)
1493; CHECK-NEXT:    stxv vs2, 16(r7)
1494; CHECK-NEXT:    stxv vs3, 0(r7)
1495; CHECK-NEXT:    blr
1496;
1497; CHECK-BE-LABEL: test37:
1498; CHECK-BE:       # %bb.0: # %entry
1499; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1500; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1501; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1502; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1503; CHECK-BE-NEXT:    lxv v5, 16(r4)
1504; CHECK-BE-NEXT:    xxmtacc acc0
1505; CHECK-BE-NEXT:    lxv v4, 0(r4)
1506; CHECK-BE-NEXT:    xvf64gernn acc0, vsp36, v2
1507; CHECK-BE-NEXT:    xxmfacc acc0
1508; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1509; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1510; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1511; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1512; CHECK-BE-NEXT:    blr
1513entry:
1514  %0 = load <512 x i1>, ptr %vqp, align 64
1515  %1 = load <256 x i1>, ptr %vpp, align 32
1516  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc)
1517  store <512 x i1> %2, ptr %resp, align 64
1518  ret void
1519}
1520
1521
1522declare <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>)
1523
; test38 - codegen test for the @llvm.ppc.mma.pmxvf64ger intrinsic.
; The IR loads a <256 x i1> vector pair from %vpp, passes it to the intrinsic
; together with %vc and two i32 immediates set to 0, and stores the
; <512 x i1> result to %resp. There is no incoming accumulator and %vqp is
; not referenced by the body.
; The little-endian and big-endian expectations both require two lxv loads
; from r4 into v5/v4, the pmxvf64ger instruction taking vsp36 and v2, xxmfacc
; and four stxv stores through r7; the memory offsets and store order differ.
1524define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1525; CHECK-LABEL: test38:
1526; CHECK:       # %bb.0: # %entry
1527; CHECK-NEXT:    lxv v5, 0(r4)
1528; CHECK-NEXT:    lxv v4, 16(r4)
1529; CHECK-NEXT:    pmxvf64ger acc0, vsp36, v2, 0, 0
1530; CHECK-NEXT:    xxmfacc acc0
1531; CHECK-NEXT:    stxv vs0, 48(r7)
1532; CHECK-NEXT:    stxv vs1, 32(r7)
1533; CHECK-NEXT:    stxv vs2, 16(r7)
1534; CHECK-NEXT:    stxv vs3, 0(r7)
1535; CHECK-NEXT:    blr
1536;
1537; CHECK-BE-LABEL: test38:
1538; CHECK-BE:       # %bb.0: # %entry
1539; CHECK-BE-NEXT:    lxv v5, 16(r4)
1540; CHECK-BE-NEXT:    lxv v4, 0(r4)
1541; CHECK-BE-NEXT:    pmxvf64ger acc0, vsp36, v2, 0, 0
1542; CHECK-BE-NEXT:    xxmfacc acc0
1543; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1544; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1545; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1546; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1547; CHECK-BE-NEXT:    blr
1548entry:
1549  %0 = load <256 x i1>, ptr %vpp, align 32
1550  %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %0, <16 x i8> %vc, i32 0, i32 0)
1551  store <512 x i1> %1, ptr %resp, align 64
1552  ret void
1553}
1554
1555
1556declare <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1>, <16 x i8>, i32, i32)
1557
; test39 - codegen test for the @llvm.ppc.mma.pmxvf64gerpp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc and two i32
; immediates set to 0, and stores the result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the pmxvf64gerpp instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1558define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1559; CHECK-LABEL: test39:
1560; CHECK:       # %bb.0: # %entry
1561; CHECK-NEXT:    lxv vs1, 32(r3)
1562; CHECK-NEXT:    lxv vs0, 48(r3)
1563; CHECK-NEXT:    lxv vs3, 0(r3)
1564; CHECK-NEXT:    lxv vs2, 16(r3)
1565; CHECK-NEXT:    lxv v5, 0(r4)
1566; CHECK-NEXT:    xxmtacc acc0
1567; CHECK-NEXT:    lxv v4, 16(r4)
1568; CHECK-NEXT:    pmxvf64gerpp acc0, vsp36, v2, 0, 0
1569; CHECK-NEXT:    xxmfacc acc0
1570; CHECK-NEXT:    stxv vs0, 48(r7)
1571; CHECK-NEXT:    stxv vs1, 32(r7)
1572; CHECK-NEXT:    stxv vs2, 16(r7)
1573; CHECK-NEXT:    stxv vs3, 0(r7)
1574; CHECK-NEXT:    blr
1575;
1576; CHECK-BE-LABEL: test39:
1577; CHECK-BE:       # %bb.0: # %entry
1578; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1579; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1580; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1581; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1582; CHECK-BE-NEXT:    lxv v5, 16(r4)
1583; CHECK-BE-NEXT:    xxmtacc acc0
1584; CHECK-BE-NEXT:    lxv v4, 0(r4)
1585; CHECK-BE-NEXT:    pmxvf64gerpp acc0, vsp36, v2, 0, 0
1586; CHECK-BE-NEXT:    xxmfacc acc0
1587; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1588; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1589; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1590; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1591; CHECK-BE-NEXT:    blr
1592entry:
1593  %0 = load <512 x i1>, ptr %vqp, align 64
1594  %1 = load <256 x i1>, ptr %vpp, align 32
1595  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0)
1596  store <512 x i1> %2, ptr %resp, align 64
1597  ret void
1598}
1599
1600
1601declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
1602
; test40 - codegen test for the @llvm.ppc.mma.pmxvf64gerpn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc and two i32
; immediates set to 0, and stores the result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the pmxvf64gerpn instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1603define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1604; CHECK-LABEL: test40:
1605; CHECK:       # %bb.0: # %entry
1606; CHECK-NEXT:    lxv vs1, 32(r3)
1607; CHECK-NEXT:    lxv vs0, 48(r3)
1608; CHECK-NEXT:    lxv vs3, 0(r3)
1609; CHECK-NEXT:    lxv vs2, 16(r3)
1610; CHECK-NEXT:    lxv v5, 0(r4)
1611; CHECK-NEXT:    xxmtacc acc0
1612; CHECK-NEXT:    lxv v4, 16(r4)
1613; CHECK-NEXT:    pmxvf64gerpn acc0, vsp36, v2, 0, 0
1614; CHECK-NEXT:    xxmfacc acc0
1615; CHECK-NEXT:    stxv vs0, 48(r7)
1616; CHECK-NEXT:    stxv vs1, 32(r7)
1617; CHECK-NEXT:    stxv vs2, 16(r7)
1618; CHECK-NEXT:    stxv vs3, 0(r7)
1619; CHECK-NEXT:    blr
1620;
1621; CHECK-BE-LABEL: test40:
1622; CHECK-BE:       # %bb.0: # %entry
1623; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1624; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1625; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1626; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1627; CHECK-BE-NEXT:    lxv v5, 16(r4)
1628; CHECK-BE-NEXT:    xxmtacc acc0
1629; CHECK-BE-NEXT:    lxv v4, 0(r4)
1630; CHECK-BE-NEXT:    pmxvf64gerpn acc0, vsp36, v2, 0, 0
1631; CHECK-BE-NEXT:    xxmfacc acc0
1632; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1633; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1634; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1635; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1636; CHECK-BE-NEXT:    blr
1637entry:
1638  %0 = load <512 x i1>, ptr %vqp, align 64
1639  %1 = load <256 x i1>, ptr %vpp, align 32
1640  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0)
1641  store <512 x i1> %2, ptr %resp, align 64
1642  ret void
1643}
1644
1645
1646declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
1647
; test41 - codegen test for the @llvm.ppc.mma.pmxvf64gernp intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc and two i32
; immediates set to 0, and stores the result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the pmxvf64gernp instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1648define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1649; CHECK-LABEL: test41:
1650; CHECK:       # %bb.0: # %entry
1651; CHECK-NEXT:    lxv vs1, 32(r3)
1652; CHECK-NEXT:    lxv vs0, 48(r3)
1653; CHECK-NEXT:    lxv vs3, 0(r3)
1654; CHECK-NEXT:    lxv vs2, 16(r3)
1655; CHECK-NEXT:    lxv v5, 0(r4)
1656; CHECK-NEXT:    xxmtacc acc0
1657; CHECK-NEXT:    lxv v4, 16(r4)
1658; CHECK-NEXT:    pmxvf64gernp acc0, vsp36, v2, 0, 0
1659; CHECK-NEXT:    xxmfacc acc0
1660; CHECK-NEXT:    stxv vs0, 48(r7)
1661; CHECK-NEXT:    stxv vs1, 32(r7)
1662; CHECK-NEXT:    stxv vs2, 16(r7)
1663; CHECK-NEXT:    stxv vs3, 0(r7)
1664; CHECK-NEXT:    blr
1665;
1666; CHECK-BE-LABEL: test41:
1667; CHECK-BE:       # %bb.0: # %entry
1668; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1669; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1670; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1671; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1672; CHECK-BE-NEXT:    lxv v5, 16(r4)
1673; CHECK-BE-NEXT:    xxmtacc acc0
1674; CHECK-BE-NEXT:    lxv v4, 0(r4)
1675; CHECK-BE-NEXT:    pmxvf64gernp acc0, vsp36, v2, 0, 0
1676; CHECK-BE-NEXT:    xxmfacc acc0
1677; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1678; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1679; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1680; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1681; CHECK-BE-NEXT:    blr
1682entry:
1683  %0 = load <512 x i1>, ptr %vqp, align 64
1684  %1 = load <256 x i1>, ptr %vpp, align 32
1685  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0)
1686  store <512 x i1> %2, ptr %resp, align 64
1687  ret void
1688}
1689
1690
1691declare <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
1692
; test42 - codegen test for the @llvm.ppc.mma.pmxvf64gernn intrinsic.
; The IR loads a <512 x i1> accumulator from %vqp and a <256 x i1> vector pair
; from %vpp, passes both to the intrinsic together with %vc and two i32
; immediates set to 0, and stores the result to %resp.
; The little-endian and big-endian expectations both require four lxv loads
; from r3 into vs0-vs3, two lxv loads from r4 into v5/v4 placed around
; xxmtacc, the pmxvf64gernn instruction taking vsp36 and v2, xxmfacc and four
; stxv stores through r7; the memory offsets and the store order differ.
1693define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
1694; CHECK-LABEL: test42:
1695; CHECK:       # %bb.0: # %entry
1696; CHECK-NEXT:    lxv vs1, 32(r3)
1697; CHECK-NEXT:    lxv vs0, 48(r3)
1698; CHECK-NEXT:    lxv vs3, 0(r3)
1699; CHECK-NEXT:    lxv vs2, 16(r3)
1700; CHECK-NEXT:    lxv v5, 0(r4)
1701; CHECK-NEXT:    xxmtacc acc0
1702; CHECK-NEXT:    lxv v4, 16(r4)
1703; CHECK-NEXT:    pmxvf64gernn acc0, vsp36, v2, 0, 0
1704; CHECK-NEXT:    xxmfacc acc0
1705; CHECK-NEXT:    stxv vs0, 48(r7)
1706; CHECK-NEXT:    stxv vs1, 32(r7)
1707; CHECK-NEXT:    stxv vs2, 16(r7)
1708; CHECK-NEXT:    stxv vs3, 0(r7)
1709; CHECK-NEXT:    blr
1710;
1711; CHECK-BE-LABEL: test42:
1712; CHECK-BE:       # %bb.0: # %entry
1713; CHECK-BE-NEXT:    lxv vs1, 16(r3)
1714; CHECK-BE-NEXT:    lxv vs0, 0(r3)
1715; CHECK-BE-NEXT:    lxv vs3, 48(r3)
1716; CHECK-BE-NEXT:    lxv vs2, 32(r3)
1717; CHECK-BE-NEXT:    lxv v5, 16(r4)
1718; CHECK-BE-NEXT:    xxmtacc acc0
1719; CHECK-BE-NEXT:    lxv v4, 0(r4)
1720; CHECK-BE-NEXT:    pmxvf64gernn acc0, vsp36, v2, 0, 0
1721; CHECK-BE-NEXT:    xxmfacc acc0
1722; CHECK-BE-NEXT:    stxv vs1, 16(r7)
1723; CHECK-BE-NEXT:    stxv vs0, 0(r7)
1724; CHECK-BE-NEXT:    stxv vs3, 48(r7)
1725; CHECK-BE-NEXT:    stxv vs2, 32(r7)
1726; CHECK-BE-NEXT:    blr
1727entry:
1728  %0 = load <512 x i1>, ptr %vqp, align 64
1729  %1 = load <256 x i1>, ptr %vpp, align 32
1730  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0)
1731  store <512 x i1> %2, ptr %resp, align 64
1732  ret void
1733}
1734
1735
1736declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
1737