xref: /llvm-project/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll (revision 1df4d866cca51eeab8f012a97cc50957b45971fe)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
4; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
7; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
8
9; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
10; RUN:   -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
11; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
12; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
13; RUN:   -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
14; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
15
; External declarations used by the test function below.
;  - llvm.ppc.mma.xvf16ger2pp: takes a <512 x i1> value plus two <16 x i8>
;    vectors and returns an updated <512 x i1> value.
;  - llvm.ppc.mma.assemble.acc: packs four <16 x i8> vectors into one
;    <512 x i1> value.
;  - foo: an opaque void function with no arguments; only its presence as a
;    call site matters to this test.
16declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
17declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
18declare void @foo()
; intrinsics1: keeps a <512 x i1> MMA value live across a call to @foo.
;
; The IR assembles the value from the four vector arguments, updates it with
; xvf16ger2pp(%vc1, %vc3), calls @foo, updates it again with the same vector
; operands, and stores the result through %ptr.
;
; What the expected assembly below pins down:
;  - Before the call, acc0 is moved out with xxmfacc and written to the stack
;    as two vector pairs (stxvp vsp0 / vsp2); after the call it is read back
;    with two lxvp and re-primed with xxmtacc.
;  - The incoming vector arguments are copied into v28-v31 (saved and restored
;    in the prologue/epilogue) so that %vc1 and %vc3 are still available for
;    the second xvf16ger2pp after the call.
;  - The first assertion group comes from the powerpc64le invocations
;    (176-byte frame, "bl foo@notoc"); the second group, using the -BE
;    prefix, comes from the powerpc64 invocations (256-byte frame, "bl foo"
;    followed by a nop).
;  - The final four stxv stores place vs0..vs3 at descending offsets
;    (48, 32, 16, 0) in the first group and at ascending offsets
;    (0, 16, 32, 48) in the big-endian group.
19define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) {
20; CHECK-LABEL: intrinsics1:
21; CHECK:       # %bb.0:
22; CHECK-NEXT:    mflr r0
23; CHECK-NEXT:    std r0, 16(r1)
24; CHECK-NEXT:    stdu r1, -176(r1)
25; CHECK-NEXT:    .cfi_def_cfa_offset 176
26; CHECK-NEXT:    .cfi_offset lr, 16
27; CHECK-NEXT:    .cfi_offset r30, -16
28; CHECK-NEXT:    .cfi_offset v28, -80
29; CHECK-NEXT:    .cfi_offset v29, -64
30; CHECK-NEXT:    .cfi_offset v30, -48
31; CHECK-NEXT:    .cfi_offset v31, -32
32; CHECK-NEXT:    stxv v28, 96(r1) # 16-byte Folded Spill
33; CHECK-NEXT:    stxv v29, 112(r1) # 16-byte Folded Spill
34; CHECK-NEXT:    vmr v29, v3
35; CHECK-NEXT:    vmr v28, v2
36; CHECK-NEXT:    xxlor vs0, v28, v28
37; CHECK-NEXT:    stxv v30, 128(r1) # 16-byte Folded Spill
38; CHECK-NEXT:    stxv v31, 144(r1) # 16-byte Folded Spill
39; CHECK-NEXT:    vmr v31, v5
40; CHECK-NEXT:    vmr v30, v4
41; CHECK-NEXT:    xxlor vs1, v29, v29
42; CHECK-NEXT:    xxlor vs2, v30, v30
43; CHECK-NEXT:    xxlor vs3, v31, v31
44; CHECK-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
45; CHECK-NEXT:    ld r30, 272(r1)
46; CHECK-NEXT:    xxmtacc acc0
47; CHECK-NEXT:    xvf16ger2pp acc0, v2, v4
48; CHECK-NEXT:    xxmfacc acc0
49; CHECK-NEXT:    stxvp vsp0, 64(r1)
50; CHECK-NEXT:    stxvp vsp2, 32(r1)
51; CHECK-NEXT:    bl foo@notoc
52; CHECK-NEXT:    lxvp vsp0, 64(r1)
53; CHECK-NEXT:    lxvp vsp2, 32(r1)
54; CHECK-NEXT:    xxmtacc acc0
55; CHECK-NEXT:    xvf16ger2pp acc0, v28, v30
56; CHECK-NEXT:    lxv v31, 144(r1) # 16-byte Folded Reload
57; CHECK-NEXT:    lxv v30, 128(r1) # 16-byte Folded Reload
58; CHECK-NEXT:    lxv v29, 112(r1) # 16-byte Folded Reload
59; CHECK-NEXT:    lxv v28, 96(r1) # 16-byte Folded Reload
60; CHECK-NEXT:    xxmfacc acc0
61; CHECK-NEXT:    stxv vs0, 48(r30)
62; CHECK-NEXT:    stxv vs1, 32(r30)
63; CHECK-NEXT:    stxv vs2, 16(r30)
64; CHECK-NEXT:    stxv vs3, 0(r30)
65; CHECK-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
66; CHECK-NEXT:    addi r1, r1, 176
67; CHECK-NEXT:    ld r0, 16(r1)
68; CHECK-NEXT:    mtlr r0
69; CHECK-NEXT:    blr
70;
71; CHECK-BE-LABEL: intrinsics1:
72; CHECK-BE:       # %bb.0:
73; CHECK-BE-NEXT:    mflr r0
74; CHECK-BE-NEXT:    std r0, 16(r1)
75; CHECK-BE-NEXT:    stdu r1, -256(r1)
76; CHECK-BE-NEXT:    .cfi_def_cfa_offset 256
77; CHECK-BE-NEXT:    .cfi_offset lr, 16
78; CHECK-BE-NEXT:    .cfi_offset r30, -16
79; CHECK-BE-NEXT:    .cfi_offset v28, -80
80; CHECK-BE-NEXT:    .cfi_offset v29, -64
81; CHECK-BE-NEXT:    .cfi_offset v30, -48
82; CHECK-BE-NEXT:    .cfi_offset v31, -32
83; CHECK-BE-NEXT:    stxv v28, 176(r1) # 16-byte Folded Spill
84; CHECK-BE-NEXT:    stxv v29, 192(r1) # 16-byte Folded Spill
85; CHECK-BE-NEXT:    vmr v29, v3
86; CHECK-BE-NEXT:    vmr v28, v2
87; CHECK-BE-NEXT:    xxlor vs0, v28, v28
88; CHECK-BE-NEXT:    stxv v30, 208(r1) # 16-byte Folded Spill
89; CHECK-BE-NEXT:    stxv v31, 224(r1) # 16-byte Folded Spill
90; CHECK-BE-NEXT:    vmr v31, v5
91; CHECK-BE-NEXT:    vmr v30, v4
92; CHECK-BE-NEXT:    xxlor vs1, v29, v29
93; CHECK-BE-NEXT:    xxlor vs2, v30, v30
94; CHECK-BE-NEXT:    xxlor vs3, v31, v31
95; CHECK-BE-NEXT:    std r30, 240(r1) # 8-byte Folded Spill
96; CHECK-BE-NEXT:    ld r30, 368(r1)
97; CHECK-BE-NEXT:    xxmtacc acc0
98; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v4
99; CHECK-BE-NEXT:    xxmfacc acc0
100; CHECK-BE-NEXT:    stxvp vsp0, 112(r1)
101; CHECK-BE-NEXT:    stxvp vsp2, 144(r1)
102; CHECK-BE-NEXT:    bl foo
103; CHECK-BE-NEXT:    nop
104; CHECK-BE-NEXT:    lxvp vsp0, 112(r1)
105; CHECK-BE-NEXT:    lxvp vsp2, 144(r1)
106; CHECK-BE-NEXT:    xxmtacc acc0
107; CHECK-BE-NEXT:    xvf16ger2pp acc0, v28, v30
108; CHECK-BE-NEXT:    lxv v31, 224(r1) # 16-byte Folded Reload
109; CHECK-BE-NEXT:    lxv v30, 208(r1) # 16-byte Folded Reload
110; CHECK-BE-NEXT:    lxv v29, 192(r1) # 16-byte Folded Reload
111; CHECK-BE-NEXT:    lxv v28, 176(r1) # 16-byte Folded Reload
112; CHECK-BE-NEXT:    xxmfacc acc0
113; CHECK-BE-NEXT:    stxv vs1, 16(r30)
114; CHECK-BE-NEXT:    stxv vs0, 0(r30)
115; CHECK-BE-NEXT:    stxv vs3, 48(r30)
116; CHECK-BE-NEXT:    stxv vs2, 32(r30)
117; CHECK-BE-NEXT:    ld r30, 240(r1) # 8-byte Folded Reload
118; CHECK-BE-NEXT:    addi r1, r1, 256
119; CHECK-BE-NEXT:    ld r0, 16(r1)
120; CHECK-BE-NEXT:    mtlr r0
121; CHECK-BE-NEXT:    blr
122  %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4)
123  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3)
  ; %2 is consumed after this call, so it must survive the call to @foo.
124  tail call void @foo()
125  %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
126  store <512 x i1> %3, ptr %ptr, align 64
127  ret void
128}
129