xref: /llvm-project/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll (revision 0ad57bf236df9be06811e52c85d4a8ff5f89d387)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
4; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
7; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
8
; testMultiply(%a, %b, %c):
;   1. Creates an accumulator with the xxsetaccz intrinsic and loads one
;      <16 x i8> vector from each of %a and %b.
;   2. Calls the external helper _Z15buildVectorPair... with a 32-byte stack
;      slot (%vP) and the two loaded vectors; the accumulator and both vectors
;      are used again after the call.
;   3. Feeds the accumulator through xvf32gerpp twice: first with the vectors
;      loaded from %a/%b, then with the two halves of the <256 x i1> pair
;      reloaded from %vP.
;   4. Disassembles the accumulator into four vectors, reassembles them in
;      reverse order (3, 2, 1, 0), disassembles again, and stores the four
;      results through %c.
;
; The stores use float-typed GEPs with indices 2, 4 and 6, i.e. byte offsets
; 8, 16 and 24 from %c, so the four 16-byte stores overlap each other.
; NOTE(review): every store claims `align 16` although consecutive addresses
; differ by only 8 bytes; presumably inherited from the source this IR was
; reduced from -- confirm before changing, since the expected stxv/pstxv
; sequence below depends on these offsets.
;
; Expected code, as captured by the autogenerated checks:
;   * CHECK (powerpc64le): acc0 is used and, after xxmfacc, vs3..vs0 are
;     stored directly with no intervening register copies.
;   * CHECK-BE (powerpc64): acc1 is used and, after xxmfacc, four xxlor copies
;     move vs4..vs7 into vs0..vs3 before the stores.
; NOTE(review): going by the test file name (mma-acc-copy-hints), the absence
; or presence of those xxlor copies is presumably the property under test --
; confirm against the commit that introduced the test.
9define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c) local_unnamed_addr #0 {
10; CHECK-LABEL: testMultiply:
11; CHECK:       # %bb.0: # %entry
12; CHECK-NEXT:    mflr r0
13; CHECK-NEXT:    std r30, -16(r1)
14; CHECK-NEXT:    mr r30, r1
15; CHECK-NEXT:    std r0, 16(r1)
16; CHECK-NEXT:    clrldi r0, r1, 59
17; CHECK-NEXT:    subfic r0, r0, -128
18; CHECK-NEXT:    stdux r1, r1, r0
19; CHECK-NEXT:    stxv v30, -64(r30) # 16-byte Folded Spill
20; CHECK-NEXT:    stxv v31, -48(r30) # 16-byte Folded Spill
21; CHECK-NEXT:    lxv v31, 0(r3)
22; CHECK-NEXT:    lxv v30, 0(r4)
23; CHECK-NEXT:    addi r3, r1, 32
24; CHECK-NEXT:    vmr v2, v31
25; CHECK-NEXT:    vmr v3, v30
26; CHECK-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
27; CHECK-NEXT:    mr r29, r5
28; CHECK-NEXT:    bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc
29; CHECK-NEXT:    xxsetaccz acc0
30; CHECK-NEXT:    xvf32gerpp acc0, v31, v30
31; CHECK-NEXT:    lxv v3, 32(r1)
32; CHECK-NEXT:    lxv v2, 48(r1)
33; CHECK-NEXT:    xvf32gerpp acc0, v3, v2
34; CHECK-NEXT:    lxv v31, -48(r30) # 16-byte Folded Reload
35; CHECK-NEXT:    lxv v30, -64(r30) # 16-byte Folded Reload
36; CHECK-NEXT:    xxmfacc acc0
37; CHECK-NEXT:    stxv vs3, 0(r29)
38; CHECK-NEXT:    pstxv vs2, 8(r29), 0
39; CHECK-NEXT:    stxv vs1, 16(r29)
40; CHECK-NEXT:    pstxv vs0, 24(r29), 0
41; CHECK-NEXT:    ld r29, -24(r30) # 8-byte Folded Reload
42; CHECK-NEXT:    mr r1, r30
43; CHECK-NEXT:    ld r0, 16(r1)
44; CHECK-NEXT:    ld r30, -16(r1)
45; CHECK-NEXT:    mtlr r0
46; CHECK-NEXT:    blr
47;
48; CHECK-BE-LABEL: testMultiply:
49; CHECK-BE:       # %bb.0: # %entry
50; CHECK-BE-NEXT:    mflr r0
51; CHECK-BE-NEXT:    std r30, -16(r1)
52; CHECK-BE-NEXT:    mr r30, r1
53; CHECK-BE-NEXT:    std r0, 16(r1)
54; CHECK-BE-NEXT:    clrldi r0, r1, 59
55; CHECK-BE-NEXT:    subfic r0, r0, -224
56; CHECK-BE-NEXT:    stdux r1, r1, r0
57; CHECK-BE-NEXT:    stxv v30, -64(r30) # 16-byte Folded Spill
58; CHECK-BE-NEXT:    stxv v31, -48(r30) # 16-byte Folded Spill
59; CHECK-BE-NEXT:    lxv v31, 0(r3)
60; CHECK-BE-NEXT:    lxv v30, 0(r4)
61; CHECK-BE-NEXT:    addi r3, r1, 128
62; CHECK-BE-NEXT:    vmr v2, v31
63; CHECK-BE-NEXT:    vmr v3, v30
64; CHECK-BE-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
65; CHECK-BE-NEXT:    mr r29, r5
66; CHECK-BE-NEXT:    bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_
67; CHECK-BE-NEXT:    nop
68; CHECK-BE-NEXT:    xxsetaccz acc1
69; CHECK-BE-NEXT:    xvf32gerpp acc1, v31, v30
70; CHECK-BE-NEXT:    lxv v3, 144(r1)
71; CHECK-BE-NEXT:    lxv v2, 128(r1)
72; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v3
73; CHECK-BE-NEXT:    lxv v31, -48(r30) # 16-byte Folded Reload
74; CHECK-BE-NEXT:    lxv v30, -64(r30) # 16-byte Folded Reload
75; CHECK-BE-NEXT:    xxmfacc acc1
76; CHECK-BE-NEXT:    xxlor vs1, vs6, vs6
77; CHECK-BE-NEXT:    xxlor vs0, vs7, vs7
78; CHECK-BE-NEXT:    xxlor vs3, vs4, vs4
79; CHECK-BE-NEXT:    xxlor vs2, vs5, vs5
80; CHECK-BE-NEXT:    stxv vs0, 0(r29)
81; CHECK-BE-NEXT:    pstxv vs1, 8(r29), 0
82; CHECK-BE-NEXT:    stxv vs2, 16(r29)
83; CHECK-BE-NEXT:    pstxv vs3, 24(r29), 0
84; CHECK-BE-NEXT:    ld r29, -24(r30) # 8-byte Folded Reload
85; CHECK-BE-NEXT:    mr r1, r30
86; CHECK-BE-NEXT:    ld r0, 16(r1)
87; CHECK-BE-NEXT:    ld r30, -16(r1)
88; CHECK-BE-NEXT:    mtlr r0
89; CHECK-BE-NEXT:    blr
90entry:
91  %vP = alloca <256 x i1>, align 32
92  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %vP)
93  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
94  %1 = load <16 x i8>, ptr %a, align 16
95  %2 = load <16 x i8>, ptr %b, align 16
96  call void @_Z15buildVectorPairPu13__vector_pairDv16_hS0_(ptr noundef nonnull %vP, <16 x i8> noundef %1, <16 x i8> noundef %2)
97  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %1, <16 x i8> %2)
98  %4 = load <256 x i1>, ptr %vP, align 32
99  %5 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %4)
100  %6 = extractvalue { <16 x i8>, <16 x i8> } %5, 0
101  %7 = extractvalue { <16 x i8>, <16 x i8> } %5, 1
102  %8 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %3, <16 x i8> %6, <16 x i8> %7)
103  %9 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %8)
104  %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 0
105  %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 1
106  %12 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 2
107  %13 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 3
108  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %13, <16 x i8> %12, <16 x i8> %11, <16 x i8> %10)
109  %15 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %14)
110  %16 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 0
111  %17 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 1
112  %18 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 2
113  %19 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 3
114  store <16 x i8> %16, ptr %c, align 16
115  %add.ptr = getelementptr inbounds float, ptr %c, i64 2
116  store <16 x i8> %17, ptr %add.ptr, align 16
117  %add.ptr11 = getelementptr inbounds float, ptr %c, i64 4
118  store <16 x i8> %18, ptr %add.ptr11, align 16
119  %add.ptr13 = getelementptr inbounds float, ptr %c, i64 6
120  store <16 x i8> %19, ptr %add.ptr13, align 16
121  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %vP)
122  ret void
123}
124
; External declarations referenced by @testMultiply:
;   - llvm.lifetime.start/end: markers bracketing the %vP stack slot.
;   - llvm.ppc.mma.* / llvm.ppc.vsx.disassemble.pair: PowerPC MMA and
;     vector-pair intrinsics operating on <512 x i1> accumulators and
;     <256 x i1> pairs.
;   - _Z15buildVectorPair...: out-of-line helper with no body in this file,
;     taking a pointer and two <16 x i8> vectors; it is emitted as a real
;     `bl` call in the expected output.
125declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
126declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
127declare void @_Z15buildVectorPairPu13__vector_pairDv16_hS0_(ptr noundef, <16 x i8> noundef, <16 x i8> noundef) local_unnamed_addr
128declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
129declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
130declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
131declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
132declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
133
; Attribute group #0 is attached to the definition of @testMultiply and marks
; it nounwind.
134attributes #0 = { nounwind }
135