xref: /llvm-project/lldb/test/API/macosx/sme-registers/main.c (revision 46e782300765eeac8026377bf30d5f08888c2b25)
1*46e78230SJason Molenda ///  BUILT with
2*46e78230SJason Molenda ///    xcrun -sdk macosx.internal clang -mcpu=apple-m4 -g sme.c -o sme
3*46e78230SJason Molenda 
4*46e78230SJason Molenda #include <stdint.h>
5*46e78230SJason Molenda #include <stdio.h>
6*46e78230SJason Molenda #include <stdlib.h>
7*46e78230SJason Molenda 
8*46e78230SJason Molenda void write_sve_regs() {
9*46e78230SJason Molenda   asm volatile("ptrue p0.b\n\t");
10*46e78230SJason Molenda   asm volatile("ptrue p1.h\n\t");
11*46e78230SJason Molenda   asm volatile("ptrue p2.s\n\t");
12*46e78230SJason Molenda   asm volatile("ptrue p3.d\n\t");
13*46e78230SJason Molenda   asm volatile("pfalse p4.b\n\t");
14*46e78230SJason Molenda   asm volatile("ptrue p5.b\n\t");
15*46e78230SJason Molenda   asm volatile("ptrue p6.h\n\t");
16*46e78230SJason Molenda   asm volatile("ptrue p7.s\n\t");
17*46e78230SJason Molenda   asm volatile("ptrue p8.d\n\t");
18*46e78230SJason Molenda   asm volatile("pfalse p9.b\n\t");
19*46e78230SJason Molenda   asm volatile("ptrue p10.b\n\t");
20*46e78230SJason Molenda   asm volatile("ptrue p11.h\n\t");
21*46e78230SJason Molenda   asm volatile("ptrue p12.s\n\t");
22*46e78230SJason Molenda   asm volatile("ptrue p13.d\n\t");
23*46e78230SJason Molenda   asm volatile("pfalse p14.b\n\t");
24*46e78230SJason Molenda   asm volatile("ptrue p15.b\n\t");
25*46e78230SJason Molenda 
26*46e78230SJason Molenda   asm volatile("cpy  z0.b, p0/z, #1\n\t");
27*46e78230SJason Molenda   asm volatile("cpy  z1.b, p5/z, #2\n\t");
28*46e78230SJason Molenda   asm volatile("cpy  z2.b, p10/z, #3\n\t");
29*46e78230SJason Molenda   asm volatile("cpy  z3.b, p15/z, #4\n\t");
30*46e78230SJason Molenda   asm volatile("cpy  z4.b, p0/z, #5\n\t");
31*46e78230SJason Molenda   asm volatile("cpy  z5.b, p5/z, #6\n\t");
32*46e78230SJason Molenda   asm volatile("cpy  z6.b, p10/z, #7\n\t");
33*46e78230SJason Molenda   asm volatile("cpy  z7.b, p15/z, #8\n\t");
34*46e78230SJason Molenda   asm volatile("cpy  z8.b, p0/z, #9\n\t");
35*46e78230SJason Molenda   asm volatile("cpy  z9.b, p5/z, #10\n\t");
36*46e78230SJason Molenda   asm volatile("cpy  z10.b, p10/z, #11\n\t");
37*46e78230SJason Molenda   asm volatile("cpy  z11.b, p15/z, #12\n\t");
38*46e78230SJason Molenda   asm volatile("cpy  z12.b, p0/z, #13\n\t");
39*46e78230SJason Molenda   asm volatile("cpy  z13.b, p5/z, #14\n\t");
40*46e78230SJason Molenda   asm volatile("cpy  z14.b, p10/z, #15\n\t");
41*46e78230SJason Molenda   asm volatile("cpy  z15.b, p15/z, #16\n\t");
42*46e78230SJason Molenda   asm volatile("cpy  z16.b, p0/z, #17\n\t");
43*46e78230SJason Molenda   asm volatile("cpy  z17.b, p5/z, #18\n\t");
44*46e78230SJason Molenda   asm volatile("cpy  z18.b, p10/z, #19\n\t");
45*46e78230SJason Molenda   asm volatile("cpy  z19.b, p15/z, #20\n\t");
46*46e78230SJason Molenda   asm volatile("cpy  z20.b, p0/z, #21\n\t");
47*46e78230SJason Molenda   asm volatile("cpy  z21.b, p5/z, #22\n\t");
48*46e78230SJason Molenda   asm volatile("cpy  z22.b, p10/z, #23\n\t");
49*46e78230SJason Molenda   asm volatile("cpy  z23.b, p15/z, #24\n\t");
50*46e78230SJason Molenda   asm volatile("cpy  z24.b, p0/z, #25\n\t");
51*46e78230SJason Molenda   asm volatile("cpy  z25.b, p5/z, #26\n\t");
52*46e78230SJason Molenda   asm volatile("cpy  z26.b, p10/z, #27\n\t");
53*46e78230SJason Molenda   asm volatile("cpy  z27.b, p15/z, #28\n\t");
54*46e78230SJason Molenda   asm volatile("cpy  z28.b, p0/z, #29\n\t");
55*46e78230SJason Molenda   asm volatile("cpy  z29.b, p5/z, #30\n\t");
56*46e78230SJason Molenda   asm volatile("cpy  z30.b, p10/z, #31\n\t");
57*46e78230SJason Molenda   asm volatile("cpy  z31.b, p15/z, #32\n\t");
58*46e78230SJason Molenda }
59*46e78230SJason Molenda 
60*46e78230SJason Molenda #define MAX_VL_BYTES 256
61*46e78230SJason Molenda void set_za_register(int svl, int value_offset) {
62*46e78230SJason Molenda   uint8_t data[MAX_VL_BYTES];
63*46e78230SJason Molenda 
64*46e78230SJason Molenda   // ldr za will actually wrap the selected vector row, by the number of rows
65*46e78230SJason Molenda   // you have. So setting one that didn't exist would actually set one that did.
66*46e78230SJason Molenda   // That's why we need the streaming vector length here.
67*46e78230SJason Molenda   for (int i = 0; i < svl; ++i) {
68*46e78230SJason Molenda     // This may involve instructions that require the smefa64 extension.
69*46e78230SJason Molenda     for (int j = 0; j < MAX_VL_BYTES; j++)
70*46e78230SJason Molenda       data[j] = i + value_offset;
71*46e78230SJason Molenda     // Each one of these loads a VL sized row of ZA.
72*46e78230SJason Molenda     asm volatile("mov w12, %w0\n\t"
73*46e78230SJason Molenda                  "ldr za[w12, 0], [%1]\n\t" ::"r"(i),
74*46e78230SJason Molenda                  "r"(&data)
75*46e78230SJason Molenda                  : "w12");
76*46e78230SJason Molenda   }
77*46e78230SJason Molenda }
78*46e78230SJason Molenda 
79*46e78230SJason Molenda static uint16_t arm_sme_svl_b(void) {
80*46e78230SJason Molenda   uint64_t ret = 0;
81*46e78230SJason Molenda   asm volatile("rdsvl  %[ret], #1" : [ret] "=r"(ret));
82*46e78230SJason Molenda   return (uint16_t)ret;
83*46e78230SJason Molenda }
84*46e78230SJason Molenda 
85*46e78230SJason Molenda void arm_sme2_set_zt0() {
86*46e78230SJason Molenda #define ZTO_LEN (512 / 8)
87*46e78230SJason Molenda   uint8_t data[ZTO_LEN];
88*46e78230SJason Molenda   for (unsigned i = 0; i < ZTO_LEN; ++i)
89*46e78230SJason Molenda     data[i] = i + 0;
90*46e78230SJason Molenda 
91*46e78230SJason Molenda   asm volatile("ldr zt0, [%0]" ::"r"(&data));
92*46e78230SJason Molenda #undef ZT0_LEN
93*46e78230SJason Molenda }
94*46e78230SJason Molenda 
// Test program entry point: switches SME on with smstart, populates the
// predicate/vector registers, ZA and ZT0 with known patterns, then switches
// SME off again with smstop.
//
// NOTE(review): the "break before/while/after sme" comments look like markers
// a debugger test uses to place breakpoints - keep them on their current
// statements and confirm against the test driver before moving anything.
int main() {
  printf("Enable SME mode\n"); // break before sme

  asm volatile("smstart");

  // Fill p0-p15 and z0-z31.
  write_sve_regs();

  // Fill each ZA row i with the byte value (i + 4); the row count is the
  // streaming vector length in bytes.
  set_za_register(arm_sme_svl_b(), 4);

  // Fill ZT0 with the bytes 0..63.
  arm_sme2_set_zt0();

  // Plain arithmetic executed between smstart and smstop.
  int c = 10; // break while sme
  c += 5;
  c += 5;

  asm volatile("smstop");

  printf("SME mode disabled\n"); // break after sme
}
114