xref: /llvm-project/clang/test/CodeGen/matrix-type.c (revision 94473f4db6a6f5f12d7c4081455b5b596094eac5)
1 // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
2 
// Stop compilation with an explicit diagnostic if the compiler does not
// report the 'matrix_types' extension; the matrix typedefs that follow use
// the matrix_type attribute.
3 #if !__has_extension(matrix_types)
4 #error Expected extension 'matrix_types' to be enabled
5 #endif
6 
// Same guard for 'matrix_types_scalar_division'; here only its availability
// is asserted.
7 #if !__has_extension(matrix_types_scalar_division)
8 #error Expected extension 'matrix_types_scalar_division' to be enabled
9 #endif
10 
// 5x5 matrix of double (25 elements), declared with the matrix_type attribute.
11 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
12 
// The directive below pins the IR type of the Matrix struct that is declared
// later in this file (char, fx3x4_t, float): the matrix member is expected to
// appear as a [12 x float] array inside the struct type.
13 // CHECK: %struct.Matrix = type { i8, [12 x float], float }
14 
// Copies the 5x5 double matrix that b points to into the matrix that a
// points to. The FileCheck directives in the body expect the copy to be
// emitted as one <25 x double> load from *b followed by one <25 x double>
// store to *a, both with 8-byte alignment.
15 void load_store_double(dx5x5_t *a, dx5x5_t *b) {
16   // CHECK-LABEL:  define{{.*}} void @load_store_double(
17   // CHECK-NEXT:  entry:
18   // CHECK-NEXT:    %a.addr = alloca ptr, align 8
19   // CHECK-NEXT:    %b.addr = alloca ptr, align 8
20   // CHECK-NEXT:    store ptr %a, ptr %a.addr, align 8
21   // CHECK-NEXT:    store ptr %b, ptr %b.addr, align 8
22   // CHECK-NEXT:    %0 = load ptr, ptr %b.addr, align 8
23   // CHECK-NEXT:    %1 = load <25 x double>, ptr %0, align 8
24   // CHECK-NEXT:    %2 = load ptr, ptr %a.addr, align 8
25   // CHECK-NEXT:    store <25 x double> %1, ptr %2, align 8
26   // CHECK-NEXT:   ret void
27 
28   *a = *b; // whole-matrix assignment whose IR is matched above
29 }
30 
// Float matrix declared with matrix_type(3, 4): 12 elements in total.
31 typedef float fx3x4_t __attribute__((matrix_type(3, 4)));
// Copies the float matrix *b into *a. The directives in the body expect a
// single <12 x float> load and store, each with 4-byte alignment.
32 void load_store_float(fx3x4_t *a, fx3x4_t *b) {
33   // CHECK-LABEL:  define{{.*}} void @load_store_float(
34   // CHECK-NEXT:  entry:
35   // CHECK-NEXT:    %a.addr = alloca ptr, align 8
36   // CHECK-NEXT:    %b.addr = alloca ptr, align 8
37   // CHECK-NEXT:    store ptr %a, ptr %a.addr, align 8
38   // CHECK-NEXT:    store ptr %b, ptr %b.addr, align 8
39   // CHECK-NEXT:    %0 = load ptr, ptr %b.addr, align 8
40   // CHECK-NEXT:    %1 = load <12 x float>, ptr %0, align 4
41   // CHECK-NEXT:    %2 = load ptr, ptr %a.addr, align 8
42   // CHECK-NEXT:    store <12 x float> %1, ptr %2, align 4
43   // CHECK-NEXT:   ret void
44 
45   *a = *b; // whole-matrix assignment whose IR is matched above
46 }
47 
// int matrix declared with matrix_type(4, 3): 12 elements in total.
// NOTE(review): the typedef name reads "3x4" while the attribute arguments
// are (4, 3). The directives below only depend on the element count (12), so
// the test is unaffected -- confirm whether the naming is intentional.
48 typedef int ix3x4_t __attribute__((matrix_type(4, 3)));
// Copies the int matrix *b into *a. The directives in the body expect a
// single <12 x i32> load and store, each with 4-byte alignment.
49 void load_store_int(ix3x4_t *a, ix3x4_t *b) {
50   // CHECK-LABEL:  define{{.*}} void @load_store_int(
51   // CHECK-NEXT:  entry:
52   // CHECK-NEXT:    %a.addr = alloca ptr, align 8
53   // CHECK-NEXT:    %b.addr = alloca ptr, align 8
54   // CHECK-NEXT:    store ptr %a, ptr %a.addr, align 8
55   // CHECK-NEXT:    store ptr %b, ptr %b.addr, align 8
56   // CHECK-NEXT:    %0 = load ptr, ptr %b.addr, align 8
57   // CHECK-NEXT:    %1 = load <12 x i32>, ptr %0, align 4
58   // CHECK-NEXT:    %2 = load ptr, ptr %a.addr, align 8
59   // CHECK-NEXT:    store <12 x i32> %1, ptr %2, align 4
60   // CHECK-NEXT:   ret void
61 
62   *a = *b; // whole-matrix assignment whose IR is matched above
63 }
64 
// unsigned long long matrix declared with matrix_type(4, 3): 12 elements.
// NOTE(review): the typedef name reads "3x4" while the attribute arguments
// are (4, 3); only the element count (12) is observable in the directives
// below -- confirm whether the naming is intentional.
65 typedef unsigned long long ullx3x4_t __attribute__((matrix_type(4, 3)));
// Copies the unsigned long long matrix *b into *a. The directives in the body
// expect a single <12 x i64> load and store, each with 8-byte alignment.
66 void load_store_ull(ullx3x4_t *a, ullx3x4_t *b) {
67   // CHECK-LABEL:  define{{.*}} void @load_store_ull(
68   // CHECK-NEXT:  entry:
69   // CHECK-NEXT:    %a.addr = alloca ptr, align 8
70   // CHECK-NEXT:    %b.addr = alloca ptr, align 8
71   // CHECK-NEXT:    store ptr %a, ptr %a.addr, align 8
72   // CHECK-NEXT:    store ptr %b, ptr %b.addr, align 8
73   // CHECK-NEXT:    %0 = load ptr, ptr %b.addr, align 8
74   // CHECK-NEXT:    %1 = load <12 x i64>, ptr %0, align 8
75   // CHECK-NEXT:    %2 = load ptr, ptr %a.addr, align 8
76   // CHECK-NEXT:    store <12 x i64> %1, ptr %2, align 8
77   // CHECK-NEXT:   ret void
78 
79   *a = *b; // whole-matrix assignment whose IR is matched above
80 }
81 
// __fp16 matrix declared with matrix_type(4, 3): 12 elements in total.
// NOTE(review): the typedef name reads "3x4" while the attribute arguments
// are (4, 3); only the element count (12) is observable in the directives
// below -- confirm whether the naming is intentional.
82 typedef __fp16 fp16x3x4_t __attribute__((matrix_type(4, 3)));
// Copies the __fp16 matrix *b into *a. The directives in the body expect the
// element type to appear as 'half' in the IR, with a single <12 x half> load
// and store, each with 2-byte alignment.
83 void load_store_fp16(fp16x3x4_t *a, fp16x3x4_t *b) {
84   // CHECK-LABEL:  define{{.*}} void @load_store_fp16(
85   // CHECK-NEXT:  entry:
86   // CHECK-NEXT:    %a.addr = alloca ptr, align 8
87   // CHECK-NEXT:    %b.addr = alloca ptr, align 8
88   // CHECK-NEXT:    store ptr %a, ptr %a.addr, align 8
89   // CHECK-NEXT:    store ptr %b, ptr %b.addr, align 8
90   // CHECK-NEXT:    %0 = load ptr, ptr %b.addr, align 8
91   // CHECK-NEXT:    %1 = load <12 x half>, ptr %0, align 2
92   // CHECK-NEXT:    %2 = load ptr, ptr %a.addr, align 8
93   // CHECK-NEXT:    store <12 x half> %1, ptr %2, align 2
94   // CHECK-NEXT:   ret void
95 
96   *a = *b; // whole-matrix assignment whose IR is matched above
97 }
98 
// 3x3 matrix of float (9 elements).
99 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
100 
// Takes a 3x3 float matrix by value and stores it through b. The directives
// in the body expect the incoming parameter to be a <9 x float> value, its
// local slot to be allocated as a [9 x float] array, and every load/store of
// the matrix to use the <9 x float> vector type with 4-byte alignment.
101 void parameter_passing(fx3x3_t a, fx3x3_t *b) {
102   // CHECK-LABEL: define{{.*}} void @parameter_passing(
103   // CHECK-NEXT:  entry:
104   // CHECK-NEXT:    %a.addr = alloca [9 x float], align 4
105   // CHECK-NEXT:    %b.addr = alloca ptr, align 8
106   // CHECK-NEXT:    store <9 x float> %a, ptr %a.addr, align 4
107   // CHECK-NEXT:    store ptr %b, ptr %b.addr, align 8
108   // CHECK-NEXT:    %0 = load <9 x float>, ptr %a.addr, align 4
109   // CHECK-NEXT:    %1 = load ptr, ptr %b.addr, align 8
110   // CHECK-NEXT:    store <9 x float> %0, ptr %1, align 4
111   // CHECK-NEXT:    ret void
112   *b = a; // by-value matrix written out through the pointer
113 }
114 
// Returns, by value, the 3x3 float matrix that a points to. The directives in
// the body expect the function's IR return type to be <9 x float>, produced
// by a single <9 x float> load with 4-byte alignment.
115 fx3x3_t return_matrix(fx3x3_t *a) {
116   // CHECK-LABEL: define{{.*}} <9 x float> @return_matrix
117   // CHECK-NEXT:  entry:
118   // CHECK-NEXT:    %a.addr = alloca ptr, align 8
119   // CHECK-NEXT:    store ptr %a, ptr %a.addr, align 8
120   // CHECK-NEXT:    %0 = load ptr, ptr %a.addr, align 8
121   // CHECK-NEXT:    %1 = load <9 x float>, ptr %0, align 4
122   // CHECK-NEXT:    ret <9 x float> %1
123   return *a; // matrix returned directly as a value
124 }
125 
// Struct with a matrix member placed between two scalar members. Its expected
// IR type, { i8, [12 x float], float }, is pinned by the %struct.Matrix
// directive near the top of this file.
126 typedef struct {
127   char Tmp1;
128   fx3x4_t Data; // 12-element float matrix; field index 1 in the IR struct
129   float Tmp2;
130 } Matrix;
131 
// Copies the matrix member Data from *a into *b. The directives in the body
// expect each access to go through a getelementptr to field index 1 of
// %struct.Matrix, with the matrix itself loaded and stored as <12 x float>
// using 4-byte alignment.
132 void matrix_struct(Matrix *a, Matrix *b) {
133   // CHECK-LABEL: define{{.*}} void @matrix_struct(
134   // CHECK-NEXT:  entry:
135   // CHECK-NEXT:    %a.addr = alloca ptr, align 8
136   // CHECK-NEXT:    %b.addr = alloca ptr, align 8
137   // CHECK-NEXT:    store ptr %a, ptr %a.addr, align 8
138   // CHECK-NEXT:    store ptr %b, ptr %b.addr, align 8
139   // CHECK-NEXT:    %0 = load ptr, ptr %a.addr, align 8
140   // CHECK-NEXT:    %Data = getelementptr inbounds nuw %struct.Matrix, ptr %0, i32 0, i32 1
141   // CHECK-NEXT:    %1 = load <12 x float>, ptr %Data, align 4
142   // CHECK-NEXT:    %2 = load ptr, ptr %b.addr, align 8
143   // CHECK-NEXT:    %Data1 = getelementptr inbounds nuw %struct.Matrix, ptr %2, i32 0, i32 1
144   // CHECK-NEXT:    store <12 x float> %1, ptr %Data1, align 4
145   // CHECK-NEXT:    ret void
146   b->Data = a->Data; // note the direction: source is a, destination is b
147 }
148 
// 4x4 matrix of double (16 elements).
149 typedef double dx4x4_t __attribute__((matrix_type(4, 4)));
// Passes a local 4x4 double matrix as a read-write operand to an empty
// volatile asm statement that also declares a "memory" clobber. The
// directives in the body expect the local to be allocated as [16 x double],
// loaded as <16 x double>, and then handed to the asm call twice: as a
// pointer argument annotated elementtype(<16 x double>) and as the loaded
// value, under the constraint string "=*r|m,0,...".
150 void matrix_inline_asm_memory_readwrite(void) {
151   // CHECK-LABEL: define{{.*}} void @matrix_inline_asm_memory_readwrite()
152   // CHECK-NEXT:  entry:
153   // CHECK-NEXT:    [[ALLOCA:%.+]] = alloca [16 x double], align 8
154   // CHECK-NEXT:    [[VAL:%.+]] = load <16 x double>, ptr [[ALLOCA]], align 8
155   // CHECK-NEXT:    call void asm sideeffect "", "=*r|m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(<16 x double>) [[ALLOCA]], <16 x double> [[VAL]])
156   // CHECK-NEXT:    ret void
157 
158   dx4x4_t m; // never initialized; only the emitted IR is inspected
159   asm volatile(""
160                : "+r,m"(m) // read-write operand with register and memory alternatives
161                :
162                : "memory");
163 }
164