xref: /llvm-project/clang/test/CodeGenCXX/matrix-type-operators.cpp (revision 12f78e740c5419f7d1fbcf8f2106e7a40cd1d6f7)
1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,NOOPT %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
// Matrix type aliases declared with both spellings (typedef and
// alias-declaration) of the matrix_type attribute. Neither alias is
// referenced in the visible part of this file.
3 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
4 using fx2x3_t = float __attribute__((matrix_type(2, 3)));
5 
// Wrapper that exposes a Rows x Columns matrix of EltTy as the public member
// `value`. The tests below use it to reach matrix operations through
// template-dependent types.
6 template <typename EltTy, unsigned Rows, unsigned Columns>
7 struct MyMatrix {
  // Matrix type built from the template arguments.
8   using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
9 
10   matrix_t value;
11 };
12 
// Returns the sum of the matrices stored in A and B. Both operands and the
// result share element type EltTy0 and dimensions R0 x C0.
13 template <typename EltTy0, unsigned R0, unsigned C0>
14 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
15   return A.value + B.value;
16 }
17 
// Instantiates add<float, 2, 5> and checks the generated IR: the call site
// passes both wrappers by pointer (40 dereferenceable bytes each), and the
// instantiated body loads two <10 x float> values and combines them with a
// single fadd. The -O1 run additionally expects !tbaa metadata on the loads.
18 void test_add_template() {
19   // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
20   // CHECK:       %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
21 
22   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
23   // NOOPT:       [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
24   // NOOPT:       [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
25   // OPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
26   // OPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
27   // CHECK-NEXT:  [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
28   // CHECK-NEXT:  ret <10 x float> [[RES]]
29 
  // The local names Mat1/Mat2 appear verbatim in the expected call above.
30   MyMatrix<float, 2, 5> Mat1;
31   MyMatrix<float, 2, 5> Mat2;
32   Mat1.value = add(Mat1, Mat2);
33 }
34 
// Returns the difference A.value - B.value. Both operands and the result
// share element type EltTy0 and dimensions R0 x C0.
35 template <typename EltTy0, unsigned R0, unsigned C0>
36 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
37   return A.value - B.value;
38 }
39 
// Instantiates subtract<float, 2, 5> and checks the generated IR: the call
// site passes both wrappers by pointer, and the instantiated body loads two
// <10 x float> values and combines them with a single fsub. The -O1 run
// additionally expects !tbaa metadata on the loads.
40 void test_subtract_template() {
41   // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
42   // CHECK:       %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
43 
44   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
45   // NOOPT:       [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
46   // NOOPT:       [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
47   // OPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
48   // OPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
49   // CHECK-NEXT:  [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
50   // CHECK-NEXT:  ret <10 x float> [[RES]]
51 
  // The local names Mat1/Mat2 appear verbatim in the expected call above.
52   MyMatrix<float, 2, 5> Mat1;
53   MyMatrix<float, 2, 5> Mat2;
54   Mat1.value = subtract(Mat1, Mat2);
55 }
56 
// Helper with an implicit user-defined conversion to double. The conversion
// returns the int member `x` converted to double. It is used below as the
// scalar operand of matrix/scalar subtraction.
57 struct DoubleWrapper1 {
58   int x;
59   operator double() {
60     return x;
61   }
62 };
63 
// Checks `matrix - scalar` where the scalar comes from DoubleWrapper1's
// conversion operator. Expected IR order: load the <90 x double> matrix,
// call the conversion, broadcast the scalar with insertelement +
// shufflevector, then fsub with the matrix as the left operand and store.
64 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
65   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
66   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
67   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
68   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
69   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
70   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
71   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
72   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
73 
  // The local name w1 appears verbatim (%w1) in the expected call above.
74   DoubleWrapper1 w1;
75   w1.x = 10;
76   m.value = m.value - w1;
77 }
78 
// Checks `scalar - matrix` where the scalar comes from DoubleWrapper1's
// conversion operator. Compared with Sub1, the conversion call is expected
// before the matrix load, and the broadcast scalar is the left operand of
// the fsub.
79 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
80   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
81   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
82   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
83   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
84   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
85   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
86   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
87   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
88 
  // The local name w1 appears verbatim (%w1) in the expected call above.
89   DoubleWrapper1 w1;
90   w1.x = 10;
91   m.value = w1 - m.value;
92 }
93 
// Helper with an implicit user-defined conversion to double, structurally
// identical to DoubleWrapper1. The conversion returns the int member `x`
// converted to double. It is used below as the scalar operand of
// matrix/scalar addition.
94 struct DoubleWrapper2 {
95   int x;
96   operator double() {
97     return x;
98   }
99 };
100 
// Checks `matrix + scalar` where the scalar comes from DoubleWrapper2's
// conversion operator. Expected IR: load the <90 x double> matrix, call the
// conversion, broadcast the scalar with insertelement + shufflevector, then
// fadd with the matrix as the left operand and store the result.
101 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
102   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
103   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}}
104   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}}
105   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
106   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
107   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
108   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
110 
  // The local name w2 appears verbatim (%w2) in the expected call above.
111   DoubleWrapper2 w2;
112   w2.x = 20;
113   m.value = m.value + w2;
114 }
115 
// Checks `scalar + matrix` where the scalar comes from DoubleWrapper2's
// conversion operator. Compared with Add1, the conversion call is expected
// before the matrix load, and the broadcast scalar is the left operand of
// the fadd.
116 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
117   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
118   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
119   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
120   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
122   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
123   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
124   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
125 
  // The local name w2 appears verbatim (%w2) in the expected call above.
126   DoubleWrapper2 w2;
127   w2.x = 20;
128   m.value = w2 + m.value;
129 }
130 
// Helper with an implicit user-defined conversion to int. The conversion
// returns the char member `x` converted to int. It is used below both as a
// scalar operand (requiring a further int-to-double conversion) and as a
// matrix subscript.
131 struct IntWrapper {
132   char x;
133   operator int() {
134     return x;
135   }
136 };
137 
// Checks `matrix + scalar` where the scalar comes from IntWrapper's
// conversion to int and must then be converted to the matrix element type.
// Expected IR: load the <90 x double> matrix, call the conversion, sitofp
// the i32 result to double, broadcast it with insertelement + shufflevector,
// then fadd with the matrix as the left operand and store the result.
// The sitofp operand is tied to the captured call result instead of a
// hard-coded value name, so the directive follows the call it is meant to
// consume.
138 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
139   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
140   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
141   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
142   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
143   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
144   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
145   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
146   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
147   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
148 
  // The local name w3 appears verbatim (%w3) in the expected call above.
149   IntWrapper w3;
150   w3.x = 'c';
151   m.value = m.value + w3;
152 }
153 
// Checks `scalar - matrix` where the scalar comes from IntWrapper's
// conversion to int and must then be converted to the matrix element type.
// Expected IR: call the conversion, sitofp the i32 result to double, load
// the <90 x double> matrix, broadcast the scalar with insertelement +
// shufflevector, then fsub with the broadcast scalar as the left operand.
// The sitofp operand is tied to the captured call result instead of a
// hard-coded value name, so the directive follows the call it is meant to
// consume.
154 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
155   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
156   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
157   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
158   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
159   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
160   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
161   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
162   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
163   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
164 
  // The local name w3 appears verbatim (%w3) in the expected call above.
165   IntWrapper w3;
166   w3.x = 'c';
167   m.value = w3 - m.value;
168 }
169 
// Returns the matrix product A.value * B.value. A is R0 x C0 and B is
// C0 x C1 (the shared C0 ties the inner dimensions together); the result is
// R0 x C1.
170 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
171 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
172   return A.value * B.value;
173 }
174 
// Instantiates multiply<float, 2, 5, 2> and checks the generated IR. The
// caller is expected to be emitted as a void function that writes its
// MyMatrix<float, 2, 2> result through %agg.result. The instantiated body is
// expected to load both <10 x float> operands and lower `*` to a call to
// llvm.matrix.multiply with dimensions (2, 5, 2).
175 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
176                                              MyMatrix<float, 5, 2> Mat2) {
177   // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
178   // CHECK-NEXT:  entry:
179   // CHECK-NEXT:    [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
180   // CHECK-NEXT:    %value = getelementptr inbounds nuw %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0
181   // CHECK-NEXT:    store <4 x float> [[RES]], ptr %value, align 4
182   // CHECK-NEXT:    ret void
183   //
184   // CHECK-LABEL:  define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
185   // NOOPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
186   // NOOPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
187   // OPT:           [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
188   // OPT:           [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
189   // CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
190   // CHECK-NEXT:    ret <4 x float> [[RES]]
191 
  // The expected caller IR above has no separate alloca or copy for Res;
  // the store goes straight to %agg.result.
192   MyMatrix<float, 2, 2> Res;
193   Res.value = multiply(Mat1, Mat2);
194   return Res;
195 }
196 
// Checks `scalar * matrix` where the scalar comes from IntWrapper's
// conversion to int and must then be converted to the matrix element type.
// Expected IR: call the conversion, sitofp the i32 result to double, load
// the <90 x double> matrix, broadcast the scalar with insertelement +
// shufflevector, then an element-wise fmul (not the matrix-multiply
// intrinsic), a store, and `ret void`.
// The sitofp operand is tied to the captured call result instead of a
// hard-coded value name, so the directive follows the call it is meant to
// consume.
197 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
198   // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
199   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}})
200   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
201   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
202   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
203   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
204   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
205   // CHECK-NEXT:  [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
206   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
207   // CHECK-NEXT:  ret void
208   m.value = w3 * m.value;
209 }
210 
// Stores `e` into element (row i, column j) of the matrix held by Mat.
// No range checking of i or j is performed here.
211 template <typename EltTy, unsigned Rows, unsigned Columns>
212 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
213   Mat.value[i][j] = e;
214 }
215 
// Instantiates insert<unsigned, 2, 2> with runtime indices and checks the
// generated IR. The caller loads its arguments and forwards them. The
// instantiated body zero-extends both indices to i64, computes the flat
// element index as j * 2 + i, and writes the element with
// load / insertelement / store on <4 x i32>. The -O1 run additionally
// expects an llvm.assume that the flat index is below 4.
216 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
217   // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
218   // NOOPT:         [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
219   // NOOPT-NEXT:    [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
220   // NOOPT-NEXT:    [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
221   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
222   // OPT:           [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
223   // OPT-NEXT:      [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
224   // OPT-NEXT:      [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
225   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
226   // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
227   // CHECK-NEXT:    ret void
228   //
229   // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
230   // NOOPT:         [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
231   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
232   // OPT:           [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
233   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
234   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
235   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
236   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
237   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
238   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
239   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
240   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
241   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
242   // CHECK-NEXT:    [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
243   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
244   // CHECK-NEXT:    store <4 x i32> [[MATINS]], ptr {{.*}}, align 4
245   // CHECK-NEXT:    ret void
246 
247   insert(Mat, e, i, j);
248 }
249 
// Instantiates insert<float, 3, 8> with constant indices (row 2, column 5)
// and checks the generated IR. The caller passes the constants as immediate
// i32 arguments. The instantiated body computes the flat element index as
// j * 3 + i and writes the element with load / insertelement / store on
// <24 x float>. The -O1 run additionally expects an llvm.assume that the
// flat index is below 24.
250 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
251   // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
252   // NOOPT:         [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
253   // NOOPT-NEXT:    [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
254   // OPT:           [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
255   // OPT-NEXT:      [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
256   // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
257   // CHECK-NEXT:    ret void
258   //
259   // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
260   // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
261   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
262   // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
263   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
264   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
265   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
266   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
267   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
268   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
269   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
270   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
271   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
272   // CHECK-NEXT:    [[MAT:%.*]] = load <24 x float>, ptr {{.*}}, align 4{{$}}
273   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
274   // CHECK-NEXT:    store <24 x float> [[MATINS]], ptr {{.*}}, align 4
275   // CHECK-NEXT:    ret void
276 
277   insert(Mat, e, 2, 5);
278 }
279 
// Returns the element at row 1, column 0 of the matrix held by Mat. The
// indices are unsigned literals, so the position is fixed at compile time.
280 template <typename EltTy, unsigned Rows, unsigned Columns>
281 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
282   return Mat.value[1u][0u];
283 }
284 
// Instantiates extract<int, 2, 2> and checks the generated IR. The caller
// forwards its by-value argument by pointer and returns the call result.
// The instantiated body loads the <4 x i32> matrix and reads element [1][0]
// with a constant extractelement at flat index 1 (0 * 2 + 1).
285 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
286   // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
287   // CHECK-NEXT:  entry:
288   // CHECK-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(ptr noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
289   // CHECK-NEXT:    ret i32 [[CALL]]
290   //
291   // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
292   // NOOPT:         [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
293   // OPT:           [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
294   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
295   // CHECK-NEXT:    ret i32 [[MATEXT]]
296 
297   return extract(Mat1);
298 }
299 
// 4 x 4 matrix of double, used directly (without the MyMatrix wrapper) by
// the subscript tests below.
300 using double4x4 = double __attribute__((matrix_type(4, 4)));
301 
// Function template whose return type is spelled as decltype(m[r][c]), so a
// matrix subscript expression becomes part of the signature (and of the
// mangled name checked by test_matrix_subscript). The body ignores m, r and
// c and returns a function-local static instead; see the FIXME.
302 template <class R, class C>
303 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {
304   // FIXME: We can't actually do 'return m[r][c]' here currently.
305   static double d;
306   return d;
307 }
308 
// Calls matrix_subscript<int, int> and checks the generated IR. The matrix
// is passed by value as <16 x double>, the callee's mangled name encodes the
// subscript expression from its return type, and the call yields a pointer
// (the callee returns a reference) from which a double is loaded and
// returned.
309 double test_matrix_subscript(double4x4 m) {
310   // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
311   // NOOPT:         [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}}
312   // OPT:           [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
313   // CHECK-NEXT:    [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) ptr @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
314   // NOOPT-NEXT:    [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}}
315   // OPT-NEXT:      [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}}
316   // CHECK-NEXT:    ret double [[RES]]
317 
318   return matrix_subscript(m, 1, 2);
319 }
320 
// Checks binding a const double& to a matrix element. The expected IR
// spills the by-value matrix to a [16 x double] alloca, extracts element
// [0][1] at flat index 4 (1 * 4 + 0), stores it to a temporary alloca, and
// returns that temporary's address.
// NOTE(review): the returned reference therefore points at a function-local
// temporary; presumably acceptable because this test only inspects codegen
// and never uses the result.
321 const double &test_matrix_subscript_reference(const double4x4 m) {
322   // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
323   // CHECK-NEXT:  entry:
324   // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x double], align 8
325   // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca double, align 8
326   // CHECK-NEXT:    store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8
327   // NOOPT:         [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}}
328   // OPT:           [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
329   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
330   // CHECK-NEXT:    store double [[MATEXT]], ptr [[REF_TMP]], align 8
331   // CHECK:         ret ptr [[REF_TMP]]
332 
333   return m[0][1];
334 }
335 
// Helper with an implicit user-defined conversion to unsigned. The
// conversion returns the char member `x` converted to unsigned. It is used
// below as a matrix column subscript.
336 struct UnsignedWrapper {
337   char x;
338   operator unsigned() {
339     return x;
340   }
341 };
342 
// Checks matrix subscripting with indices produced by user-defined
// conversions. The row index `i + 1` is a signed int (expected: add nsw,
// then sext to i64). The column index `j - 1` is unsigned (expected: plain
// sub, then zext to i64). The flat element index is (j - 1) * 4 + (i + 1),
// and the element is read with extractelement. The -O1 run additionally
// expects an llvm.assume that the flat index is below 16, emitted before the
// matrix is loaded.
343 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
344   // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
345   // CHECK:         [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i)
346   // CHECK-NEXT:    [[I_ADD:%.*]] = add nsw i32 [[I]], 1
347   // CHECK-NEXT:    [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
348   // CHECK-NEXT:    [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j)
349   // CHECK-NEXT:    [[J_SUB:%.*]] = sub i32 [[J]], 1
350   // CHECK-NEXT:    [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
351   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
352   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
353   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
354   // NOOPT-NEXT:    [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}}
355   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
356   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
357   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
358   // OPT-NEXT:      [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
359   // CHECK-NEXT:    [[MATEXT:%.*]]  = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
360   // CHECK-NEXT:    ret double [[MATEXT]]
361   return m[i + 1][j - 1];
362 }
363 
// Alias template producing an R x C matrix of T. Used by identmatrix_t and
// the test_constexpr* functions below.
364 template <class T, unsigned R, unsigned C>
365 using matrix_type = T __attribute__((matrix_type(R, C)));
// Tag type that converts implicitly to any square N x N matrix of T through
// a conversion-operator template; T and N are deduced from the target type.
// The conversion writes 1 to every diagonal element.
// NOTE(review): `result` is declared without an initializer and only the
// diagonal is assigned, so the off-diagonal elements are never written
// here. The tests below only inspect the generated loop body.
366 struct identmatrix_t {
367   template <class T, unsigned N>
368   operator matrix_type<T, N, N>() const {
369     matrix_type<T, N, N> result;
370     for (unsigned i = 0; i != N; ++i)
371       result[i][i] = 1;
372     return result;
373   }
374 };
375 
// Namespace-scope constant of the tag type. The tests below expect it to be
// referenced in IR under the internal-linkage mangled name @_ZL11identmatrix.
376 constexpr identmatrix_t identmatrix;
377 
// Checks `matrix + identmatrix` for a 4 x 4 float matrix. Expected caller
// IR: load the <16 x float> matrix, call the conversion operator
// instantiated for <float, 4> on @_ZL11identmatrix, fadd the two, reload the
// destination address, store, and `ret void`. The second group of
// directives checks the for.body block of the instantiated conversion
// operator: the flat index i * 4 + i is computed from two loads of `i`, and
// 1.0 is written with load / insertelement / store on %result. The -O1 run
// additionally expects an llvm.assume that the flat index is below 16.
377 void test_constexpr1(matrix_type<float, 4, 4> &m) {
378   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
379   // NOOPT:         [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}}
380   // OPT:           [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
381   // CHECK-NEXT:    [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
382   // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
383   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
384   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
385   // CHECK-NEXT:    store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4
386   // CHECK-NEXT:    ret void
387 
388   // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
389   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
390   // NOOPT-NEXT:   [[I:%.*]] = load i32, ptr %i, align 4{{$}}
391   // OPT-NEXT:     [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
392   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
393   // NOOPT-NEXT:   [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
394   // OPT-NEXT:     [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
395   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
396   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
397   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
398   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
399   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
400   // CHECK-NEXT:   [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}}
401   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
402   // CHECK-NEXT:   store <16 x float> [[MATINS]], ptr %result, align 4
403   // CHECK-NEXT:   br label %for.inc
404   m = m + identmatrix;
405 }
407 
// Checks `identmatrix - m + 1` for a 5 x 5 int matrix. Expected caller IR:
// call the conversion operator instantiated for <int, 5> on
// @_ZL11identmatrix, load the <25 x i32> matrix, integer `sub` with the
// converted value as the left operand, `add` of a splat of 1, reload the
// destination address, store, and `ret void`. The second group of
// directives checks the for.body block of the instantiated conversion
// operator: the flat index i * 5 + i is computed from two loads of `i`, and
// the integer 1 is written with load / insertelement / store on %result. The
// -O1 run additionally expects an llvm.assume that the flat index is below
// 25.
407 void test_constexpr2(matrix_type<int, 5, 5> &m) {
408   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
409   // CHECK:         [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
410   // NOOPT:         [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}}
411   // OPT:           [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
412   // CHECK-NEXT:    [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
413   // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], splat (i32 1)
414   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
415   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
416   // CHECK-NEXT:    store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
417   // CHECK-NEXT:    ret void
418   //
419 
420   // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
421   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
422   // NOOPT-NEXT:   [[I:%.*]] = load i32, ptr %i, align 4{{$}}
423   // OPT-NEXT:     [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
424   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
425   // NOOPT-NEXT:   [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
426   // OPT-NEXT:     [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
427   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
428   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
429   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
430   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
431   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
432   // CHECK-NEXT:   [[MAT:%.*]] = load <25 x i32>, ptr %result, align 4{{$}}
433   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
434   // CHECK-NEXT:   store <25 x i32> [[MATINS]], ptr %result, align 4
435   // CHECK-NEXT:   br label %for.inc
436 
437   m = identmatrix - m + 1;
438 }
440