xref: /llvm-project/clang/test/CodeGen/matrix-type-operators.c (revision 38fffa630ee80163dc65e759392ad29798905679)
1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
3 
4 
// Matrix typedefs covering double, float, signed-int, and unsigned-int
// element types with distinct row/column shapes; they drive all tests below.
5 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
6 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
7 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
8 typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
9 
10 // Floating point matrix/scalar additions.
11 
// Matrix + matrix (double): both operands load as flattened <25 x double>
// vectors and combine with a single vector fadd.
12 void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
13   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
14   // NOOPT:       [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
15   // NOOPT-NEXT:  [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
16   // OPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
17   // OPT-NEXT:    [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
18   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
19   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
20 
21   a = b + c;
22 }
23 
// Compound matrix += matrix (double): RHS loads before LHS, then one vector
// fadd and a store back to the LHS slot.
24 void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
25   // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
26   // NOOPT:       [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
27   // NOOPT-NEXT:  [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
28   // OPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
29   // OPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
30   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
31   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
32 
33   a += b;
34 }
35 
// Compound matrix -= matrix (double): identical shape to the += case but
// lowers to a vector fsub.
36 void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
37   // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
38   // NOOPT:       [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
39   // NOOPT-NEXT:  [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
40   // OPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
41   // OPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
42   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
43   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
44 
45   a -= b;
46 }
47 
// Matrix + matrix (float): 2x3 matrices flatten to <6 x float> and add with
// a single vector fadd.
48 void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
49   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
50   // NOOPT:       [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
51   // NOOPT-NEXT:  [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
52   // OPT:         [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
53   // OPT-NEXT:    [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
54   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
55   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
56 
57   a = b + c;
58 }
59 
// Compound matrix += matrix (float): RHS loads first, then vector fadd.
60 void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
61   // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
62   // NOOPT:       [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
63   // NOOPT-NEXT:  [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
64   // OPT:         [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
65   // OPT-NEXT:    [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
66   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
67   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
68 
69   a += b;
70 }
71 
// Compound matrix -= matrix (float): lowers to a vector fsub.
72 void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
73   // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
74   // NOOPT:       [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
75   // NOOPT-NEXT:  [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
76   // OPT:         [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
77   // OPT-NEXT:    [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
78   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
79   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
80 
81   a -= b;
82 }
83 
// Matrix + float scalar: the scalar is fpext'ed to the element type, splatted
// via insertelement + shufflevector, then added element-wise.
84 void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
85   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
86   // NOOPT:       [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
87   // NOOPT-NEXT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
88   // OPT:         [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
89   // OPT-NEXT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
90   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
91   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
92   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
93   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
94   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
95 
96   a = a + vf;
97 }
98 
// Compound matrix += float scalar: unlike plain addition, the scalar is
// loaded and extended BEFORE the matrix load.
99 void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
100   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
101   // NOOPT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
102   // OPT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
103   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
104   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
105   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
106   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
107   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
108   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
110 
111   a += vf;
112 }
113 
// Compound matrix -= float scalar: scalar extend + splat, then vector fsub.
114 void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
115   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
116   // NOOPT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
117   // OPT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
118   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
119   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
120   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
122   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
123   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
124   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
125 
126   a -= vf;
127 }
128 
// Matrix + double scalar: element types already match, so the scalar is
// splatted directly with no conversion instruction.
129 void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
130   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
131   // NOOPT:       [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
132   // NOOPT-NEXT:  [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
133   // OPT:         [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
134   // OPT-NEXT:    [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
135   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
136   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
137   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
138   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
139 
140   a = a + vd;
141 }
142 
// Compound matrix += double scalar: scalar loads first, is splatted, then
// added to the matrix and stored back.
143 void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
144   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
145   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
146   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
147   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
148   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
149   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
150   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
151   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
152   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
153   a += vd;
154 }
155 
// Compound matrix -= double scalar: scalar splat then vector fsub and store.
156 void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
157   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
158   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
159   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
160   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
161   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
162   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
163   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
164   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
165   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
166   a -= vd;
167 }
168 
// Matrix + float scalar (float elements): no conversion, direct splat + fadd.
169 void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
170   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
171   // NOOPT:       [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
172   // NOOPT-NEXT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
173   // OPT:         [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
174   // OPT-NEXT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
175   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
176   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
177   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
178   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
179 
180   b = b + vf;
181 }
182 
// Compound matrix += float scalar (float elements): scalar loads first.
183 void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
184   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
185   // NOOPT:       [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
186   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
187   // OPT:         [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
188   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
189   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
190   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
191   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
192   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
193   b += vf;
194 }
195 
// Compound matrix -= float scalar (float elements): splat then vector fsub.
196 void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
197   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
198   // NOOPT:       [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
199   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
200   // OPT:         [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
201   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
202   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
203   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
204   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
205   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
206   b -= vf;
207 }
208 
// Matrix + double scalar (float elements): the scalar is fptrunc'ed down to
// float before the splat.
209 void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
210   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
211   // NOOPT:       [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
212   // NOOPT-NEXT:  [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
213   // OPT:         [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
214   // OPT-NEXT:    [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
215   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
216   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
217   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
218   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
219   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
220 
221   b = b + vd;
222 }
223 
// Compound matrix += double scalar (float elements): scalar load + fptrunc
// precede the matrix load.
224 void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
225   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
226   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
227   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
228   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
229   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
230   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
231   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
232   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
233   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
234   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
235   b += vd;
236 }
237 
// Compound matrix -= double scalar (float elements): fptrunc, splat, fsub.
238 void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
239   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
240   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
241   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
242   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
243   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
244   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
245   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
246   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
247   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
248   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
249   b -= vd;
250 }
251 
252 // Integer matrix/scalar additions
253 
// Matrix + matrix (int): integer matrices use the plain integer add opcode.
254 void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
255   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
256   // NOOPT:       [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
257   // NOOPT-NEXT:  [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
258   // OPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
259   // OPT-NEXT:    [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
260   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
261   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr {{.*}}, align 4
262   a = b + c;
263 }
264 
// Compound matrix += matrix (int): RHS loads first, then vector add.
265 void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
266   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
267   // NOOPT:       [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
268   // NOOPT-NEXT:  [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
269   // OPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
270   // OPT-NEXT:    [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
271   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
272   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr {{.*}}, align 4
273   a += b;
274 }
275 
// Compound matrix -= matrix (int): vector sub with LHS as first operand.
276 void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
277   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
278   // NOOPT:       [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
279   // NOOPT-NEXT:  [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
280   // OPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
281   // OPT-NEXT:    [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
282   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
283   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr {{.*}}, align 4
284   a -= b;
285 }
286 
// Matrix + matrix (unsigned long long): 4x2 flattens to <8 x i64>.
287 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
288   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
289   // NOOPT:       [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
290   // NOOPT-NEXT:  [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
291   // OPT:         [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
292   // OPT-NEXT:    [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
293   // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
294   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
295 
296   a = b + c;
297 }
298 
// Compound matrix += matrix (unsigned long long): vector add on <8 x i64>.
299 void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
300   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
301   // NOOPT:       [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
302   // NOOPT-NEXT:  [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
303   // OPT:         [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
304   // OPT-NEXT:    [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
305   // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
306   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
307 
308   a += b;
309 }
310 
// Compound matrix -= matrix (unsigned long long): vector sub on <8 x i64>.
311 void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
312   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
313   // NOOPT:       [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
314   // OPT:         [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
315   // NOOPT-NEXT:  [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
316   // OPT-NEXT:    [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
317   // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
318   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
319 
320   a -= b;
321 }
322 
// Matrix + short scalar (int elements): the scalar is sign-extended to i32
// before the splat; the result is stored back through the same pointer.
323 void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
324   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
325   // NOOPT:        [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
326   // NOOPT-NEXT:   [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
327   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
328   // OPT-NEXT:     [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
329   // CHECK-NEXT:   [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
330   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
331   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
332   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
333   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
334 
335   a = a + vs;
336 }
337 
// Compound matrix += short scalar: scalar load + sext precede the matrix
// load; the splatted value is added and stored back to %a.addr.
338 void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
339   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
340   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
341   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
342   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
343   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
344   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
345   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
346   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
347   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
348   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr %a.addr, align 4
349 
350   a += vs;
351 }
352 
// Compound matrix -= short scalar: sext, splat, vector sub, store to %a.addr.
353 void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
354   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
355   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
356   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
357   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
358   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
359   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
360   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
361   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
362   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
363   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr %a.addr, align 4
364 
365   a -= vs;
366 }
367 
// Matrix + long scalar (int elements): the scalar is truncated i64 -> i32
// before the splat.
368 void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
369   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
370   // NOOPT:        [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
371   // NOOPT-NEXT:   [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
372   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
373   // OPT-NEXT:     [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
374   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
375   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
376   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
377   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
378   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
379 
380   a = a + vli;
381 }
382 
// Compound matrix += long scalar: scalar load + trunc precede the matrix
// load; result stores back to %a.addr.
383 void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
384   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
385   // NOOPT:       [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
386   // OPT:         [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
387   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
388   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
389   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
390   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
391   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
392   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
393   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr %a.addr, align 4
394 
395   a += vli;
396 }
397 
// Compound matrix -= long scalar: trunc, splat, vector sub, store to %a.addr.
398 void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
399   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
400   // NOOPT:       [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
401   // OPT:         [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
402   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
403   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
404   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
405   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
406   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
407   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
408   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr %a.addr, align 4
409 
410   a -= vli;
411 }
412 
// Matrix + unsigned long long scalar (int elements): truncation i64 -> i32,
// then splat and add.
413 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
414   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
415   // NOOPT:        [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
416   // NOOPT-NEXT:   [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
417   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
418   // OPT-NEXT:     [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
419   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
420   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
421   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
422   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
423   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
424 
425   a = a + vulli;
426 }
427 
// Compound matrix += unsigned long long scalar: the load binds the matrix
// pointer so the store can be checked against the same address.
428 void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
429   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
430   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
431   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
432   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
433   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
434   // OPT-NEXT:     [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
435   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
436   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
437   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
438   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MATRIX_ADDR]], align 4
439 
440   a += vulli;
441 }
442 
// Compound matrix -= unsigned long long scalar: trunc, splat, vector sub;
// the store is checked against the pointer bound at the matrix load.
443 void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
444   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
445   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
446   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
447   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
448   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
449   // OPT-NEXT:     [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
450   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
451   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
452   // CHECK-NEXT:   [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
453   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MATRIX_ADDR]], align 4
454 
455   a -= vulli;
456 }
457 
// Scalar + matrix (scalar on the LEFT): the splat becomes the FIRST add
// operand, mirroring the source-level operand order.
458 void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
459   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
460   // NOOPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
461   // OPT:           [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
462   // CHECK-NEXT:    [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
463   // NOOPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
464   // OPT-NEXT:      [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
465   // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
466   // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
467   // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
468   // CHECK-NEXT:    store <8 x i64> [[RES]], ptr {{.*}}, align 8
469 
470   b = vs + b;
471 }
472 
473 void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
474   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
475   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
476   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
477   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
478   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
479   // OPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
480   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
481   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
482   // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
483   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
484 
485   b += vs;
486 }
487 
488 void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
489   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
490   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
491   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
492   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
493   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
494   // OPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
495   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
496   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
497   // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
498   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
499 
500   b -= vs;
501 }
502 
// long scalar + ull matrix: same width (i64), so no extension instruction is
// emitted — the loaded scalar is splatted directly.
void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // NOOPT:         [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
  // NOOPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
  // OPT:           [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:      [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
  // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:    store <8 x i64> [[RES]], ptr {{.*}}, align 8

  b = vli + b;
}

// Compound add with a same-width long scalar: matrix is the LHS of the add.
void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
  // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
  // OPT:          [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8

  b += vli;
}

// Compound subtract with a same-width long scalar.
void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
  // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
  // OPT:          [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
  // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8

  b -= vli;
}
544 
// unsigned long long scalar with a ull matrix: element types match exactly, so
// the scalar is splatted with no conversion.
void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
  // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
  // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
  // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8
  b = vulli + b;
}

// Compound add with a matching-type scalar: matrix is the LHS of the add.
void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
  // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
  // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
  // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8

  b += vulli;
}

// Compound subtract with a matching-type scalar.
void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
  // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
  // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
  // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8

  b -= vulli;
}
585 
// Tests for matrix multiplication.

// Matrix * matrix lowers to the @llvm.matrix.multiply intrinsic with explicit
// row/column dimensions (5x5 * 5x5 -> 5x5 here).
void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_matrix_matrix_double(
  // NOOPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  // NOOPT-NEXT:    [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  // OPT:           [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:      [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT:    store <25 x double> [[RES]], ptr %a, align 8
  // CHECK:         ret void
  //

  dx5x5_t a;
  a = b * c;
}

// Compound matrix multiply; note the RHS (c) is loaded before the LHS (b).
void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
  // NOOPT:        [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  // NOOPT-NEXT:   [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  // OPT:          [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:     [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:   [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT:   store <25 x double> [[RES]], ptr {{.*}}, align 8
  // CHECK-NEXT:   ret void
  b *= c;
}

typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
// Non-square multiply: 9x3 * 3x9 -> 9x9, i.e. <27 x i32> operands producing
// an <81 x i32> result, with dimensions (9, 3, 9) passed to the intrinsic.
// CHECK-LABEL: @multiply_matrix_matrix_int(
// NOOPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
// NOOPT-NEXT:    [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
// OPT:           [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
// CHECK-NEXT:    store <81 x i32> [[RES]], ptr %a, align 4
// CHECK:         ret void
//
void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
  ix9x9_t a;
  a = b * c;
}
630 
// matrix * float scalar on a double matrix: the scalar is fpext'd to double
// before being splatted.
// CHECK-LABEL: @multiply_double_matrix_scalar_float(
// NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
// OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
// CHECK-NEXT:    ret void
//
void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a * s;
}

// Compound variant: scalar is loaded and extended before the matrix load.
// CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
// NOOPT:         [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
// OPT:           [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
// NOOPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
// OPT-NEXT:      [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
// CHECK-NEXT:    ret void
//
void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
  a *= s;
}

// Same-type double scalar: no conversion, splat the loaded value directly.
// CHECK-LABEL: @multiply_double_matrix_scalar_double(
// NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
// OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
// CHECK-NEXT:    ret void
//
void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a * s;
}

// Compound variant of the same-type scalar multiply.
// CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
// NOOPT:         [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
// NOOPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
// OPT:           [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
// CHECK-NEXT:    ret void
void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
  a *= s;
}
691 
// scalar * matrix with a wider scalar: the double is fptrunc'd to the float
// element type, splatted, and used as the LHS of the fmul.
// CHECK-LABEL: @multiply_float_matrix_scalar_double(
// NOOPT:         [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
// OPT:           [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
// OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT:    store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = s * b;
}
707 
708 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
709 // NOOPT:         [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
710 // OPT:           [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
711 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
712 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
713 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
714 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
715 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
716 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
717 // store <6 x float> %3, ptr [[MAT_ADDR]], align 4
718 // ret void
719 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
720   b *= s;
721 }
722 
// short scalar * int matrix: sext to i32, splat, integer mul with the splat as
// LHS (source operand order).
// CHECK-LABEL: @multiply_int_matrix_scalar_short(
// NOOPT:         [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
// OPT:           [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
// NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
// OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = s * b;
}

// Compound variant: matrix is the LHS of the mul.
// CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
// NOOPT:        [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
// OPT:          [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:   [[S_EXT:%.*]] = sext i16 [[S]] to i32
// NOOPT-NEXT:   [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
// OPT-NEXT:     [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:   [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
// CHECK-NEXT:   [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT:   [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
// CHECK-NEXT:   ret void
//
void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
  b *= s;
}

// matrix * ull scalar: matrix is loaded first (source order b * s), and the
// scalar is truncated i64 -> i32 to match the element type.
// CHECK-LABEL: @multiply_int_matrix_scalar_ull(
// NOOPT:         [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
// OPT:           [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
// OPT-NEXT:      [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  b = b * s;
}

// Compound variant: the scalar is loaded and truncated before the matrix load.
void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
  // NOOPT:         [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
  // OPT:           [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
  // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
  // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
  // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b *= s;
}
786 
// Multiplying by a literal constant folds the splat into an immediate
// `splat (float 2.500000e+00)` operand — no insertelement/shufflevector.
// CHECK-LABEL: @multiply_float_matrix_constant(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
// NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
// OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
// CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void multiply_float_matrix_constant(fx2x3_t a) {
  a = a * 2.5;
}

// Compound variant of the constant multiply.
// CHECK-LABEL: @multiply_compound_float_matrix_constant(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
// NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
// OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
// CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    ret void
void multiply_compound_float_matrix_constant(fx2x3_t a) {
  a *= 2.5;
}

// Integer constant * matrix: splat constant is the LHS (source order 5 * a).
// CHECK-LABEL: @multiply_int_matrix_constant(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
// CHECK-NEXT:    store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
// NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
// OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]]
// CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void multiply_int_matrix_constant(ix9x3_t a) {
  a = 5 * a;
}

// Compound variant: the matrix is the LHS of the mul.
// CHECK-LABEL: @multiply_compound_int_matrix_constant(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
// CHECK-NEXT:    store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
// NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
// OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5)
// CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void multiply_compound_int_matrix_constant(ix9x3_t a) {
  a *= 5;
}
841 
// matrix / float scalar on a double matrix: fpext the scalar, splat, fdiv.
// CHECK-LABEL: @divide_double_matrix_scalar_float(
// NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
// OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
// CHECK-NEXT:    ret void
//
void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a / s;
}

// Same-type double scalar divide: no conversion before the splat.
// CHECK-LABEL: @divide_double_matrix_scalar_double(
// NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
// OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
// CHECK-NEXT:    ret void
//
void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a / s;
}
872 
// float matrix / double scalar: fptrunc the scalar to float, splat, fdiv.
// CHECK-LABEL: @divide_float_matrix_scalar_double(
// NOOPT:         [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
// OPT:           [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT:    store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = b / s;
}

// Signed int matrix / short scalar lowers to `sdiv`.
// CHECK-LABEL: @divide_int_matrix_scalar_short(
// NOOPT:         [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
// OPT:           [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = b / s;
}

// Signed int matrix / ull scalar: the scalar is truncated to the i32 element
// type and the division is still `sdiv` (int matrix element type wins).
// CHECK-LABEL: @divide_int_matrix_scalar_ull(
// NOOPT:         [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
// OPT:           [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  b = b / s;
}
920 
// Unsigned matrix divide lowers to `udiv` (contrast with sdiv above).
// CHECK-LABEL: @divide_ull_matrix_scalar_ull(
// NOOPT:         [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
// NOOPT-NEXT:    [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
// OPT:           [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}}
// OPT-NEXT:      [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0
// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
// CHECK-NEXT:    [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT:    store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8
// CHECK-NEXT:    ret void
//
void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
  b = b / s;
}

// Division by a literal folds the splat into an immediate operand.
// CHECK-LABEL: @divide_float_matrix_constant(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
// NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
// OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
// CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00)
// CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
// CHECK-NEXT:    ret void
//
void divide_float_matrix_constant(fx2x3_t a) {
  a = a / 2.5;
}
949 
// Tests for the matrix type operators.

// NOTE(review): these typedefs duplicate the identical ones at the top of the
// file; C11 allows redeclaring a typedef with the same type, so this is
// harmless but redundant.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));

// Check that we can use matrix index expression on different floating point
// matrixes and indices.
// Constant indices are folded to a single linear element index (column-major):
// a[0][1] of a 5x5 matrix is element 1*5+0 = 5.
void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
  // NOOPT:         [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
  // OPT:           [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
  // CHECK-NEXT:    store <25 x double> [[MATINS]], ptr {{.*}}, align 8
  // CHECK-NEXT:    ret void

  a[0ll][1u] = d;
}

// a[1][4] of a 5x5 matrix folds to element 4*5+1 = 21.
void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
  // NOOPT:         [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
  // OPT:           [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
  // CHECK-NEXT:    store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
  // CHECK-NEXT:    ret void

  a[1][4u] = d;
}

// b[1][1] of a 2x3 matrix folds to element 1*2+1 = 3; mixed index types
// (unsigned long long row, int column) are accepted.
void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
  // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
  // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
  // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
  // CHECK-NEXT:    store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[1ull][1] = e;
}
992 
// Dynamic indices: row (int) is sign-extended and column (unsigned) is
// zero-extended to i64, linearized column-major as col*rows + row (rows = 2),
// and at -O1 a bounds assumption (idx < 6) is emitted via llvm.assume.
void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
  // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
  // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[J_EXT:%.*]] = sext i32 [[J]] to i64
  // NOOPT-NEXT:    [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}}
  // OPT-NEXT:      [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[K_EXT:%.*]] = zext i32 [[K]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[j][k] = e;
}
1014 
// Runtime indices with short / unsigned long long types: only j needs an
// extension (sext i16 -> i64); k is already i64.  A parenthesized base
// expression (b) must index the same way as a plain one.
void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
  // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
  // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
  // NOOPT-NEXT:    [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}}
  // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:      [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[J_EXT:%.*]] = sext i16 [[J]] to i64
  // NOOPT-NEXT:    [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}}
  // OPT-NEXT:      [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  (b)[j][k] = e;
}
1035 
// Check that we can use matrix index expressions on integer matrices.
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Mixed constant/expression indices: the column 1 + 1u folds to 2, so the
// column offset 2 * 9 == 18 is a constant, while the row 4 + i remains a
// runtime add that is sign-extended before the final index computation.
void insert_int_idx_expr(ix9x3_t a, int i) {
  // CHECK-LABEL: @insert_int_idx_expr(
  // NOOPT:         [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // NOOPT-NEXT:    [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT:           [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:      [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
  // CHECK-NEXT:    [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
  // CHECK-NEXT:    store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  a[4 + i][1 + 1u] = i;
}
1056 
// Check that we can use matrix index expressions on FP and integer
// matrices.
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Index through a pointer to a matrix: the matrix pointer is loaded first,
// then [4][1] of the 9x3 matrix maps to flat index 1 * 9 + 4 == 13.
void insert_float_into_int_matrix(ix9x3_t *a, int i) {
  // CHECK-LABEL: @insert_float_into_int_matrix(
  // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // NOOPT-NEXT:    [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}}
  // OPT-NEXT:      [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
  // CHECK-NEXT:    store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4
  // CHECK-NEXT:    ret void

  (*a)[4][1] = i;
}
1073 
// Check that we can use overloaded matrix index expressions on matrices with
// matching dimensions, but different element types.
typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
// double 3x3: [2u][1u] flattens (column-major) to 1 * 3 + 2 == 5.
void insert_matching_dimensions1(dx3x3_t a, double i) {
  // CHECK-LABEL: @insert_matching_dimensions1(
  // NOOPT:         [[I:%.*]] = load double, ptr %i.addr, align 8{{$}}
  // OPT:           [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
  // CHECK-NEXT:    store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
  // CHECK-NEXT:    ret void

  a[2u][1u] = i;
}
1089 
// float 3x3 (same dimensions as above, different element type): [1u][2u]
// flattens to 2 * 3 + 1 == 7.
void insert_matching_dimensions(fx3x3_t b, float e) {
  // CHECK-LABEL: @insert_matching_dimensions(
  // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
  // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
  // CHECK-NEXT:    store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[1u][2u] = e;
}
1101 
// Element read with a constant-folded index: [2][3 - 1u] == [2][2] of a 5x5
// matrix, i.e. flat index 2 * 5 + 2 == 12, lowered to a single extractelement.
double extract_double(dx5x5_t a) {
  // CHECK-LABEL: @extract_double(
  // NOOPT:         [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  // OPT:           [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
  // CHECK-NEXT:    ret double [[MATEXT]]

  return a[2][3 - 1u];
}
1111 
// Element read plus implicit conversion: [2][1] of a 3x3 float matrix is
// flat index 1 * 3 + 2 == 5; the float result is fpext'ed to match the
// double return type.
double extract_float(fx3x3_t b) {
  // CHECK-LABEL: @extract_float(
  // NOOPT:         [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}}
  // OPT:           [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
  // CHECK-NEXT:    [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
  // CHECK-NEXT:    ret double [[TO_DOUBLE]]

  return b[2][1];
}
1122 
// Same variable used for both indices: c[j][j] loads j twice (at -O0) and
// computes the flat index j * 9 + j; at -O1 the llvm.assume bounds hint is
// emitted before the vector load.
int extract_int(ix9x3_t c, unsigned long j) {
  // CHECK-LABEL: @extract_int(
  // NOOPT:         [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}}
  // NOOPT-NEXT:    [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}}
  // OPT:           [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT:      [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J2]], 9
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
  // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    ret i32 [[MATEXT]]

  return c[j][j];
}
1140 
typedef double dx3x2_t __attribute__((matrix_type(3, 2)));

// Mixed array-style and matrix indexing through a double pointer:
// ptr[1][2] selects the matrix (two GEPs), then [j][1] selects the element
// with flat index 1 * 3 + j.  The matrix index is computed before the
// pointer loads, matching the order clang evaluates the subscripts.
double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
  // CHECK-LABEL: @test_extract_matrix_pointer1(
  // NOOPT:         [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT:           [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT:    [[IDX:%.*]] = add i64 3, [[J_EXT]]
  // NOOPT-NEXT:    [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT:      [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1
  // NOOPT-NEXT:    [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
  // OPT-NEXT:      [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2
  // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
  // OPT-NEXT:      [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
  // CHECK-NEXT:    ret double [[MATEXT]]

  return ptr[1][2][j][1];
}
1164 
// Explicit pointer arithmetic spelled with * and +: constant GEP offsets 4
// and 6 select the matrix, and [2][1 * 3 - 2] == [2][1] folds to the
// constant element index 1 * 3 + 2 == 5.
double test_extract_matrix_pointer2(dx3x2_t **ptr) {
  // CHECK-LABEL: @test_extract_matrix_pointer2(
  // CHECK-NEXT:  entry:
  // NOOPT:         [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
  // OPT:           [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4
  // NOOPT-NEXT:    [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
  // OPT-NEXT:      [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6
  // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
  // OPT-NEXT:      [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
  // CHECK-NEXT:    ret double [[MATEXT]]

  return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
}
1181 
// Extract on the RHS feeding an insert on the LHS of the same matrix:
// the read b[0][k] (flat index k * 3 + 0) happens first, then the write
// b[2][j] (flat index j * 3 + 2) reloads the vector before inserting.
// Parameter a is unused here; the test operates on b only.
void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
  // CHECK-LABEL: @insert_extract(
  // NOOPT:         [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}}
  // OPT:           [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[K_EXT:%.*]] = sext i16 [[K]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], 0
  // NOOPT-NEXT:    [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT:      [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
  // NOOPT-NEXT:    [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}}
  // OPT-NEXT:      [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[IDX3:%.*]] = mul i64 [[J]], 3
  // CHECK-NEXT:    [[IDX4:%.*]] = add i64 [[IDX3]], 2
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}}
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
  // CHECK-NEXT:    store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[2][j] = b[0][k];
}
1207 
// Compound assignment on a matrix element: [2][3] of a 5x5 matrix is flat
// index 3 * 5 + 2 == 17; the element is extracted, updated (fsub), and the
// vector is reloaded before the insert, as for a plain read-modify-write.
void insert_compound_stmt(dx5x5_t a) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
  // CHECK:        [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}}
  // CHECK-NEXT:   [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
  // CHECK-NEXT:   [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
  // CHECK-NEXT:   [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}}
  // CHECK-NEXT:   [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
  // CHECK-NEXT:   store <25 x double> [[INS]], ptr [[A_PTR]], align 8
  // CHECK-NEXT:   ret void

  a[2][3] -= 1.0;
}
1220 
// Struct with a matrix-typed member, used to test compound assignment
// through a member access.
struct Foo {
  fx2x3_t mat;
};
1224 
// Compound assignment on a matrix element reached via a struct field and
// runtime indices: flat index j * 2 + i.  Note the bounds assume is emitted
// twice at -O1 — once for the extract and once for the insert.
void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
  // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
  // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
  // CHECK-NEXT:    [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
  // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:    [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
  // CHECK-NEXT:    [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[INS]], ptr %mat, align 4
  // CHECK-NEXT:    ret void

  a->mat[i][j] += f;
}
1249 
// Matrix element reads used as the indices of another matrix access:
// a[i][j] (flat j * 9 + i) and a[j][i] + 2 (flat i * 9 + j, then + 2) are
// extracted from the 9x3 int matrix, sign-extended, and combined into the
// flat index (a[j][i] + 2) * 5 + a[i][j] for the store into the 5x5 double
// matrix b.  Each of the three accesses gets its own bounds assume at -O1.
void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
  // NOOPT:       [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT:         [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:  [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
  // NOOPT-NEXT:  [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT-NEXT:    [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:  [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
  // CHECK-NEXT:  [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
  // CHECK-NEXT:  [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
  // NOOPT-NEXT:  [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT:    [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:  [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
  // CHECK-NEXT:  [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
  // NOOPT-NEXT:  [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT-NEXT:    [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:  [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
  // NOOPT-NEXT:  [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT-NEXT:    [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:  [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
  // CHECK-NEXT:  [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
  // CHECK-NEXT:  [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
  // NOOPT-NEXT:  [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT:    [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:  [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
  // CHECK-NEXT:  [[MI3:%.*]] = add nsw i32 [[MI2]], 2
  // CHECK-NEXT:  [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
  // CHECK-NEXT:  [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
  // CHECK-NEXT:  [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
  // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
  // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT:  [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}}
  // CHECK-NEXT:  [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
  // CHECK-NEXT:  store <25 x double> [[INS]], ptr [[B_PTR]], align 8
  b[a[i][j]][a[j][i] + 2] = 1.5;
}
1290