1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s 2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s 3 4 5 typedef double dx5x5_t __attribute__((matrix_type(5, 5))); 6 typedef float fx2x3_t __attribute__((matrix_type(2, 3))); 7 typedef int ix9x3_t __attribute__((matrix_type(9, 3))); 8 typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2))); 9 10 // Floating point matrix/scalar additions. 11 12 void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) { 13 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c) 14 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 15 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 16 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 17 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 18 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]] 19 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 20 21 a = b + c; 22 } 23 24 void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) { 25 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b) 26 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 27 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 28 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 29 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 30 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[A]], [[B]] 31 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 32 33 a += b; 34 } 35 36 void 
subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) { 37 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b) 38 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 39 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 40 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 41 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 42 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[A]], [[B]] 43 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 44 45 a -= b; 46 } 47 48 void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) { 49 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c) 50 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 51 // NOOPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 52 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 53 // OPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 54 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]] 55 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 56 57 a = b + c; 58 } 59 60 void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) { 61 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b) 62 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 63 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 64 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 65 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 66 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]] 67 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 68 69 a += b; 70 } 71 72 void 
subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) { 73 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b) 74 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 75 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 76 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 77 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 78 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]] 79 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 80 81 a -= b; 82 } 83 84 void add_matrix_scalar_double_float(dx5x5_t a, float vf) { 85 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf) 86 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 87 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} 88 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 89 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 90 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double 91 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0 92 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer 93 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] 94 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 95 96 a = a + vf; 97 } 98 99 void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) { 100 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf) 101 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} 102 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 
103 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double 104 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 105 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0 107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer 108 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] 109 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 110 111 a += vf; 112 } 113 114 void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) { 115 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf) 116 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} 117 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 118 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double 119 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 120 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0 122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer 123 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] 124 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 125 126 a -= vf; 127 } 128 129 void add_matrix_scalar_double_double(dx5x5_t a, double vd) { 130 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd) 131 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 132 // NOOPT-NEXT: 
[[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} 133 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 134 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 135 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0 136 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer 137 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] 138 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 139 140 a = a + vd; 141 } 142 143 void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) { 144 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd) 145 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} 146 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 147 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 148 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 149 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0 150 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer 151 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] 152 // store <25 x double> [[RES]], ptr {{.*}}, align 8 153 a += vd; 154 } 155 156 void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) { 157 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd) 158 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} 159 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 160 // OPT: [[SCALAR:%.*]] = load 
double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 161 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 162 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0 163 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer 164 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] 165 // store <25 x double> [[RES]], ptr {{.*}}, align 8 166 a -= vd; 167 } 168 169 void add_matrix_scalar_float_float(fx2x3_t b, float vf) { 170 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf) 171 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 172 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} 173 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 174 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 175 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 176 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer 177 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] 178 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 179 180 b = b + vf; 181 } 182 183 void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) { 184 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf) 185 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} 186 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}} 187 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 188 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 189 
// CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 190 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer 191 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] 192 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 193 b += vf; 194 } 195 196 void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) { 197 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf) 198 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} 199 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}} 200 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 201 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 202 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 203 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer 204 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] 205 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 206 b -= vf; 207 } 208 209 void add_matrix_scalar_float_double(fx2x3_t b, double vd) { 210 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd) 211 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 212 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} 213 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 214 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 215 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float 216 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> 
poison, float [[SCALAR_TRUNC]], i64 0 217 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer 218 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] 219 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 220 221 b = b + vd; 222 } 223 224 void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) { 225 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd) 226 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} 227 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 228 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float 229 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 230 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 231 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 232 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer 233 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] 234 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 235 b += vd; 236 } 237 238 void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) { 239 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd) 240 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} 241 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 242 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float 243 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} 244 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 245 // CHECK-NEXT: 
[[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 246 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer 247 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] 248 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 249 b -= vd; 250 } 251 252 // Integer matrix/scalar additions 253 254 void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) { 255 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c) 256 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 257 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 258 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 259 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 260 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]] 261 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4 262 a = b + c; 263 } 264 265 void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) { 266 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b) 267 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 268 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 269 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 270 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 271 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[A]], [[B]] 272 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4 273 a += b; 274 } 275 276 void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) { 277 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b) 278 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 
279 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 280 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 281 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 282 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[A]], [[B]] 283 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4 284 a -= b; 285 } 286 287 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) { 288 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c) 289 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 290 // NOOPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 291 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 292 // OPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 293 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]] 294 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 295 296 a = b + c; 297 } 298 299 void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) { 300 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b) 301 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 302 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 303 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 304 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 305 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]] 306 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 307 308 a += b; 309 } 310 311 void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) { 312 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b) 313 // NOOPT: 
[[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 314 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 315 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 316 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 317 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]] 318 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 319 320 a -= b; 321 } 322 323 void add_matrix_scalar_int_short(ix9x3_t a, short vs) { 324 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs) 325 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 326 // NOOPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} 327 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 328 // OPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 329 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 330 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0 331 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 332 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 333 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 334 335 a = a + vs; 336 } 337 338 void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) { 339 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs) 340 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} 341 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 342 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 343 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} 344 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr 
%a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 345 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0 346 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 347 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 348 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 349 350 a += vs; 351 } 352 353 void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) { 354 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs) 355 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} 356 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 357 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 358 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} 359 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 360 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0 361 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 362 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 363 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 364 365 a -= vs; 366 } 367 368 void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) { 369 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli) 370 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 371 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} 372 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 373 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 374 // 
CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 375 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 376 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 377 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 378 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 379 380 a = a + vli; 381 } 382 383 void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) { 384 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli) 385 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} 386 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 387 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 388 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} 389 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 390 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 391 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 392 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 393 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 394 395 a += vli; 396 } 397 398 void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) { 399 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli) 400 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} 401 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 402 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 403 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} 404 
// OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 405 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 406 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 407 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 408 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 409 410 a -= vli; 411 } 412 413 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) { 414 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli) 415 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 416 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} 417 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 418 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 419 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 420 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 421 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 422 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 423 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 424 425 a = a + vulli; 426 } 427 428 void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) { 429 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli) 430 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} 431 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 432 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc 
i64 [[SCALAR]] to i32 433 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}} 434 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 435 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 436 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 437 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 438 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 439 440 a += vulli; 441 } 442 443 void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) { 444 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli) 445 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} 446 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 447 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 448 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}} 449 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 450 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 451 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer 452 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] 453 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 454 455 a -= vulli; 456 } 457 458 void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) { 459 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs) 460 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} 461 // OPT: 
[[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 462 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 463 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 464 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 465 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 466 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 467 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] 468 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 469 470 b = vs + b; 471 } 472 473 void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) { 474 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs) 475 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} 476 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 477 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 478 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} 479 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 480 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 481 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 482 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] 483 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 484 485 b += vs; 486 } 487 488 void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) { 489 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs) 490 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 
2{{$}} 491 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 492 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 493 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} 494 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 495 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 496 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 497 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] 498 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 499 500 b -= vs; 501 } 502 503 void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) { 504 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli) 505 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} 506 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 507 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 508 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 509 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 510 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 511 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] 512 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 513 514 b = vli + b; 515 } 516 517 void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) { 518 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli) 519 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} 520 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 521 
// OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 522 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 523 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 524 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 525 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] 526 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 527 528 b += vli; 529 } 530 531 void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) { 532 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli) 533 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} 534 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 535 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} 536 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 537 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 538 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 539 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] 540 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 541 542 b -= vli; 543 } 544 545 void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) { 546 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long 547 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} 548 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} 549 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 550 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr 
%b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 551 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 552 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 553 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] 554 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 555 b = vulli + b; 556 } 557 558 void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) { 559 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long 560 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} 561 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} 562 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 563 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 564 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 565 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 566 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] 567 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 568 569 b += vulli; 570 } 571 572 void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) { 573 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long 574 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} 575 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} 576 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 577 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 578 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> 
poison, i64 [[SCALAR]], i64 0 579 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer 580 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] 581 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 582 583 b -= vulli; 584 } 585 586 // Tests for matrix multiplication. 587 588 void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) { 589 // CHECK-LABEL: @multiply_matrix_matrix_double( 590 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 591 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 592 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 593 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 594 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5) 595 // CHECK-NEXT: store <25 x double> [[RES]], ptr %a, align 8 596 // CHECK: ret void 597 // 598 599 dx5x5_t a; 600 a = b * c; 601 } 602 603 void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) { 604 // CHECK-LABEL: @multiply_compound_matrix_matrix_double( 605 // NOOPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 606 // NOOPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 607 // OPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 608 // OPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 609 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5) 610 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 611 // CHECK-NEXT: ret void 612 b *= c; 613 } 614 615 typedef int ix3x9_t __attribute__((matrix_type(3, 9))); 616 typedef int ix9x9_t __attribute__((matrix_type(9, 9))); 617 // CHECK-LABEL: 
@multiply_matrix_matrix_int( 618 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 619 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} 620 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 621 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 622 // CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9) 623 // CHECK-NEXT: store <81 x i32> [[RES]], ptr %a, align 4 624 // CHECK: ret void 625 // 626 void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) { 627 ix9x9_t a; 628 a = b * c; 629 } 630 631 // CHECK-LABEL: @multiply_double_matrix_scalar_float( 632 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 633 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} 634 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 635 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 636 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double 637 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0 638 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer 639 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] 640 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 641 // CHECK-NEXT: ret void 642 // 643 void multiply_double_matrix_scalar_float(dx5x5_t a, float s) { 644 a = a * s; 645 } 646 647 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float 648 // NOOPT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} 649 // OPT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 650 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double 651 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 652 // OPT-NEXT: [[A:%.*]] = load 
<25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 653 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0 654 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer 655 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] 656 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 657 // CHECK-NEXT: ret void 658 // 659 void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) { 660 a *= s; 661 } 662 663 // CHECK-LABEL: @multiply_double_matrix_scalar_double( 664 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 665 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} 666 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 667 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 668 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0 669 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer 670 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] 671 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 672 // CHECK-NEXT: ret void 673 // 674 void multiply_double_matrix_scalar_double(dx5x5_t a, double s) { 675 a = a * s; 676 } 677 678 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double( 679 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} 680 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 681 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 682 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 683 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0 684 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x 
double> poison, <25 x i32> zeroinitializer 685 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] 686 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 687 // CHECK-NEXT: ret void 688 void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) { 689 a *= s; 690 } 691 692 // CHECK-LABEL: @multiply_float_matrix_scalar_double( 693 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} 694 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 695 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float 696 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 697 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 698 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 699 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer 700 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]] 701 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4 702 // CHECK-NEXT: ret void 703 // 704 void multiply_float_matrix_scalar_double(fx2x3_t b, double s) { 705 b = s * b; 706 } 707 708 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double( 709 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} 710 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 711 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float 712 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 713 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 714 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 715 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer 716 // CHECK-NEXT: [[RES:%.*]] = fmul 
<6 x float> [[MAT]], [[VECSPLAT]] 717 // store <6 x float> %3, ptr [[MAT_ADDR]], align 4 718 // ret void 719 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) { 720 b *= s; 721 } 722 723 // CHECK-LABEL: @multiply_int_matrix_scalar_short( 724 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} 725 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 726 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 727 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 728 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 729 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0 730 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer 731 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]] 732 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 733 // CHECK-NEXT: ret void 734 // 735 void multiply_int_matrix_scalar_short(ix9x3_t b, short s) { 736 b = s * b; 737 } 738 739 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short( 740 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} 741 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 742 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 743 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 744 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 745 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0 746 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer 747 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]] 748 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 749 // CHECK-NEXT: ret void 750 // 751 void 
multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) { 752 b *= s; 753 } 754 755 // CHECK-LABEL: @multiply_int_matrix_scalar_ull( 756 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 757 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 758 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} 759 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 760 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 761 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0 762 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer 763 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]] 764 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 765 // CHECK-NEXT: ret void 766 // 767 void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { 768 b = b * s; 769 } 770 771 void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { 772 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull( 773 // NOOPT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} 774 // OPT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 775 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 776 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 777 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 778 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0 779 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer 780 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]] 781 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 782 // CHECK-NEXT: ret void 783 784 b *= s; 785 } 786 787 // CHECK-LABEL: 
@multiply_float_matrix_constant( 788 // CHECK-NEXT: entry: 789 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4 790 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 791 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} 792 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} 793 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) 794 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 795 // CHECK-NEXT: ret void 796 // 797 void multiply_float_matrix_constant(fx2x3_t a) { 798 a = a * 2.5; 799 } 800 801 // CHECK-LABEL: @multiply_compound_float_matrix_constant( 802 // CHECK-NEXT: entry: 803 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4 804 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 805 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} 806 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} 807 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) 808 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 809 // CHECK-NEXT: ret void 810 void multiply_compound_float_matrix_constant(fx2x3_t a) { 811 a *= 2.5; 812 } 813 814 // CHECK-LABEL: @multiply_int_matrix_constant( 815 // CHECK-NEXT: entry: 816 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4 817 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 818 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} 819 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} 820 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]] 821 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 822 // CHECK-NEXT: ret void 823 // 824 void multiply_int_matrix_constant(ix9x3_t a) { 825 a = 5 * a; 826 } 827 828 // CHECK-LABEL: 
@multiply_compound_int_matrix_constant( 829 // CHECK-NEXT: entry: 830 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4 831 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 832 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} 833 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} 834 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5) 835 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 836 // CHECK-NEXT: ret void 837 // 838 void multiply_compound_int_matrix_constant(ix9x3_t a) { 839 a *= 5; 840 } 841 842 // CHECK-LABEL: @divide_double_matrix_scalar_float( 843 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 844 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} 845 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 846 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 847 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double 848 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0 849 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer 850 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]] 851 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 852 // CHECK-NEXT: ret void 853 // 854 void divide_double_matrix_scalar_float(dx5x5_t a, float s) { 855 a = a / s; 856 } 857 858 // CHECK-LABEL: @divide_double_matrix_scalar_double( 859 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 860 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} 861 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 862 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 863 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> 
poison, double [[S]], i64 0 864 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer 865 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]] 866 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 867 // CHECK-NEXT: ret void 868 // 869 void divide_double_matrix_scalar_double(dx5x5_t a, double s) { 870 a = a / s; 871 } 872 873 // CHECK-LABEL: @divide_float_matrix_scalar_double( 874 // NOOPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 875 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} 876 // OPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 877 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 878 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float 879 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 880 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer 881 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]] 882 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4 883 // CHECK-NEXT: ret void 884 // 885 void divide_float_matrix_scalar_double(fx2x3_t b, double s) { 886 b = b / s; 887 } 888 889 // CHECK-LABEL: @divide_int_matrix_scalar_short( 890 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 891 // NOOPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} 892 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 893 // OPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 894 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 895 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0 896 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> 
poison, <27 x i32> zeroinitializer 897 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]] 898 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 899 // CHECK-NEXT: ret void 900 // 901 void divide_int_matrix_scalar_short(ix9x3_t b, short s) { 902 b = b / s; 903 } 904 905 // CHECK-LABEL: @divide_int_matrix_scalar_ull( 906 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 907 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} 908 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} 909 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 910 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 911 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0 912 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer 913 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]] 914 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 915 // CHECK-NEXT: ret void 916 // 917 void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { 918 b = b / s; 919 } 920 921 // CHECK-LABEL: @divide_ull_matrix_scalar_ull( 922 // NOOPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}} 923 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} 924 // OPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}} 925 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 926 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0 927 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer 928 // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]] 929 // CHECK-NEXT: store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8 930 // CHECK-NEXT: ret void 931 // 932 
void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) { 933 b = b / s; 934 } 935 936 // CHECK-LABEL: @divide_float_matrix_constant( 937 // CHECK-NEXT: entry: 938 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4 939 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 940 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} 941 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} 942 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00) 943 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 944 // CHECK-NEXT: ret void 945 // 946 void divide_float_matrix_constant(fx2x3_t a) { 947 a = a / 2.5; 948 } 949 950 // Tests for the matrix type operators. 951 952 typedef double dx5x5_t __attribute__((matrix_type(5, 5))); 953 typedef float fx2x3_t __attribute__((matrix_type(2, 3))); 954 955 // Check that we can use matrix index expression on different floating point 956 // matrixes and indices. 
957 void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) { 958 // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double( 959 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}} 960 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 961 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} 962 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5 963 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr {{.*}}, align 8 964 // CHECK-NEXT: ret void 965 966 a[0ll][1u] = d; 967 } 968 969 void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) { 970 // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double( 971 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}} 972 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 973 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}} 974 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21 975 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8 976 // CHECK-NEXT: ret void 977 978 a[1][4u] = d; 979 } 980 981 void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) { 982 // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float( 983 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} 984 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 985 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 986 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3 987 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 988 // CHECK-NEXT: ret void 989 990 b[1ull][1] = e; 991 } 992 993 void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) { 994 // CHECK-LABEL: @insert_float_matrix_idx_i_u_float( 995 // NOOPT: [[E:%.*]] = load 
float, ptr %e.addr, align 4{{$}} 996 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} 997 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 998 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 999 // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64 1000 // NOOPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}} 1001 // OPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 1002 // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64 1003 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2 1004 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]] 1005 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 1006 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 1007 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 1008 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]] 1009 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 1010 // CHECK-NEXT: ret void 1011 1012 b[j][k] = e; 1013 } 1014 1015 void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) { 1016 // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float( 1017 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} 1018 // NOOPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}} 1019 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 1020 // OPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}} 1021 // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64 1022 // NOOPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}} 1023 // OPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 1024 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2 1025 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]] 1026 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 1027 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 1028 // 
CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 1029 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]] 1030 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 1031 // CHECK-NEXT: ret void 1032 1033 (b)[j][k] = e; 1034 } 1035 1036 // Check that we can can use matrix index expressions on integer matrixes. 1037 typedef int ix9x3_t __attribute__((matrix_type(9, 3))); 1038 void insert_int_idx_expr(ix9x3_t a, int i) { 1039 // CHECK-LABEL: @insert_int_idx_expr( 1040 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}} 1041 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}} 1042 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 1043 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 1044 // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]] 1045 // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64 1046 // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]] 1047 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27 1048 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 1049 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 1050 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]] 1051 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4 1052 // CHECK-NEXT: ret void 1053 1054 a[4 + i][1 + 1u] = i; 1055 } 1056 1057 // Check that we can can use matrix index expressions on FP and integer 1058 // matrixes. 
1059 typedef int ix9x3_t __attribute__((matrix_type(9, 3))); 1060 void insert_float_into_int_matrix(ix9x3_t *a, int i) { 1061 // CHECK-LABEL: @insert_float_into_int_matrix( 1062 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} 1063 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 1064 // NOOPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}} 1065 // OPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 1066 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}} 1067 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13 1068 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4 1069 // CHECK-NEXT: ret void 1070 1071 (*a)[4][1] = i; 1072 } 1073 1074 // Check that we can use overloaded matrix index expressions on matrixes with 1075 // matching dimensions, but different element types. 1076 typedef double dx3x3_t __attribute__((matrix_type(3, 3))); 1077 typedef float fx3x3_t __attribute__((matrix_type(3, 3))); 1078 void insert_matching_dimensions1(dx3x3_t a, double i) { 1079 // CHECK-LABEL: @insert_matching_dimensions1( 1080 // NOOPT: [[I:%.*]] = load double, ptr %i.addr, align 8{{$}} 1081 // OPT: [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 1082 // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}} 1083 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5 1084 // CHECK-NEXT: store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8 1085 // CHECK-NEXT: ret void 1086 1087 a[2u][1u] = i; 1088 } 1089 1090 void insert_matching_dimensions(fx3x3_t b, float e) { 1091 // CHECK-LABEL: @insert_matching_dimensions( 1092 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} 1093 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 1094 // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} 
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
  // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  b[1u][2u] = e;
}

double extract_double(dx5x5_t a) {
  // CHECK-LABEL: @extract_double(
  // NOOPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  // OPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
  // CHECK-NEXT: ret double [[MATEXT]]

  return a[2][3 - 1u];
}

double extract_float(fx3x3_t b) {
  // CHECK-LABEL: @extract_float(
  // NOOPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}}
  // OPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
  // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
  // CHECK-NEXT: ret double [[TO_DOUBLE]]

  return b[2][1];
}

// Variable indices: the flattened index is column * 9 (rows) + row, and with
// optimizations enabled the bound (index < 27) is asserted via llvm.assume.
int extract_int(ix9x3_t c, unsigned long j) {
  // CHECK-LABEL: @extract_int(
  // NOOPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}}
  // NOOPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}}
  // OPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // OPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
  // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
  // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT: ret i32 [[MATEXT]]

  return c[j][j];
}

typedef double dx3x2_t __attribute__((matrix_type(3, 2)));

double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
  // CHECK-LABEL: @test_extract_matrix_pointer1(
  // NOOPT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
  // NOOPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1
  // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
  // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2
  // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
  // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
  // CHECK-NEXT: ret double [[MATEXT]]

  return ptr[1][2][j][1];
}

double test_extract_matrix_pointer2(dx3x2_t **ptr) {
  // CHECK-LABEL: @test_extract_matrix_pointer2(
  // CHECK-NEXT: entry:
  // NOOPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
  // OPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4
  // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
  // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6
  // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
  // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
  // CHECK-NEXT: ret double [[MATEXT]]

  return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
}

// Extract from one element and insert into another element of the same matrix;
// the matrix vector is re-loaded before the insert.
void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
  // CHECK-LABEL: @insert_extract(
  // NOOPT: [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}}
  // OPT: [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64
  // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
  // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0
  // NOOPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
  // NOOPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}}
  // OPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3
  // CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}}
  // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
  // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
  // CHECK-NEXT: ret void

  b[2][j] = b[0][k];
}

// Compound assignment on a matrix element: read-modify-write with a re-load of
// the matrix between the extract and the insert.
void insert_compound_stmt(dx5x5_t a) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
  // CHECK: [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}}
  // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
  // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
  // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}}
  // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
  // CHECK-NEXT: store <25 x double> [[INS]], ptr [[A_PTR]], align 8
  // CHECK-NEXT: ret void

  a[2][3] -= 1.0;
}

struct Foo {
  fx2x3_t mat;
};

// Compound assignment on a matrix stored in a struct field, with variable
// indices; under -O1 the bounds assume is emitted for both the read and the
// write side of the read-modify-write.
void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
  // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
  // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
  // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
  // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
  // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
  // CHECK-NEXT: store <6 x float> [[INS]], ptr %mat, align 4
  // CHECK-NEXT: ret void

  a->mat[i][j] += f;
}

// Matrix elements used as index expressions for another matrix: each inner
// extract gets its own bounds assume under -O1 before feeding the outer index.
void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
  // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
  // NOOPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
  // CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
  // CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
  // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
  // CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
  // NOOPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}}
  // OPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
  // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
  // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
  // CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
  // CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
  // NOOPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // OPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
  // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2
  // CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
  // CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
  // CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
  // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
  // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
  // CHECK-NEXT: [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}}
  // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
  // CHECK-NEXT: store <25 x double> [[INS]], ptr [[B_PTR]], align 8
  b[a[i][j]][a[j][i] + 2] = 1.5;
}