1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,NOOPT %s 2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s 3 typedef double dx5x5_t __attribute__((matrix_type(5, 5))); 4 using fx2x3_t = float __attribute__((matrix_type(2, 3))); 5 6 template <typename EltTy, unsigned Rows, unsigned Columns> 7 struct MyMatrix { 8 using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns))); 9 10 matrix_t value; 11 }; 12 13 template <typename EltTy0, unsigned R0, unsigned C0> 14 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) { 15 return A.value + B.value; 16 } 17 18 void test_add_template() { 19 // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev() 20 // CHECK: %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2) 21 22 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_( 23 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}} 24 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}} 25 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 26 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 27 // CHECK-NEXT: [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]] 28 // CHECK-NEXT: ret <10 x float> [[RES]] 29 30 MyMatrix<float, 2, 5> Mat1; 31 MyMatrix<float, 2, 5> Mat2; 32 Mat1.value = add(Mat1, Mat2); 33 } 34 35 template <typename EltTy0, unsigned R0, unsigned C0> 36 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) { 37 return A.value - B.value; 38 } 39 40 void test_subtract_template() { 41 // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev() 42 // CHECK: %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2) 43 44 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_( 45 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}} 46 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}} 47 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 48 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 49 // CHECK-NEXT: [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]] 50 // CHECK-NEXT: ret <10 x float> [[RES]] 51 52 MyMatrix<float, 2, 5> Mat1; 53 MyMatrix<float, 2, 5> Mat2; 54 Mat1.value = subtract(Mat1, Mat2); 55 } 56 57 struct DoubleWrapper1 { 58 int x; 59 operator double() { 60 return x; 61 } 62 }; 63 64 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) { 65 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE( 66 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}} 67 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 68 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1) 69 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0 70 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer 71 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]] 72 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8 73 74 DoubleWrapper1 w1; 75 w1.x = 10; 76 m.value = m.value - w1; 77 } 78 79 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) { 80 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE( 81 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1) 82 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}} 83 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 84 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0 85 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer 86 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] 87 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8 88 89 DoubleWrapper1 w1; 90 w1.x = 10; 91 m.value = w1 - m.value; 92 } 93 94 struct DoubleWrapper2 { 95 int x; 96 operator double() { 97 return x; 98 } 99 }; 100 101 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) { 102 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE( 103 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}} 104 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}} 105 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2) 106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0 107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer 108 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]] 109 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8 110 111 DoubleWrapper2 w2; 112 w2.x = 20; 113 m.value = m.value + w2; 114 } 115 116 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) { 117 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE( 118 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2) 119 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}} 120 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0 122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer 123 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] 124 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8 125 126 DoubleWrapper2 w2; 127 w2.x = 20; 128 m.value = w2 + m.value; 129 } 130 131 struct IntWrapper { 132 char x; 133 operator int() { 134 return x; 135 } 136 }; 137 138 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) { 139 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE( 140 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}} 141 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 142 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3) 143 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 %call to double 144 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0 145 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer 146 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]] 147 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8 148 149 IntWrapper w3; 150 w3.x = 'c'; 151 m.value = m.value + w3; 152 } 153 154 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) { 155 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE( 156 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3) 157 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 %call to double 158 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}} 159 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 160 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0 161 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer 162 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] 163 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8 164 165 IntWrapper w3; 166 w3.x = 'c'; 167 m.value = w3 - m.value; 168 } 169 170 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1> 171 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) { 172 return A.value * B.value; 173 } 174 175 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1, 176 MyMatrix<float, 5, 2> Mat2) { 177 // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE( 178 // CHECK-NEXT: entry: 179 // CHECK-NEXT: [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2) 180 // CHECK-NEXT: %value = getelementptr inbounds nuw %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0 181 // CHECK-NEXT: store <4 x float> [[RES]], ptr %value, align 4 182 // CHECK-NEXT: ret void 183 // 184 // CHECK-LABEL: define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE( 185 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}} 186 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}} 187 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 188 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 189 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2) 190 // CHECK-NEXT: ret <4 x float> [[RES]] 191 192 MyMatrix<float, 2, 2> Res; 193 Res.value = multiply(Mat1, Mat2); 194 return Res; 195 } 196 197 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) { 198 // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper( 199 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}}) 200 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 %call to double 201 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}} 202 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 203 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0 204 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer 205 // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]] 206 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8 207 // CHECK-NEXT: ret void 208 m.value = w3 * m.value; 209 } 210 211 template <typename EltTy, unsigned Rows, unsigned Columns> 212 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) { 213 Mat.value[i][j] = e; 214 } 215 216 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) { 217 // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj( 218 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}} 219 // NOOPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}} 220 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} 221 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} 222 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 223 // OPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 224 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 225 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 226 // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]]) 227 // CHECK-NEXT: ret void 228 // 229 // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj( 230 // NOOPT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}} 231 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} 232 // OPT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 233 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 234 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64 235 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} 236 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 237 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64 238 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2 239 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]] 240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4 241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 242 // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}} 243 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]] 244 // CHECK-NEXT: store <4 x i32> [[MATINS]], ptr {{.*}}, align 4 245 // CHECK-NEXT: ret void 246 247 insert(Mat, e, i, j); 248 } 249 250 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) { 251 // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf( 252 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}} 253 // NOOPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} 254 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 255 // OPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 256 // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5) 257 // CHECK-NEXT: ret void 258 // 259 // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj( 260 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} 261 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} 262 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 263 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 264 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64 265 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} 266 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} 267 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64 268 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 3 269 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]] 270 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24 271 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 272 // CHECK-NEXT: [[MAT:%.*]] = load <24 x float>, ptr {{.*}}, align 4{{$}} 273 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]] 274 // CHECK-NEXT: store <24 x float> [[MATINS]], ptr {{.*}}, align 4 275 // CHECK-NEXT: ret void 276 277 insert(Mat, e, 2, 5); 278 } 279 280 template <typename EltTy, unsigned Rows, unsigned Columns> 281 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) { 282 return Mat.value[1u][0u]; 283 } 284 285 int test_extract_template(MyMatrix<int, 2, 2> Mat1) { 286 // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE( 287 // CHECK-NEXT: entry: 288 // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(ptr noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]]) 289 // CHECK-NEXT: ret i32 [[CALL]] 290 // 291 // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE( 292 // NOOPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}} 293 // OPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 294 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1 295 // CHECK-NEXT: ret i32 [[MATEXT]] 296 297 return extract(Mat1); 298 } 299 300 using double4x4 = double __attribute__((matrix_type(4, 4))); 301 302 template <class R, class C> 303 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) { 304 // FIXME: We can't actually do 'return m[r][c]' here currently. 305 static double d; 306 return d; 307 } 308 309 double test_matrix_subscript(double4x4 m) { 310 // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE( 311 // NOOPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} 312 // OPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} 313 // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) ptr @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2) 314 // NOOPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}} 315 // OPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}} 316 // CHECK-NEXT: ret double [[RES]] 317 318 return matrix_subscript(m, 1, 2); 319 } 320 321 const double &test_matrix_subscript_reference(const double4x4 m) { 322 // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE( 323 // CHECK-NEXT: entry: 324 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8 325 // CHECK-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 326 // CHECK-NEXT: store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8 327 // NOOPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}} 328 // OPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}} 329 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4 330 // CHECK-NEXT: store double [[MATEXT]], ptr [[REF_TMP]], align 8 331 // CHECK: ret ptr [[REF_TMP]] 332 333 return m[0][1]; 334 } 335 336 struct UnsignedWrapper { 337 char x; 338 operator unsigned() { 339 return x; 340 } 341 }; 342 343 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) { 344 // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper( 345 // CHECK: [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i) 346 // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1 347 // CHECK-NEXT: [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64 348 // CHECK-NEXT: [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j) 349 // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1 350 // CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64 351 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4 352 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]] 353 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}} 354 // NOOPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}} 355 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16 356 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 357 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 358 // OPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}} 359 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]] 360 // CHECK-NEXT: ret double [[MATEXT]] 361 return m[i + 1][j - 1]; 362 } 363 364 template <class T, unsigned R, unsigned C> 365 using matrix_type = T __attribute__((matrix_type(R, C))); 366 struct identmatrix_t { 367 template <class T, unsigned N> 368 operator matrix_type<T, N, N>() const { 369 matrix_type<T, N, N> result; 370 for (unsigned i = 0; i != N; ++i) 371 result[i][i] = 1; 372 return result; 373 } 374 }; 375 376 constexpr identmatrix_t identmatrix; 377 378 void test_constexpr1(matrix_type<float, 4, 4> &m) { 379 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE( 380 // NOOPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}} 381 // OPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 382 // CHECK-NEXT: [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix) 383 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]] 384 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}} 385 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 386 // CHECK-NEXT: store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4 387 // CHECK-NEXT: ret voi 388 389 // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv( 390 // CHECK-LABEL: for.body: ; preds = %for.cond 391 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}} 392 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} 393 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64 394 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}} 395 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} 396 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64 397 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4 398 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]] 399 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16 400 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 401 // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}} 402 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]] 403 // CHECK-NEXT: store <16 x float> [[MATINS]], ptr %result, align 4 404 // CHECK-NEXT: br label %for.inc 405 m = m + identmatrix; 406 } 407 408 void test_constexpr2(matrix_type<int, 5, 5> &m) { 409 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE( 410 // CHECK: [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(ptr {{[^,]*}} @_ZL11identmatrix) 411 // NOOPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}} 412 // OPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} 413 // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]] 414 // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], splat (i32 1) 415 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}} 416 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}} 417 // CHECK-NEXT: store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4 418 // CHECK-NEXT: ret void 419 // 420 421 // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv( 422 // CHECK-LABEL: for.body: ; preds = %for.cond 423 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}} 424 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} 425 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64 426 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}} 427 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} 428 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64 429 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5 430 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]] 431 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25 432 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) 433 // CHECK-NEXT: [[MAT:%.*]] = load <25 x i32>, ptr %result, align 4{{$}} 434 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]] 435 // CHECK-NEXT: store <25 x i32> [[MATINS]], ptr %result, align 4 436 // CHECK-NEXT: br label %for.inc 437 438 m = identmatrix - m + 1; 439 } 440