// RUN: %clang_cc1 -no-enable-noundef-analysis -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=COMMON,CHECK64 %s
// RUN: %clang_cc1 -no-enable-noundef-analysis -fenable-matrix -triple i386-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=COMMON,CHECK32 %s

// Also check we do not crash when running some middle-end passes. Most
// importantly this includes the IR verifier, to ensure we emit valid IR.
// RUN: %clang_cc1 -fenable-matrix -emit-llvm -triple x86_64-apple-darwin %s -o %t

// Tests for the matrix type builtins.

typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
typedef float fx3x2_t __attribute__((matrix_type(3, 2)));
typedef int ix20x4_t __attribute__((matrix_type(20, 4)));
typedef int ix4x20_t __attribute__((matrix_type(4, 20)));
typedef unsigned ux1x6_t __attribute__((matrix_type(1, 6)));
typedef unsigned ux6x1_t __attribute__((matrix_type(6, 1)));

void transpose_double_5x5(dx5x5_t *a) {
  // COMMON-LABEL: define{{.*}} void @transpose_double_5x5(
  // CHECK32: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 4
  // CHECK64: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8
  // COMMON-NEXT: [[TRANS:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[A]], i32 5, i32 5)
  // CHECK32-NEXT: store <25 x double> [[TRANS]], ptr %a_t, align 4
  // CHECK64-NEXT: store <25 x double> [[TRANS]], ptr %a_t, align 8

  dx5x5_t a_t = __builtin_matrix_transpose(*a);
}

void transpose_float_3x2(fx3x2_t *a) {
  // COMMON-LABEL: define{{.*}} void @transpose_float_3x2(
  // COMMON: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4
  // COMMON-NEXT: [[TRANS:%.*]] = call <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> [[A]], i32 3, i32 2)
  // COMMON-NEXT: store <6 x float> [[TRANS]], ptr %a_t, align 4

  fx2x3_t a_t = __builtin_matrix_transpose(*a);
}

void transpose_int_20x4(ix20x4_t *a) {
  // COMMON-LABEL: define{{.*}} void @transpose_int_20x4(
  // COMMON: [[A:%.*]] = load <80 x i32>, ptr {{.*}}, align 4
  // COMMON-NEXT: [[TRANS:%.*]] = call <80 x i32> @llvm.matrix.transpose.v80i32(<80 x i32> [[A]], i32 20, i32 4)
  // COMMON-NEXT: store <80 x i32> [[TRANS]], ptr %a_t, align 4

  ix4x20_t a_t = __builtin_matrix_transpose(*a);
}

struct Foo {
  ux1x6_t in;
  ux6x1_t out;
};

void transpose_struct_member(struct Foo *F) {
  // COMMON-LABEL: define{{.*}} void @transpose_struct_member(
  // COMMON: [[M:%.*]] = load <6 x i32>, ptr {{.*}}, align 4
  // COMMON-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6)
  // CHECK32-NEXT: [[F_ADDR:%.*]] = load ptr, ptr %F.addr, align 4
  // CHECK64-NEXT: [[F_ADDR:%.*]] = load ptr, ptr %F.addr, align 8
  // COMMON-NEXT: [[OUT_PTR:%.*]] = getelementptr inbounds nuw %struct.Foo, ptr [[F_ADDR]], i32 0, i32 1
  // COMMON-NEXT: store <6 x i32> [[M_T]], ptr [[OUT_PTR]], align 4

  F->out = __builtin_matrix_transpose(F->in);
}

void transpose_transpose_struct_member(struct Foo *F) {
  // COMMON-LABEL: define{{.*}} void @transpose_transpose_struct_member(
  // COMMON: [[M:%.*]] = load <6 x i32>, ptr {{.*}}, align 4
  // COMMON-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6)
  // COMMON-NEXT: [[M_T2:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M_T]], i32 6, i32 1)
  // CHECK32-NEXT: [[F_ADDR:%.*]] = load ptr, ptr %F.addr, align 4
  // CHECK64-NEXT: [[F_ADDR:%.*]] = load ptr, ptr %F.addr, align 8
  // COMMON-NEXT: [[IN_PTR:%.*]] = getelementptr inbounds nuw %struct.Foo, ptr [[F_ADDR]], i32 0, i32 0
  // COMMON-NEXT: store <6 x i32> [[M_T2]], ptr [[IN_PTR]], align 4

  F->in = __builtin_matrix_transpose(__builtin_matrix_transpose(F->in));
}
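
// Note on the transpose tests above: each matrix value is loaded as a flat,
// column-major vector of Rows * Columns elements, passed to
// @llvm.matrix.transpose together with its dimensions as immediate i32
// arguments, and the result is stored to a matrix whose type has the
// dimensions swapped (e.g. a fx3x2_t operand yields a fx2x3_t result).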

dx5x5_t get_matrix(void);

void transpose_rvalue(void) {
  // COMMON-LABEL: define{{.*}} void @transpose_rvalue()
  // COMMON-NEXT: entry:
  // CHECK32-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 4
  // CHECK64-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
  // COMMON-NEXT: [[CALL:%.*]] = call <25 x double> @get_matrix()
  // COMMON-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[CALL]], i32 5, i32 5)
  // CHECK32-NEXT: store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 4
  // CHECK64-NEXT: store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 8

  dx5x5_t m_t = __builtin_matrix_transpose(get_matrix());
}

const dx5x5_t global_matrix;

void transpose_global(void) {
  // COMMON-LABEL: define{{.*}} void @transpose_global()
  // COMMON-NEXT: entry:
  // CHECK32-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 4
  // CHECK32-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr @global_matrix, align 4
  // CHECK64-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
  // CHECK64-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr @global_matrix, align 8
  // COMMON-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[GLOBAL_MATRIX]], i32 5, i32 5)
  // CHECK32-NEXT: store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 4
  // CHECK64-NEXT: store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 8

  dx5x5_t m_t = __builtin_matrix_transpose(global_matrix);
}

void column_major_load_with_const_stride_double(double *Ptr) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_with_const_stride_double(ptr %Ptr)
  // CHECK32: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(ptr align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
  // CHECK64: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(ptr align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)

  dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}

void column_major_load_with_const_stride2_double(double *Ptr) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_with_const_stride2_double(ptr %Ptr)
  // CHECK32: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(ptr align 4 [[PTR]], i32 15, i1 false, i32 5, i32 5)
  // CHECK64: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(ptr align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)

  dx5x5_t m_a2 = __builtin_matrix_column_major_load(Ptr, 5, 5, 2 * 3 + 9);
}
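
// The stride argument of __builtin_matrix_column_major_load is passed to the
// intrinsic in the target's pointer width: the tests below check that a wider
// stride is truncated to i32 on i386 and that a narrower signed stride is
// sign-extended to i64 on x86_64.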

void column_major_load_with_variable_stride_ull_float(float *Ptr, unsigned long long S) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_with_variable_stride_ull_float(ptr %Ptr, i64 %S)
  // CHECK32: [[S:%.*]] = load i64, ptr %S.addr, align 8
  // CHECK32-NEXT: [[STRIDE_TRUNC:%.*]] = trunc i64 [[S]] to i32
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call <6 x float> @llvm.matrix.column.major.load.v6f32.i32(ptr align 4 [[PTR]], i32 [[STRIDE_TRUNC]], i1 false, i32 2, i32 3)

  // CHECK64: [[S:%.*]] = load i64, ptr %S.addr, align 8
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call <6 x float> @llvm.matrix.column.major.load.v6f32.i64(ptr align 4 [[PTR]], i64 [[S]], i1 false, i32 2, i32 3)

  fx2x3_t m_b = __builtin_matrix_column_major_load(Ptr, 2, 3, S);
}

void column_major_load_with_stride_math_int(int *Ptr, int S) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_with_stride_math_int(ptr %Ptr, i32 %S)
  // COMMON: [[S:%.*]] = load i32, ptr %S.addr, align 4
  // COMMON-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S]], 32
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call <80 x i32> @llvm.matrix.column.major.load.v80i32.i32(ptr align 4 [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 20)
  //
  // CHECK64-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call <80 x i32> @llvm.matrix.column.major.load.v80i32.i64(ptr align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)

  ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
}

void column_major_load_with_stride_math_s_int(int *Ptr, short S) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_with_stride_math_s_int(ptr %Ptr, i16 signext %S)
  // COMMON: [[S:%.*]] = load i16, ptr %S.addr, align 2
  // COMMON-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
  // COMMON-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S_EXT]], 32
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32.i32(ptr align 4 [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 20)
  //
  // CHECK64-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32.i64(ptr align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)

  ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
}

void column_major_load_array1(double Ptr[25]) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_array1(ptr %Ptr)
  // CHECK32: [[ADDR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(ptr align 4 [[ADDR]], i32 5, i1 false, i32 5, i32 5)

  // CHECK64: [[ADDR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(ptr align 8 [[ADDR]], i64 5, i1 false, i32 5, i32 5)

  dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}
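
// The next test loads from a local array instead of a pointer parameter; the
// array decays to a pointer via a getelementptr, and the alloca's alignment
// (8 on i386, 16 on x86_64) is propagated to the intrinsic's pointer argument.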

void column_major_load_array2(void) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_array2() #0 {
  // COMMON-NEXT: entry:
  // CHECK32-NEXT: [[PTR:%.*]] = alloca [25 x double], align 8
  // CHECK32: [[ARRAY_DEC:%.*]] = getelementptr inbounds [25 x double], ptr [[PTR]], i32 0, i32 0
  // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(ptr align 8 [[ARRAY_DEC]], i32 5, i1 false, i32 5, i32 5)

  // CHECK64-NEXT: [[PTR:%.*]] = alloca [25 x double], align 16
  // CHECK64: [[ARRAY_DEC:%.*]] = getelementptr inbounds [25 x double], ptr [[PTR]], i64 0, i64 0
  // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(ptr align 16 [[ARRAY_DEC]], i64 5, i1 false, i32 5, i32 5)

  double Ptr[25];
  dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}

void column_major_load_const(const double *Ptr) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_const(ptr %Ptr)
  // CHECK32: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(ptr align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
  //
  // CHECK64: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(ptr align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)

  dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}

void column_major_load_volatile(volatile double *Ptr) {
  // COMMON-LABEL: define{{.*}} void @column_major_load_volatile(ptr %Ptr)
  // CHECK32: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i32(ptr align 4 [[PTR]], i32 5, i1 true, i32 5, i32 5)
  //
  // CHECK64: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64.i64(ptr align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)

  dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
}

void column_major_store_with_const_stride_double(double *Ptr) {
  // COMMON-LABEL: define{{.*}} void @column_major_store_with_const_stride_double(ptr %Ptr)
  // CHECK32: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 4
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], ptr align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
  //
  // CHECK64: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 8
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], ptr align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)

  dx5x5_t m;
  __builtin_matrix_column_major_store(m, Ptr, 5);
}

void column_major_store_with_const_stride2_double(double *Ptr) {
  // COMMON-LABEL: define{{.*}} void @column_major_store_with_const_stride2_double(ptr %Ptr)
  // CHECK32: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 4
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], ptr align 4 [[PTR]], i32 15, i1 false, i32 5, i32 5)
  //
  // CHECK64: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 8
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], ptr align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
  //
  dx5x5_t m;
  __builtin_matrix_column_major_store(m, Ptr, 2 * 3 + 9);
}
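
// __builtin_matrix_column_major_store only takes the matrix value, the
// destination pointer and the stride; the dimensions are taken from the
// matrix type. In the next two tests the stride expression is evaluated
// before the call and, on x86_64, sign-extended to i64.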

void column_major_store_with_stride_math_int(int *Ptr, int S) {
  // COMMON-LABEL: define{{.*}} void @column_major_store_with_stride_math_int(ptr %Ptr, i32 %S)
  // COMMON: [[M:%.*]] = load <80 x i32>, ptr {{.*}}, align 4
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // COMMON-NEXT: [[S:%.*]] = load i32, ptr %S.addr, align 4
  // COMMON-NEXT: [[ADD:%.*]] = add nsw i32 [[S]], 32
  // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v80i32.i32(<80 x i32> [[M]], ptr align 4 [[PTR]], i32 [[ADD]], i1 false, i32 4, i32 20)
  //
  // CHECK64-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
  // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v80i32.i64(<80 x i32> [[M]], ptr align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)

  ix4x20_t m;
  __builtin_matrix_column_major_store(m, Ptr, S + 32);
}

void column_major_store_with_stride_math_s_int(int *Ptr, short S) {
  // COMMON-LABEL: define{{.*}} void @column_major_store_with_stride_math_s_int(ptr %Ptr, i16 signext %S)
  // COMMON: [[M:%.*]] = load <80 x i32>, ptr {{.*}}, align 4
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // COMMON-NEXT: [[S:%.*]] = load i16, ptr %S.addr, align 2
  // COMMON-NEXT: [[EXT:%.*]] = sext i16 [[S]] to i32
  // COMMON-NEXT: [[ADD:%.*]] = add nsw i32 [[EXT]], 2
  // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v80i32.i32(<80 x i32> [[M]], ptr align 4 [[PTR]], i32 [[ADD]], i1 false, i32 4, i32 20)
  //
  // CHECK64-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
  // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v80i32.i64(<80 x i32> [[M]], ptr align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)

  ix4x20_t m;
  __builtin_matrix_column_major_store(m, Ptr, S + 2);
}

void column_major_store_array1(double Ptr[25]) {
  // COMMON-LABEL: define{{.*}} void @column_major_store_array1(ptr %Ptr)
  // CHECK32: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 4
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], ptr align 4 [[PTR]], i32 5, i1 false, i32 5, i32 5)
  //
  // CHECK64: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 8
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], ptr align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)

  dx5x5_t m;
  __builtin_matrix_column_major_store(m, Ptr, 5);
}

void column_major_store_array2(void) {
  // COMMON-LABEL: define{{.*}} void @column_major_store_array2()
  // CHECK32: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 4
  // CHECK32-NEXT: [[PTR:%.*]] = getelementptr inbounds [25 x double], ptr %Ptr, i32 0, i32 0
  // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], ptr align 8 [[PTR]], i32 5, i1 false, i32 5, i32 5)
  //
  // CHECK64: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 8
  // CHECK64-NEXT: [[PTR:%.*]] = getelementptr inbounds [25 x double], ptr %Ptr, i64 0, i64 0
  // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], ptr align 16 [[PTR]], i64 5, i1 false, i32 5, i32 5)

  double Ptr[25];
  dx5x5_t m;
  __builtin_matrix_column_major_store(m, Ptr, 5);
}
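
// Storing through a pointer to volatile double sets the intrinsic's i1
// volatile flag to true, mirroring the volatile load test above.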

void column_major_store_volatile(volatile double *Ptr) {
  // COMMON-LABEL: define{{.*}} void @column_major_store_volatile(ptr %Ptr) #0 {
  // CHECK32: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 4
  // CHECK32-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 4
  // CHECK32-NEXT: call void @llvm.matrix.column.major.store.v25f64.i32(<25 x double> [[M]], ptr align 4 [[PTR]], i32 5, i1 true, i32 5, i32 5)
  //
  // CHECK64: [[M:%.*]] = load <25 x double>, ptr {{.*}}, align 8
  // CHECK64-NEXT: [[PTR:%.*]] = load ptr, ptr %Ptr.addr, align 8
  // CHECK64-NEXT: call void @llvm.matrix.column.major.store.v25f64.i64(<25 x double> [[M]], ptr align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)

  dx5x5_t m;
  __builtin_matrix_column_major_store(m, Ptr, 5);
}
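
// Illustrative only, not part of the checked IR: a hypothetical round trip
// combining the builtins exercised above would load a column-major 5x5
// matrix, transpose it, and store it back with the same stride:
//
//   void round_trip(double *Ptr) {
//     dx5x5_t M = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
//     __builtin_matrix_column_major_store(__builtin_matrix_transpose(M), Ptr, 5);
//   }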