// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

// Verifies the LLVM IR emitted for matrix-typed values: plain loads and
// stores through pointers, by-value parameters, return values, struct
// members, and inline-asm operands. The command at the top of the file
// compiles with -disable-llvm-passes, so the expected IR below is the
// unoptimized output and is matched instruction by instruction.

// Fail the compile outright if the matrix extension is not reported as
// available.
#if !__has_extension(matrix_types)
#error Expected extension 'matrix_types' to be enabled
#endif

// Likewise for the scalar-division extension.
#if !__has_extension(matrix_types_scalar_division)
#error Expected extension 'matrix_types_scalar_division' to be enabled
#endif

typedef double dx5x5_t __attribute__((matrix_type(5, 5)));

// Expected layout of the Matrix struct declared further down: its fx3x4_t
// member is expected to appear as a [12 x float] array inside the struct.
// CHECK: %struct.Matrix = type { i8, [12 x float], float }

// Copies a 5x5 double matrix from *b to *a. The expected IR is a single
// <25 x double> load and store, both with alignment 8.
void load_store_double(dx5x5_t *a, dx5x5_t *b) {
  // CHECK-LABEL: define{{.*}} void @load_store_double(
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca ptr, align 8
  // CHECK-NEXT: %b.addr = alloca ptr, align 8
  // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
  // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
  // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
  // CHECK-NEXT: %1 = load <25 x double>, ptr %0, align 8
  // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
  // CHECK-NEXT: store <25 x double> %1, ptr %2, align 8
  // CHECK-NEXT: ret void

  *a = *b;
}

// Same copy for a 3x4 float matrix: expects <12 x float> with alignment 4.
typedef float fx3x4_t __attribute__((matrix_type(3, 4)));
void load_store_float(fx3x4_t *a, fx3x4_t *b) {
  // CHECK-LABEL: define{{.*}} void @load_store_float(
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca ptr, align 8
  // CHECK-NEXT: %b.addr = alloca ptr, align 8
  // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
  // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
  // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
  // CHECK-NEXT: %1 = load <12 x float>, ptr %0, align 4
  // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
  // CHECK-NEXT: store <12 x float> %1, ptr %2, align 4
  // CHECK-NEXT: ret void

  *a = *b;
}

// Same copy for an int matrix: expects <12 x i32> with alignment 4.
// NOTE(review): the typedef name reads "3x4" while the attribute arguments
// are (4, 3); the expected IR only depends on the 12-element total.
typedef int ix3x4_t __attribute__((matrix_type(4, 3)));
void load_store_int(ix3x4_t *a, ix3x4_t *b) {
  // CHECK-LABEL: define{{.*}} void @load_store_int(
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca ptr, align 8
  // CHECK-NEXT: %b.addr = alloca ptr, align 8
  // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
  // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
  // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
  // CHECK-NEXT: %1 = load <12 x i32>, ptr %0, align 4
  // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
  // CHECK-NEXT: store <12 x i32> %1, ptr %2, align 4
  // CHECK-NEXT: ret void

  *a = *b;
}

// Same copy for an unsigned long long matrix: expects <12 x i64> with
// alignment 8. The typedef name / attribute argument order differs here too.
typedef unsigned long long ullx3x4_t __attribute__((matrix_type(4, 3)));
void load_store_ull(ullx3x4_t *a, ullx3x4_t *b) {
  // CHECK-LABEL: define{{.*}} void @load_store_ull(
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca ptr, align 8
  // CHECK-NEXT: %b.addr = alloca ptr, align 8
  // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
  // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
  // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
  // CHECK-NEXT: %1 = load <12 x i64>, ptr %0, align 8
  // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
  // CHECK-NEXT: store <12 x i64> %1, ptr %2, align 8
  // CHECK-NEXT: ret void

  *a = *b;
}

// Same copy for an __fp16 matrix: expects <12 x half> with alignment 2.
// The typedef name / attribute argument order differs here too.
typedef __fp16 fp16x3x4_t __attribute__((matrix_type(4, 3)));
void load_store_fp16(fp16x3x4_t *a, fp16x3x4_t *b) {
  // CHECK-LABEL: define{{.*}} void @load_store_fp16(
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca ptr, align 8
  // CHECK-NEXT: %b.addr = alloca ptr, align 8
  // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
  // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
  // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
  // CHECK-NEXT: %1 = load <12 x half>, ptr %0, align 2
  // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
  // CHECK-NEXT: store <12 x half> %1, ptr %2, align 2
  // CHECK-NEXT: ret void

  *a = *b;
}

typedef float fx3x3_t __attribute__((matrix_type(3, 3)));

// Takes a 3x3 float matrix by value and stores it through b. The expected IR
// has the parameter arriving as a <9 x float> value that is stored into a
// local slot allocated as [9 x float], then reloaded and written to *b.
void parameter_passing(fx3x3_t a, fx3x3_t *b) {
  // CHECK-LABEL: define{{.*}} void @parameter_passing(
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca [9 x float], align 4
  // CHECK-NEXT: %b.addr = alloca ptr, align 8
  // CHECK-NEXT: store <9 x float> %a, ptr %a.addr, align 4
  // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
  // CHECK-NEXT: %0 = load <9 x float>, ptr %a.addr, align 4
  // CHECK-NEXT: %1 = load ptr, ptr %b.addr, align 8
  // CHECK-NEXT: store <9 x float> %0, ptr %1, align 4
  // CHECK-NEXT: ret void
  *b = a;
}

// Returns the 3x3 float matrix pointed to by a. The expected IR loads a
// <9 x float> and returns it directly as the function result.
fx3x3_t return_matrix(fx3x3_t *a) {
  // CHECK-LABEL: define{{.*}} <9 x float> @return_matrix
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca ptr, align 8
  // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
  // CHECK-NEXT: %0 = load ptr, ptr %a.addr, align 8
  // CHECK-NEXT: %1 = load <9 x float>, ptr %0, align 4
  // CHECK-NEXT: ret <9 x float> %1
  return *a;
}

// Struct with a matrix member placed between a char and a float; its
// expected IR type is matched near the top of this file.
typedef struct {
  char Tmp1;
  fx3x4_t Data;
  float Tmp2;
} Matrix;

// Copies only the matrix member from *a to *b. The expected IR addresses the
// member as field index 1 of %struct.Matrix on both sides and moves it as a
// <12 x float> value.
void matrix_struct(Matrix *a, Matrix *b) {
  // CHECK-LABEL: define{{.*}} void @matrix_struct(
  // CHECK-NEXT: entry:
  // CHECK-NEXT: %a.addr = alloca ptr, align 8
  // CHECK-NEXT: %b.addr = alloca ptr, align 8
  // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
  // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
  // CHECK-NEXT: %0 = load ptr, ptr %a.addr, align 8
  // CHECK-NEXT: %Data = getelementptr inbounds nuw %struct.Matrix, ptr %0, i32 0, i32 1
  // CHECK-NEXT: %1 = load <12 x float>, ptr %Data, align 4
  // CHECK-NEXT: %2 = load ptr, ptr %b.addr, align 8
  // CHECK-NEXT: %Data1 = getelementptr inbounds nuw %struct.Matrix, ptr %2, i32 0, i32 1
  // CHECK-NEXT: store <12 x float> %1, ptr %Data1, align 4
  // CHECK-NEXT: ret void
  b->Data = a->Data;
}

// Uses a 4x4 double matrix as a read-write ("+") inline-asm operand with the
// "r,m" constraint alternatives. The expected IR allocates the local as
// [16 x double], loads it as <16 x double>, and passes both the slot pointer
// (tagged elementtype(<16 x double>)) and the loaded value to the asm call.
typedef double dx4x4_t __attribute__((matrix_type(4, 4)));
void matrix_inline_asm_memory_readwrite(void) {
  // CHECK-LABEL: define{{.*}} void @matrix_inline_asm_memory_readwrite()
  // CHECK-NEXT: entry:
  // CHECK-NEXT: [[ALLOCA:%.+]] = alloca [16 x double], align 8
  // CHECK-NEXT: [[VAL:%.+]] = load <16 x double>, ptr [[ALLOCA]], align 8
  // CHECK-NEXT: call void asm sideeffect "", "=*r|m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(<16 x double>) [[ALLOCA]], <16 x double> [[VAL]])
  // CHECK-NEXT: ret void

  // m has no initializer; the expected IR above contains no store to it
  // before the load that feeds the asm call.
  dx4x4_t m;
  asm volatile(""
               : "+r,m"(m)
               :
               : "memory");
}