// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// CHECK-LABEL: @arm_sme_zero
llvm.func @arm_sme_zero() {
  // CHECK: call void @llvm.aarch64.sme.zero(i32 0)
  "arm_sme.intr.zero"() <{tile_mask = 0 : i32}> : () -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_fmopa
llvm.func @arm_sme_fmopa(%nxv2f64 : vector<[2]xf64>,
                         %nxv4f32 : vector<[4]xf32>,
                         %nxv8f16 : vector<[8]xf16>,
                         %nxv8bf16: vector<[8]xbf16>,
                         %nxv2i1 : vector<[2]xi1>,
                         %nxv4i1 : vector<[4]xi1>,
                         %nxv8i1 : vector<[8]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.mopa.nxv2f64
  "arm_sme.intr.mopa"(%nxv2i1, %nxv2i1, %nxv2f64, %nxv2f64) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, vector<[2]xi1>, vector<[2]xf64>, vector<[2]xf64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mopa.nxv4f32
  "arm_sme.intr.mopa"(%nxv4i1, %nxv4i1, %nxv4f32, %nxv4f32) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, vector<[4]xi1>, vector<[4]xf32>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mopa.wide.nxv8f16
  "arm_sme.intr.mopa.wide"(%nxv8i1, %nxv8i1, %nxv8f16, %nxv8f16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xf16>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mopa.wide.nxv8bf16
  "arm_sme.intr.mopa.wide"(%nxv8i1, %nxv8i1, %nxv8bf16, %nxv8bf16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xbf16>, vector<[8]xbf16>) -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_imopa
llvm.func @arm_sme_imopa(%nxv8i16 : vector<[8]xi16>,
                         %nxv16i8 : vector<[16]xi8>,
                         %nxv8i1 : vector<[8]xi1>,
                         %nxv16i1 : vector<[16]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.smopa.wide.nxv8i16
  "arm_sme.intr.smopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umopa.wide.nxv8i16
  "arm_sme.intr.umopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumopa.wide.nxv8i16
  "arm_sme.intr.sumopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmopa.wide.nxv8i16
  "arm_sme.intr.usmopa.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smopa.wide.nxv16i8
  "arm_sme.intr.smopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umopa.wide.nxv16i8
  "arm_sme.intr.umopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumopa.wide.nxv16i8
  "arm_sme.intr.sumopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmopa.wide.nxv16i8
  "arm_sme.intr.usmopa.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smopa.za32.nxv8i16
  "arm_sme.intr.smopa.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umopa.za32.nxv8i16
  "arm_sme.intr.umopa.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_fmops
llvm.func @arm_sme_fmops(%nxv2f64 : vector<[2]xf64>,
                         %nxv4f32 : vector<[4]xf32>,
                         %nxv8f16 : vector<[8]xf16>,
                         %nxv8bf16: vector<[8]xbf16>,
                         %nxv2i1 : vector<[2]xi1>,
                         %nxv4i1 : vector<[4]xi1>,
                         %nxv8i1 : vector<[8]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.mops.nxv2f64
  "arm_sme.intr.mops"(%nxv2i1, %nxv2i1, %nxv2f64, %nxv2f64) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, vector<[2]xi1>, vector<[2]xf64>, vector<[2]xf64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mops.nxv4f32
  "arm_sme.intr.mops"(%nxv4i1, %nxv4i1, %nxv4f32, %nxv4f32) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, vector<[4]xi1>, vector<[4]xf32>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mops.wide.nxv8f16
  "arm_sme.intr.mops.wide"(%nxv8i1, %nxv8i1, %nxv8f16, %nxv8f16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xf16>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.mops.wide.nxv8bf16
  "arm_sme.intr.mops.wide"(%nxv8i1, %nxv8i1, %nxv8bf16, %nxv8bf16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xbf16>, vector<[8]xbf16>) -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_imops
llvm.func @arm_sme_imops(%nxv8i16 : vector<[8]xi16>,
                         %nxv16i8 : vector<[16]xi8>,
                         %nxv8i1 : vector<[8]xi1>,
                         %nxv16i1 : vector<[16]xi1>) {
  // CHECK: call void @llvm.aarch64.sme.smops.wide.nxv8i16
  "arm_sme.intr.smops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umops.wide.nxv8i16
  "arm_sme.intr.umops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumops.wide.nxv8i16
  "arm_sme.intr.sumops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmops.wide.nxv8i16
  "arm_sme.intr.usmops.wide"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smops.wide.nxv16i8
  "arm_sme.intr.smops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umops.wide.nxv16i8
  "arm_sme.intr.umops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.sumops.wide.nxv16i8
  "arm_sme.intr.sumops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.usmops.wide.nxv16i8
  "arm_sme.intr.usmops.wide"(%nxv16i1, %nxv16i1, %nxv16i8, %nxv16i8) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, vector<[16]xi1>, vector<[16]xi8>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.smops.za32.nxv8i16
  "arm_sme.intr.smops.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.umops.za32.nxv8i16
  "arm_sme.intr.umops.za32"(%nxv8i1, %nxv8i1, %nxv8i16, %nxv8i16) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, vector<[8]xi1>, vector<[8]xi16>, vector<[8]xi16>) -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_load
llvm.func @arm_sme_load(%nxv1i1 : vector<[1]xi1>,
                        %nxv2i1 : vector<[2]xi1>,
                        %nxv4i1 : vector<[4]xi1>,
                        %nxv8i1 : vector<[8]xi1>,
                        %nxv16i1 : vector<[16]xi1>,
                        %ptr : !llvm.ptr) {
  %c0 = llvm.mlir.constant(0 : index) : i32
  // CHECK: call void @llvm.aarch64.sme.ld1q.horiz
  "arm_sme.intr.ld1q.horiz"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1d.horiz
  "arm_sme.intr.ld1d.horiz"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1w.horiz
  "arm_sme.intr.ld1w.horiz"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1h.horiz
  "arm_sme.intr.ld1h.horiz"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1b.horiz
  "arm_sme.intr.ld1b.horiz"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1q.vert
  "arm_sme.intr.ld1q.vert"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1d.vert
  "arm_sme.intr.ld1d.vert"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1w.vert
  "arm_sme.intr.ld1w.vert"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1h.vert
  "arm_sme.intr.ld1h.vert"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.ld1b.vert
  "arm_sme.intr.ld1b.vert"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_store
llvm.func @arm_sme_store(%nxv1i1 : vector<[1]xi1>,
                         %nxv2i1 : vector<[2]xi1>,
                         %nxv4i1 : vector<[4]xi1>,
                         %nxv8i1 : vector<[8]xi1>,
                         %nxv16i1 : vector<[16]xi1>,
                         %ptr : !llvm.ptr) {
  %c0 = llvm.mlir.constant(0 : index) : i32
  // CHECK: call void @llvm.aarch64.sme.st1q.horiz
  "arm_sme.intr.st1q.horiz"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1d.horiz
  "arm_sme.intr.st1d.horiz"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1w.horiz
  "arm_sme.intr.st1w.horiz"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1h.horiz
  "arm_sme.intr.st1h.horiz"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1b.horiz
  "arm_sme.intr.st1b.horiz"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1q.vert
  "arm_sme.intr.st1q.vert"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[1]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1d.vert
  "arm_sme.intr.st1d.vert"(%nxv2i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[2]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1w.vert
  "arm_sme.intr.st1w.vert"(%nxv4i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[4]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1h.vert
  "arm_sme.intr.st1h.vert"(%nxv8i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[8]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.st1b.vert
  "arm_sme.intr.st1b.vert"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
    (vector<[16]xi1>, !llvm.ptr, i32) -> ()
  // CHECK: call void @llvm.aarch64.sme.str
  "arm_sme.intr.str"(%c0, %ptr, %c0) : (i32, !llvm.ptr, i32) -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_vector_to_tile_horiz
llvm.func @arm_sme_vector_to_tile_horiz(%tileslice : i32,
                                        %nxv16i1 : vector<[16]xi1>,
                                        %nxv8i1 : vector<[8]xi1>,
                                        %nxv4i1 : vector<[4]xi1>,
                                        %nxv2i1 : vector<[2]xi1>,
                                        %nxv1i1 : vector<[1]xi1>,
                                        %nxv16i8 : vector<[16]xi8>,
                                        %nxv8i16 : vector<[8]xi16>,
                                        %nxv4i32 : vector<[4]xi32>,
                                        %nxv2i64 : vector<[2]xi64>,
                                        %nxv1i128 : vector<[1]xi128>,
                                        %nxv8f16 : vector<[8]xf16>,
                                        %nxv8bf16 : vector<[8]xbf16>,
                                        %nxv4f32 : vector<[4]xf32>,
                                        %nxv2f64 : vector<[2]xf64>) {
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv16i8
  "arm_sme.intr.write.horiz"(%tileslice, %nxv16i1, %nxv16i8) <{tile_id = 0 : i32}> :
    (i32, vector<[16]xi1>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv8i16
  "arm_sme.intr.write.horiz"(%tileslice, %nxv8i1, %nxv8i16) <{tile_id = 0 : i32}> :
    (i32, vector<[8]xi1>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv4i32
  "arm_sme.intr.write.horiz"(%tileslice, %nxv4i1, %nxv4i32) <{tile_id = 0 : i32}> :
    (i32, vector<[4]xi1>, vector<[4]xi32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv2i64
  "arm_sme.intr.write.horiz"(%tileslice, %nxv2i1, %nxv2i64) <{tile_id = 0 : i32}> :
    (i32, vector<[2]xi1>, vector<[2]xi64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv1i128
  "arm_sme.intr.write.horiz"(%tileslice, %nxv1i1, %nxv1i128) <{tile_id = 0 : i32}> :
    (i32, vector<[1]xi1>, vector<[1]xi128>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv8f16
  "arm_sme.intr.write.horiz"(%tileslice, %nxv8i1, %nxv8f16) <{tile_id = 0 : i32}> :
    (i32, vector<[8]xi1>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv8bf16
  "arm_sme.intr.write.horiz"(%tileslice, %nxv8i1, %nxv8bf16) <{tile_id = 0 : i32}> :
    (i32, vector<[8]xi1>, vector<[8]xbf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv4f32
  "arm_sme.intr.write.horiz"(%tileslice, %nxv4i1, %nxv4f32) <{tile_id = 0 : i32}> :
    (i32, vector<[4]xi1>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.horiz.nxv2f64
  "arm_sme.intr.write.horiz"(%tileslice, %nxv2i1, %nxv2f64) <{tile_id = 0 : i32}> :
    (i32, vector<[2]xi1>, vector<[2]xf64>) -> ()
  llvm.return
}

// -----

// CHECK-LABEL: @arm_sme_vector_to_tile_vert
llvm.func @arm_sme_vector_to_tile_vert(%tileslice : i32,
                                       %nxv16i1 : vector<[16]xi1>,
                                       %nxv8i1 : vector<[8]xi1>,
                                       %nxv4i1 : vector<[4]xi1>,
                                       %nxv2i1 : vector<[2]xi1>,
                                       %nxv1i1 : vector<[1]xi1>,
                                       %nxv16i8 : vector<[16]xi8>,
                                       %nxv8i16 : vector<[8]xi16>,
                                       %nxv4i32 : vector<[4]xi32>,
                                       %nxv2i64 : vector<[2]xi64>,
                                       %nxv1i128 : vector<[1]xi128>,
                                       %nxv8f16 : vector<[8]xf16>,
                                       %nxv8bf16 : vector<[8]xbf16>,
                                       %nxv4f32 : vector<[4]xf32>,
                                       %nxv2f64 : vector<[2]xf64>) {
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv16i8
  "arm_sme.intr.write.vert"(%tileslice, %nxv16i1, %nxv16i8) <{tile_id = 0 : i32}> :
    (i32, vector<[16]xi1>, vector<[16]xi8>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv8i16
  "arm_sme.intr.write.vert"(%tileslice, %nxv8i1, %nxv8i16) <{tile_id = 0 : i32}> :
    (i32, vector<[8]xi1>, vector<[8]xi16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv4i32
  "arm_sme.intr.write.vert"(%tileslice, %nxv4i1, %nxv4i32) <{tile_id = 0 : i32}> :
    (i32, vector<[4]xi1>, vector<[4]xi32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv2i64
  "arm_sme.intr.write.vert"(%tileslice, %nxv2i1, %nxv2i64) <{tile_id = 0 : i32}> :
    (i32, vector<[2]xi1>, vector<[2]xi64>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv1i128
  "arm_sme.intr.write.vert"(%tileslice, %nxv1i1, %nxv1i128) <{tile_id = 0 : i32}> :
    (i32, vector<[1]xi1>, vector<[1]xi128>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv8f16
  "arm_sme.intr.write.vert"(%tileslice, %nxv8i1, %nxv8f16) <{tile_id = 0 : i32}> :
    (i32, vector<[8]xi1>, vector<[8]xf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv8bf16
  "arm_sme.intr.write.vert"(%tileslice, %nxv8i1, %nxv8bf16) <{tile_id = 0 : i32}> :
    (i32, vector<[8]xi1>, vector<[8]xbf16>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv4f32
  "arm_sme.intr.write.vert"(%tileslice, %nxv4i1, %nxv4f32) <{tile_id = 0 : i32}> :
    (i32, vector<[4]xi1>, vector<[4]xf32>) -> ()
  // CHECK: call void @llvm.aarch64.sme.write.vert.nxv2f64
  "arm_sme.intr.write.vert"(%tileslice, %nxv2i1, %nxv2f64) <{tile_id = 0 : i32}> :
    (i32, vector<[2]xi1>, vector<[2]xf64>) -> ()
  llvm.return
}

// -----

// NOTE(review): CHECK-LABEL added for consistency with the other functions in
// this file; it anchors the CHECK lines below to this function's output.
// CHECK-LABEL: @arm_sme_tile_slice_to_vector_horiz
llvm.func @arm_sme_tile_slice_to_vector_horiz(%tileslice : i32,
                                              %nxv16i1 : vector<[16]xi1>,
                                              %nxv8i1 : vector<[8]xi1>,
                                              %nxv4i1 : vector<[4]xi1>,
                                              %nxv2i1 : vector<[2]xi1>,
                                              %nxv1i1 : vector<[1]xi1>,
                                              %nxv16i8 : vector<[16]xi8>,
                                              %nxv8i16 : vector<[8]xi16>,
                                              %nxv4i32 : vector<[4]xi32>,
                                              %nxv2i64 : vector<[2]xi64>,
                                              %nxv1i128 : vector<[1]xi128>,
                                              %nxv8f16 : vector<[8]xf16>,
                                              %nxv8bf16 : vector<[8]xbf16>,
                                              %nxv4f32 : vector<[4]xf32>,
                                              %nxv2f64 : vector<[2]xf64>) {
  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8
  %res0 = "arm_sme.intr.read.horiz"(%nxv16i8, %nxv16i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[16]xi8>, vector<[16]xi1>, i32) -> vector<[16]xi8>
  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16
  %res1 = "arm_sme.intr.read.horiz"(%nxv8i16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xi16>, vector<[8]xi1>, i32) -> vector<[8]xi16>
  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32
  %res2 = "arm_sme.intr.read.horiz"(%nxv4i32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xi32>, vector<[4]xi1>, i32) -> vector<[4]xi32>
  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64
  %res3 = "arm_sme.intr.read.horiz"(%nxv2i64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xi64>, vector<[2]xi1>, i32) -> vector<[2]xi64>
  // CHECK: call <vscale x 1 x i128> @llvm.aarch64.sme.read.horiz.nxv1i128
  %res4 = "arm_sme.intr.read.horiz"(%nxv1i128, %nxv1i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[1]xi128>, vector<[1]xi1>, i32) -> vector<[1]xi128>
  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16
  %res5 = "arm_sme.intr.read.horiz"(%nxv8f16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xf16>, vector<[8]xi1>, i32) -> vector<[8]xf16>
  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16
  %res6 = "arm_sme.intr.read.horiz"(%nxv8bf16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xbf16>, vector<[8]xi1>, i32) -> vector<[8]xbf16>
  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32
  %res7 = "arm_sme.intr.read.horiz"(%nxv4f32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xf32>, vector<[4]xi1>, i32) -> vector<[4]xf32>
  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64
  %res8 = "arm_sme.intr.read.horiz"(%nxv2f64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xf64>, vector<[2]xi1>, i32) -> vector<[2]xf64>
  llvm.return
}

// -----

// NOTE(review): CHECK-LABEL added for consistency with the other functions in
// this file; it anchors the CHECK lines below to this function's output.
// CHECK-LABEL: @arm_sme_tile_slice_to_vector_vert
llvm.func @arm_sme_tile_slice_to_vector_vert(%tileslice : i32,
                                             %nxv16i1 : vector<[16]xi1>,
                                             %nxv8i1 : vector<[8]xi1>,
                                             %nxv4i1 : vector<[4]xi1>,
                                             %nxv2i1 : vector<[2]xi1>,
                                             %nxv1i1 : vector<[1]xi1>,
                                             %nxv16i8 : vector<[16]xi8>,
                                             %nxv8i16 : vector<[8]xi16>,
                                             %nxv4i32 : vector<[4]xi32>,
                                             %nxv2i64 : vector<[2]xi64>,
                                             %nxv1i128 : vector<[1]xi128>,
                                             %nxv8f16 : vector<[8]xf16>,
                                             %nxv8bf16 : vector<[8]xbf16>,
                                             %nxv4f32 : vector<[4]xf32>,
                                             %nxv2f64 : vector<[2]xf64>) {
  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8
  %res0 = "arm_sme.intr.read.vert"(%nxv16i8, %nxv16i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[16]xi8>, vector<[16]xi1>, i32) -> vector<[16]xi8>
  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16
  %res1 = "arm_sme.intr.read.vert"(%nxv8i16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xi16>, vector<[8]xi1>, i32) -> vector<[8]xi16>
  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32
  %res2 = "arm_sme.intr.read.vert"(%nxv4i32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xi32>, vector<[4]xi1>, i32) -> vector<[4]xi32>
  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64
  %res3 = "arm_sme.intr.read.vert"(%nxv2i64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xi64>, vector<[2]xi1>, i32) -> vector<[2]xi64>
  // CHECK: call <vscale x 1 x i128> @llvm.aarch64.sme.read.vert.nxv1i128
  %res4 = "arm_sme.intr.read.vert"(%nxv1i128, %nxv1i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[1]xi128>, vector<[1]xi1>, i32) -> vector<[1]xi128>
  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16
  %res5 = "arm_sme.intr.read.vert"(%nxv8f16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xf16>, vector<[8]xi1>, i32) -> vector<[8]xf16>
  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16
  %res6 = "arm_sme.intr.read.vert"(%nxv8bf16, %nxv8i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[8]xbf16>, vector<[8]xi1>, i32) -> vector<[8]xbf16>
  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32
  %res7 = "arm_sme.intr.read.vert"(%nxv4f32, %nxv4i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[4]xf32>, vector<[4]xi1>, i32) -> vector<[4]xf32>
  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64
  %res8 = "arm_sme.intr.read.vert"(%nxv2f64, %nxv2i1, %tileslice) <{tile_id = 0 : i32}>
    : (vector<[2]xf64>, vector<[2]xi1>, i32) -> vector<[2]xf64>
  llvm.return
}

// -----

// NOTE(review): CHECK-LABEL added for consistency with the other functions in
// this file; it anchors the CHECK lines below to this function's output.
// CHECK-LABEL: @arm_sme_streaming_vl
llvm.func @arm_sme_streaming_vl() {
  // CHECK: call i64 @llvm.aarch64.sme.cntsb()
  %svl_b = "arm_sme.intr.cntsb"() : () -> i64
  // CHECK: call i64 @llvm.aarch64.sme.cntsh()
  %svl_h = "arm_sme.intr.cntsh"() : () -> i64
  // CHECK: call i64 @llvm.aarch64.sme.cntsw()
  %svl_w = "arm_sme.intr.cntsw"() : () -> i64
  // CHECK: call i64 @llvm.aarch64.sme.cntsd()
  %svl_d = "arm_sme.intr.cntsd"() : () -> i64
  llvm.return
}