; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -force-streaming < %s | FileCheck %s

;
; Move Multi-Vector From Tile (Read) x2
;

; Horizontal

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_horiz_vg2_b(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b, z1.b }, za0h.b[w12, 0:1]
; CHECK-NEXT:    mov { z0.b, z1.b }, za0h.b[w12, 14:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 %slice)
  %slice.14 = add i32 %slice, 14
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 %slice.14)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_horiz_vg2_h(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0h.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1h.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @za_read_horiz_vg2_f16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0h.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1h.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_horiz_vg2_bf16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0h.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1h.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_horiz_vg2_s(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0h.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3h.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @za_read_horiz_vg2_f32(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0h.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3h.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_horiz_vg2_d(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0h.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @za_read_horiz_vg2_f64(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0h.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; Vertical

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vert_vg2_b(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b, z1.b }, za0v.b[w12, 0:1]
; CHECK-NEXT:    mov { z0.b, z1.b }, za0v.b[w12, 14:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 %slice)
  %slice.14 = add i32 %slice, 14
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 %slice.14)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vert_vg2_h(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0v.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1v.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vert_vg2_f16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0v.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1v.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vert_vg2_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0v.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1v.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vert_vg2_s(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0v.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3v.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vert_vg2_f32(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0v.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3v.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vert_vg2_d(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0v.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vert_vg2_f64(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0v.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

;
; Move Multi-Vector From Tile (Read) x4
;

; Horizontal

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_horiz_vg4_b(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b - z3.b }, za0h.b[w12, 0:3]
; CHECK-NEXT:    mov { z0.b - z3.b }, za0h.b[w12, 12:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 %slice)
  %slice.12 = add i32 %slice, 12
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 %slice.12)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_horiz_vg4_h(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0h.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1h.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @za_read_horiz_vg4_f16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0h.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1h.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_horiz_vg4_bf16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0h.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1h.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_horiz_vg4_s(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0h.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 0, i32 %slice)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @za_read_horiz_vg4_f32(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0h.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 0, i32 %slice)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_horiz_vg4_d(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0h.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @za_read_horiz_vg4_f64(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0h.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; Vertical

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vert_vg4_b(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b - z3.b }, za0v.b[w12, 0:3]
; CHECK-NEXT:    mov { z0.b - z3.b }, za0v.b[w12, 12:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 %slice)
  %slice.12 = add i32 %slice, 12
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 %slice.12)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vert_vg4_h(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0v.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1v.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vert_vg4_f16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0v.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1v.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vert_vg4_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0v.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1v.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vert_vg4_s(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0v.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 0, i32 %slice)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vert_vg4_f32(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0v.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 0, i32 %slice)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vert_vg4_d(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0v.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vert_vg4_f64(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0v.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

; Move Multi-Vector From ZA (Read) x2

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vg1x2_b(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 %slice.7)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vg1x2_h(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 %slice.7)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vg1x2_f16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 %slice.7)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vg1x2_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 %slice.7)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vg1x2_s(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 %slice.7)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vg1x2_f32(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 %slice.7)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vg1x2_d(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %slice.7)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res2
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vg1x2_f64(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 %slice.7)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res2
}

; Move Multi-Vector From ZA (Read) x4

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vg1x4_b(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 %slice.7)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vg1x4_h(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 %slice.7)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vg1x4_f16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 %slice.7)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vg1x4_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 %slice.7)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vg1x4_s(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 %slice.7)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vg1x4_f32(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 %slice.7)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vg1x4_d(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 %slice.7)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res2
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vg1x4_f64(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 %slice.7)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res2
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32, i32)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32, i32)
declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32, i32)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32, i32)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32, i32)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32, i32)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32, i32)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32, i32)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32, i32)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32, i32)
declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32, i32)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32, i32)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32, i32)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32, i32)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32, i32)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32, i32)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32, i32)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32, i32)
declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32, i32)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32, i32)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32, i32)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32, i32)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32, i32)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32, i32)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32, i32)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32, i32)
declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32, i32)
declare
{ <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32, i32) 643declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32, i32) 644declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32, i32) 645declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32, i32) 646declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32, i32) 647 648declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32) 649declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32) 650declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32) 651declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32) 652declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32) 653declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32) 654declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32) 655declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32) 656 657declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32) 658declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32) 659declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32) 660declare { <vscale x 2 x i64>, <vscale x 2 x i64>, 
<vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32) 661declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32) 662declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32) 663declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32) 664declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32) 665