; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s

; Each function below forms an address as "base pointer + register index"
; with a getelementptr and then loads a scalable vector from it. The expected
; output is a single predicated SVE LD1 instruction that uses the
; register + register addressing form, with the index left-shifted according
; to the getelementptr element size.

; LD1B
; The index is in units of i8, so the expected address operand is the
; unscaled form [x0, x1].

; Full-width load of byte elements.
define <vscale x 16 x i8> @ld1_nxv16i8(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 16 x i8>, ptr %gep
  ret <vscale x 16 x i8> %data
}

; Byte-indexed address, but the loaded vector has i16 elements; a byte load
; is still expected.
define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 8 x i16>, ptr %gep
  ret <vscale x 8 x i16> %data
}

; Byte-indexed address, but the loaded vector has i32 elements; a byte load
; is still expected.
define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x i32>, ptr %gep
  ret <vscale x 4 x i32> %data
}

; Byte-indexed address, but the loaded vector has i64 elements; a byte load
; is still expected.
define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i64>, ptr %gep
  ret <vscale x 2 x i64> %data
}

; i8 -> i16 zero-extension is expected to fold into the load (ld1b into .h).
define <vscale x 8 x i16> @ld1_nxv8i16_zext8(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16_zext8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 8 x i8>, ptr %gep
  %ext = zext <vscale x 8 x i8> %data to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %ext
}

; i8 -> i32 zero-extension is expected to fold into the load (ld1b into .s).
define <vscale x 4 x i32> @ld1_nxv4i32_zext8(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_zext8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x i8>, ptr %gep
  %ext = zext <vscale x 4 x i8> %data to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

; i8 -> i64 zero-extension is expected to fold into the load (ld1b into .d).
define <vscale x 2 x i64> @ld1_nxv2i64_zext8(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_zext8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i8>, ptr %gep
  %ext = zext <vscale x 2 x i8> %data to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; i8 -> i16 sign-extension is expected to select ld1sb into .h.
define <vscale x 8 x i16> @ld1_nxv8i16_sext8(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16_sext8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 8 x i8>, ptr %gep
  %ext = sext <vscale x 8 x i8> %data to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %ext
}

; i8 -> i32 sign-extension is expected to select ld1sb into .s.
define <vscale x 4 x i32> @ld1_nxv4i32_sext8(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_sext8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x i8>, ptr %gep
  %ext = sext <vscale x 4 x i8> %data to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

; i8 -> i64 sign-extension is expected to select ld1sb into .d.
define <vscale x 2 x i64> @ld1_nxv2i64_sext8(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_sext8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i8, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i8>, ptr %gep
  %ext = sext <vscale x 2 x i8> %data to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; LD1H
; The index is in units of 2-byte elements (i16, half, bfloat), so the
; expected address operand is [x0, x1, lsl #1].

; Full-width load of i16 elements.
define <vscale x 8 x i16> @ld1_nxv8i16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i16, ptr %addr, i64 %off
  %data = load volatile <vscale x 8 x i16>, ptr %gep
  ret <vscale x 8 x i16> %data
}

; i16 -> i32 zero-extension is expected to fold into the load (ld1h into .s).
define <vscale x 4 x i32> @ld1_nxv4i32_zext16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_zext16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i16, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x i16>, ptr %gep
  %ext = zext <vscale x 4 x i16> %data to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

; i16 -> i64 zero-extension is expected to fold into the load (ld1h into .d).
define <vscale x 2 x i64> @ld1_nxv2i64_zext16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_zext16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i16, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i16>, ptr %gep
  %ext = zext <vscale x 2 x i16> %data to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; i16 -> i32 sign-extension is expected to select ld1sh into .s.
define <vscale x 4 x i32> @ld1_nxv4i32_sext16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_sext16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i16, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x i16>, ptr %gep
  %ext = sext <vscale x 4 x i16> %data to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

; i16 -> i64 sign-extension is expected to select ld1sh into .d.
define <vscale x 2 x i64> @ld1_nxv2i64_sext16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_sext16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i16, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i16>, ptr %gep
  %ext = sext <vscale x 2 x i16> %data to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; Full-width load of half elements.
define <vscale x 8 x half> @ld1_nxv8f16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds half, ptr %addr, i64 %off
  %data = load volatile <vscale x 8 x half>, ptr %gep
  ret <vscale x 8 x half> %data
}

; Full-width load of bfloat elements.
define <vscale x 8 x bfloat> @ld1_nxv8bf16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds bfloat, ptr %addr, i64 %off
  %data = load volatile <vscale x 8 x bfloat>, ptr %gep
  ret <vscale x 8 x bfloat> %data
}

; nxv4f16 load: ld1h into .s lanes is expected.
define <vscale x 4 x half> @ld1_nxv4f16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds half, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x half>, ptr %gep
  ret <vscale x 4 x half> %data
}

; nxv4bf16 load: ld1h into .s lanes is expected.
define <vscale x 4 x bfloat> @ld1_nxv4bf16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds bfloat, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x bfloat>, ptr %gep
  ret <vscale x 4 x bfloat> %data
}

; nxv2f16 load: ld1h into .d lanes is expected.
define <vscale x 2 x half> @ld1_nxv2f16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds half, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x half>, ptr %gep
  ret <vscale x 2 x half> %data
}

; nxv2bf16 load: ld1h into .d lanes is expected.
define <vscale x 2 x bfloat> @ld1_nxv2bf16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds bfloat, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x bfloat>, ptr %gep
  ret <vscale x 2 x bfloat> %data
}

; LD1W
; The index is in units of 4-byte elements (i32, float), so the expected
; address operand is [x0, x1, lsl #2].

; Full-width load of i32 elements.
define <vscale x 4 x i32> @ld1_nxv4i32(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i32, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x i32>, ptr %gep
  ret <vscale x 4 x i32> %data
}

; i32 -> i64 zero-extension is expected to fold into the load (ld1w into .d).
define <vscale x 2 x i64> @ld1_nxv2i64_zext32(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_zext32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i32, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i32>, ptr %gep
  %ext = zext <vscale x 2 x i32> %data to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; i32 -> i64 sign-extension is expected to select ld1sw into .d.
define <vscale x 2 x i64> @ld1_nxv2i64_sext32(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_sext32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i32, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i32>, ptr %gep
  %ext = sext <vscale x 2 x i32> %data to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; Full-width load of float elements.
define <vscale x 4 x float> @ld1_nxv4f32(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds float, ptr %addr, i64 %off
  %data = load volatile <vscale x 4 x float>, ptr %gep
  ret <vscale x 4 x float> %data
}

; nxv2f32 load: ld1w into .d lanes is expected.
define <vscale x 2 x float> @ld1_nxv2f32(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds float, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x float>, ptr %gep
  ret <vscale x 2 x float> %data
}

; LD1D
; The index is in units of 8-byte elements (i64, double), so the expected
; address operand is [x0, x1, lsl #3].

; Full-width load of i64 elements.
define <vscale x 2 x i64> @ld1_nxv2i64(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds i64, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x i64>, ptr %gep
  ret <vscale x 2 x i64> %data
}

; Full-width load of double elements.
define <vscale x 2 x double> @ld1_nxv2f64(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds double, ptr %addr, i64 %off
  %data = load volatile <vscale x 2 x double>, ptr %gep
  ret <vscale x 2 x double> %data
}