; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s

define <vscale x 16 x i8> @i8_1v_4s(ptr %b) {
; CHECK-LABEL: i8_1v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
; CHECK-LABEL: i8_4s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @i16_1v_8s(ptr %b) {
; CHECK-LABEL: i16_1v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
; CHECK-LABEL: i16_8s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_2v_8s(ptr %b) {
; CHECK-LABEL: i16_2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
; CHECK-LABEL: i16_8s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @i32_1v_16s(ptr %b) {
; CHECK-LABEL: i32_1v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr = getelementptr inbounds i32, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_2v(ptr %b) {
; CHECK-LABEL: i32_16s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @i64_1v_32s(ptr %b) {
; CHECK-LABEL: i64_1v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr = getelementptr inbounds i64, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_2v(ptr %b) {
; CHECK-LABEL: i64_32s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr1 = getelementptr inbounds i64, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}


define <vscale x 16 x i8> @i8_m2v_4s(ptr %b) {
; CHECK-LABEL: i8_m2v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
; CHECK-LABEL: i8_4s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @i16_m2v_8s(ptr %b) {
; CHECK-LABEL: i16_m2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
; CHECK-LABEL: i16_8s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @i32_m2v_16s(ptr %b) {
; CHECK-LABEL: i32_m2v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
; CHECK-LABEL: i32_16s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @i64_m2v_32s(ptr %b) {
; CHECK-LABEL: i64_m2v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
; CHECK-LABEL: i64_32s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

declare i64 @llvm.vscale.i64()