; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr, <4 x i32>, i32, i32, i32)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(ptr %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
  ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr, <16 x i8>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(ptr %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(ptr %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr, <16 x i8>, i32, i32, i32, <16 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(ptr %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #616]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #-336]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(ptr %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #576]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, ptr %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(ptr %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #-328]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, ptr %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #664]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 664, <2 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #656]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 656, <2 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #888]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #-1000]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 -1000, <2 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr, <2 x i64>, i32, i32, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <2 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr, <2 x i64>, i32, i32, i32, <2 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <2 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <2 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <2 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x half> %0
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x i16> %0
}


define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
  ret <4 x i32> %0
}


define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}


define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}


define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #12]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
  ret <4 x float> %0
}

declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #400]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #284]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(ptr %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #-64]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64)
  %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
  store <4 x i32> %2, ptr %addr, align 8
  %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
  ret <4 x float> %3
}

declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(ptr %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #80]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, ptr %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}

declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(ptr %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #480]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, ptr %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #-352]!
783; CHECK-NEXT: vstrw.32 q1, [r0] 784; CHECK-NEXT: bx lr 785entry: 786 %0 = load <4 x i32>, ptr %addr, align 8 787 %1 = zext i16 %p to i32 788 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 789 %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2) 790 %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1 791 store <4 x i32> %4, ptr %addr, align 8 792 %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0 793 ret <4 x float> %5 794} 795 796declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) 797 798define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(ptr %addr, i16 zeroext %p) { 799; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32: 800; CHECK: @ %bb.0: @ %entry 801; CHECK-NEXT: vmsr p0, r1 802; CHECK-NEXT: vldrw.u32 q1, [r0] 803; CHECK-NEXT: vpst 804; CHECK-NEXT: vldrwt.u32 q0, [q1, #276]! 805; CHECK-NEXT: vstrw.32 q1, [r0] 806; CHECK-NEXT: bx lr 807entry: 808 %0 = load <4 x i32>, ptr %addr, align 8 809 %1 = zext i16 %p to i32 810 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 811 %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2) 812 %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1 813 store <4 x i32> %4, ptr %addr, align 8 814 %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0 815 ret <4 x i32> %5 816} 817 818declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) 819 820define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(ptr %addr, i16 zeroext %p) { 821; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32: 822; CHECK: @ %bb.0: @ %entry 823; CHECK-NEXT: vmsr p0, r1 824; CHECK-NEXT: vldrw.u32 q1, [r0] 825; CHECK-NEXT: vpst 826; CHECK-NEXT: vldrwt.u32 q0, [q1, #88]! 
827; CHECK-NEXT: vstrw.32 q1, [r0] 828; CHECK-NEXT: bx lr 829entry: 830 %0 = load <4 x i32>, ptr %addr, align 8 831 %1 = zext i16 %p to i32 832 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 833 %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2) 834 %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1 835 store <4 x i32> %4, ptr %addr, align 8 836 %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0 837 ret <4 x i32> %5 838} 839 840define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) { 841; CHECK-LABEL: test_vldrwq_gather_base_z_f32: 842; CHECK: @ %bb.0: @ %entry 843; CHECK-NEXT: vmsr p0, r0 844; CHECK-NEXT: vpst 845; CHECK-NEXT: vldrwt.u32 q1, [q0, #-300] 846; CHECK-NEXT: vmov q0, q1 847; CHECK-NEXT: bx lr 848entry: 849 %0 = zext i16 %p to i32 850 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 851 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1) 852 ret <4 x float> %2 853} 854 855declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) 856 857define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) { 858; CHECK-LABEL: test_vldrwq_gather_base_z_s32: 859; CHECK: @ %bb.0: @ %entry 860; CHECK-NEXT: vmsr p0, r0 861; CHECK-NEXT: vpst 862; CHECK-NEXT: vldrwt.u32 q1, [q0, #440] 863; CHECK-NEXT: vmov q0, q1 864; CHECK-NEXT: bx lr 865entry: 866 %0 = zext i16 %p to i32 867 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 868 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1) 869 ret <4 x i32> %2 870} 871 872declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) 873 874define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) { 
875; CHECK-LABEL: test_vldrwq_gather_base_z_u32: 876; CHECK: @ %bb.0: @ %entry 877; CHECK-NEXT: vmsr p0, r0 878; CHECK-NEXT: vpst 879; CHECK-NEXT: vldrwt.u32 q1, [q0, #300] 880; CHECK-NEXT: vmov q0, q1 881; CHECK-NEXT: bx lr 882entry: 883 %0 = zext i16 %p to i32 884 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 885 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1) 886 ret <4 x i32> %2 887} 888 889define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(ptr %base, <4 x i32> %offset) { 890; CHECK-LABEL: test_vldrwq_gather_offset_f32: 891; CHECK: @ %bb.0: @ %entry 892; CHECK-NEXT: vldrw.u32 q1, [r0, q0] 893; CHECK-NEXT: vmov q0, q1 894; CHECK-NEXT: bx lr 895entry: 896 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0) 897 ret <4 x float> %0 898} 899 900declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr, <4 x i32>, i32, i32, i32) 901 902define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(ptr %base, <4 x i32> %offset) { 903; CHECK-LABEL: test_vldrwq_gather_offset_s32: 904; CHECK: @ %bb.0: @ %entry 905; CHECK-NEXT: vldrw.u32 q1, [r0, q0] 906; CHECK-NEXT: vmov q0, q1 907; CHECK-NEXT: bx lr 908entry: 909 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0) 910 ret <4 x i32> %0 911} 912 913 914define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(ptr %base, <4 x i32> %offset) { 915; CHECK-LABEL: test_vldrwq_gather_offset_u32: 916; CHECK: @ %bb.0: @ %entry 917; CHECK-NEXT: vldrw.u32 q1, [r0, q0] 918; CHECK-NEXT: vmov q0, q1 919; CHECK-NEXT: bx lr 920entry: 921 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 1) 922 ret <4 x i32> %0 923} 924 925define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(ptr %base, <4 x i32> %offset, i16 
zeroext %p) { 926; CHECK-LABEL: test_vldrwq_gather_offset_z_f32: 927; CHECK: @ %bb.0: @ %entry 928; CHECK-NEXT: vmsr p0, r1 929; CHECK-NEXT: vpst 930; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] 931; CHECK-NEXT: vmov q0, q1 932; CHECK-NEXT: bx lr 933entry: 934 %0 = zext i16 %p to i32 935 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 936 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1) 937 ret <4 x float> %2 938} 939 940declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>) 941 942define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) { 943; CHECK-LABEL: test_vldrwq_gather_offset_z_s32: 944; CHECK: @ %bb.0: @ %entry 945; CHECK-NEXT: vmsr p0, r1 946; CHECK-NEXT: vpst 947; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] 948; CHECK-NEXT: vmov q0, q1 949; CHECK-NEXT: bx lr 950entry: 951 %0 = zext i16 %p to i32 952 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 953 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1) 954 ret <4 x i32> %2 955} 956 957 958define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) { 959; CHECK-LABEL: test_vldrwq_gather_offset_z_u32: 960; CHECK: @ %bb.0: @ %entry 961; CHECK-NEXT: vmsr p0, r1 962; CHECK-NEXT: vpst 963; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] 964; CHECK-NEXT: vmov q0, q1 965; CHECK-NEXT: bx lr 966entry: 967 %0 = zext i16 %p to i32 968 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 969 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1) 970 ret <4 x i32> %2 971} 972 973define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(ptr %base, <4 x 
i32> %offset) { 974; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32: 975; CHECK: @ %bb.0: @ %entry 976; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] 977; CHECK-NEXT: vmov q0, q1 978; CHECK-NEXT: bx lr 979entry: 980 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0) 981 ret <4 x float> %0 982} 983 984define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(ptr %base, <4 x i32> %offset) { 985; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32: 986; CHECK: @ %bb.0: @ %entry 987; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] 988; CHECK-NEXT: vmov q0, q1 989; CHECK-NEXT: bx lr 990entry: 991 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0) 992 ret <4 x i32> %0 993} 994 995define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(ptr %base, <4 x i32> %offset) { 996; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32: 997; CHECK: @ %bb.0: @ %entry 998; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] 999; CHECK-NEXT: vmov q0, q1 1000; CHECK-NEXT: bx lr 1001entry: 1002 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 1) 1003 ret <4 x i32> %0 1004} 1005 1006define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(ptr %base, <4 x i32> %offset, i16 zeroext %p) { 1007; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32: 1008; CHECK: @ %bb.0: @ %entry 1009; CHECK-NEXT: vmsr p0, r1 1010; CHECK-NEXT: vpst 1011; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] 1012; CHECK-NEXT: vmov q0, q1 1013; CHECK-NEXT: bx lr 1014entry: 1015 %0 = zext i16 %p to i32 1016 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1017 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1) 1018 ret <4 x float> %2 1019} 1020 1021define arm_aapcs_vfpcc <4 x 
i32> @test_vldrwq_gather_shifted_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) { 1022; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32: 1023; CHECK: @ %bb.0: @ %entry 1024; CHECK-NEXT: vmsr p0, r1 1025; CHECK-NEXT: vpst 1026; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] 1027; CHECK-NEXT: vmov q0, q1 1028; CHECK-NEXT: bx lr 1029entry: 1030 %0 = zext i16 %p to i32 1031 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1032 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1) 1033 ret <4 x i32> %2 1034} 1035 1036define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) { 1037; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32: 1038; CHECK: @ %bb.0: @ %entry 1039; CHECK-NEXT: vmsr p0, r1 1040; CHECK-NEXT: vpst 1041; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] 1042; CHECK-NEXT: vmov q0, q1 1043; CHECK-NEXT: bx lr 1044entry: 1045 %0 = zext i16 %p to i32 1046 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1047 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1) 1048 ret <4 x i32> %2 1049} 1050 1051define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { 1052; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16: 1053; CHECK: @ %bb.0: @ %entry 1054; CHECK-NEXT: vmsr p0, r1 1055; CHECK-NEXT: vpst 1056; CHECK-NEXT: vstrbt.16 q1, [r0, q0] 1057; CHECK-NEXT: bx lr 1058entry: 1059 %0 = zext i16 %p to i32 1060 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1061 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1) 1062 ret void 1063} 1064 1065declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr, <8 x 
i16>, <8 x i16>, i32, i32, <8 x i1>) 1066 1067define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1068; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32: 1069; CHECK: @ %bb.0: @ %entry 1070; CHECK-NEXT: vmsr p0, r1 1071; CHECK-NEXT: vpst 1072; CHECK-NEXT: vstrbt.32 q1, [r0, q0] 1073; CHECK-NEXT: bx lr 1074entry: 1075 %0 = zext i16 %p to i32 1076 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1077 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1) 1078 ret void 1079} 1080 1081declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) 1082 1083define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { 1084; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8: 1085; CHECK: @ %bb.0: @ %entry 1086; CHECK-NEXT: vmsr p0, r1 1087; CHECK-NEXT: vpst 1088; CHECK-NEXT: vstrbt.8 q1, [r0, q0] 1089; CHECK-NEXT: bx lr 1090entry: 1091 %0 = zext i16 %p to i32 1092 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1093 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1) 1094 ret void 1095} 1096 1097declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>) 1098 1099define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { 1100; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16: 1101; CHECK: @ %bb.0: @ %entry 1102; CHECK-NEXT: vmsr p0, r1 1103; CHECK-NEXT: vpst 1104; CHECK-NEXT: vstrbt.16 q1, [r0, q0] 1105; CHECK-NEXT: bx lr 1106entry: 1107 %0 = zext i16 %p to i32 1108 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1109 call void 
@llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1) 1110 ret void 1111} 1112 1113define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1114; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32: 1115; CHECK: @ %bb.0: @ %entry 1116; CHECK-NEXT: vmsr p0, r1 1117; CHECK-NEXT: vpst 1118; CHECK-NEXT: vstrbt.32 q1, [r0, q0] 1119; CHECK-NEXT: bx lr 1120entry: 1121 %0 = zext i16 %p to i32 1122 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1123 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1) 1124 ret void 1125} 1126 1127define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { 1128; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8: 1129; CHECK: @ %bb.0: @ %entry 1130; CHECK-NEXT: vmsr p0, r1 1131; CHECK-NEXT: vpst 1132; CHECK-NEXT: vstrbt.8 q1, [r0, q0] 1133; CHECK-NEXT: bx lr 1134entry: 1135 %0 = zext i16 %p to i32 1136 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1137 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1) 1138 ret void 1139} 1140 1141define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) { 1142; CHECK-LABEL: test_vstrbq_scatter_offset_s16: 1143; CHECK: @ %bb.0: @ %entry 1144; CHECK-NEXT: vstrb.16 q1, [r0, q0] 1145; CHECK-NEXT: bx lr 1146entry: 1147 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) 1148 ret void 1149} 1150 1151declare void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr, <8 x i16>, <8 x i16>, i32, i32) 1152 1153define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(ptr %base, 
<4 x i32> %offset, <4 x i32> %value) { 1154; CHECK-LABEL: test_vstrbq_scatter_offset_s32: 1155; CHECK: @ %bb.0: @ %entry 1156; CHECK-NEXT: vstrb.32 q1, [r0, q0] 1157; CHECK-NEXT: bx lr 1158entry: 1159 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) 1160 ret void 1161} 1162 1163declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr, <4 x i32>, <4 x i32>, i32, i32) 1164 1165define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value) { 1166; CHECK-LABEL: test_vstrbq_scatter_offset_s8: 1167; CHECK: @ %bb.0: @ %entry 1168; CHECK-NEXT: vstrb.8 q1, [r0, q0] 1169; CHECK-NEXT: bx lr 1170entry: 1171 call void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) 1172 ret void 1173} 1174 1175declare void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr, <16 x i8>, <16 x i8>, i32, i32) 1176 1177define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) { 1178; CHECK-LABEL: test_vstrbq_scatter_offset_u16: 1179; CHECK: @ %bb.0: @ %entry 1180; CHECK-NEXT: vstrb.16 q1, [r0, q0] 1181; CHECK-NEXT: bx lr 1182entry: 1183 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) 1184 ret void 1185} 1186 1187define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) { 1188; CHECK-LABEL: test_vstrbq_scatter_offset_u32: 1189; CHECK: @ %bb.0: @ %entry 1190; CHECK-NEXT: vstrb.32 q1, [r0, q0] 1191; CHECK-NEXT: bx lr 1192entry: 1193 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) 1194 ret void 1195} 1196 1197define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(ptr %base, <16 x i8> %offset, <16 x i8> %value) { 1198; CHECK-LABEL: test_vstrbq_scatter_offset_u8: 1199; 
CHECK: @ %bb.0: @ %entry 1200; CHECK-NEXT: vstrb.8 q1, [r0, q0] 1201; CHECK-NEXT: bx lr 1202entry: 1203 call void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) 1204 ret void 1205} 1206 1207define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { 1208; CHECK-LABEL: test_vstrdq_scatter_base_p_s64: 1209; CHECK: @ %bb.0: @ %entry 1210; CHECK-NEXT: vmsr p0, r0 1211; CHECK-NEXT: vpst 1212; CHECK-NEXT: vstrdt.64 q1, [q0, #888] 1213; CHECK-NEXT: bx lr 1214entry: 1215 %0 = zext i16 %p to i32 1216 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0) 1217 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <2 x i1> %1) 1218 ret void 1219} 1220 1221declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>) 1222 1223define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { 1224; CHECK-LABEL: test_vstrdq_scatter_base_p_u64: 1225; CHECK: @ %bb.0: @ %entry 1226; CHECK-NEXT: vmsr p0, r0 1227; CHECK-NEXT: vpst 1228; CHECK-NEXT: vstrdt.64 q1, [q0, #264] 1229; CHECK-NEXT: bx lr 1230entry: 1231 %0 = zext i16 %p to i32 1232 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0) 1233 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <2 x i1> %1) 1234 ret void 1235} 1236 1237define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) { 1238; CHECK-LABEL: test_vstrdq_scatter_base_s64: 1239; CHECK: @ %bb.0: @ %entry 1240; CHECK-NEXT: vstrd.64 q1, [q0, #408] 1241; CHECK-NEXT: bx lr 1242entry: 1243 call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value) 1244 ret void 1245} 1246 1247declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>) 1248 
; NOTE(review): autogenerated test (update_llc_test_checks.py). Regenerate the
; assertion lines with that script rather than editing them by hand.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [q0, #-472]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q0, [q1, #248]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 248, <2 x i64> %value, <2 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q0, [q1, #136]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 136, <2 x i64> %value, <2 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(ptr %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrd.64 q0, [q1, #208]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
  store <2 x i64> %1, ptr %addr, align 8
  ret void
}

declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(ptr %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrd.64 q0, [q1, #-168]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value)
  store <2 x i64> %1, ptr %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <2 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <2 x i1>)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <2 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr, <2 x i64>, <2 x i64>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <2 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <2 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(ptr %base, <8 x i16> %offset, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr, <8 x i16>, <8 x half>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr, <8 x i16>, <8 x half>, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}


define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}


define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}


define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
  ret void
}


define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(ptr %base, <4 x i32> %offset,
<4 x i32> %value) { 1546; CHECK-LABEL: test_vstrhq_scatter_offset_u32: 1547; CHECK: @ %bb.0: @ %entry 1548; CHECK-NEXT: vstrh.32 q1, [r0, q0] 1549; CHECK-NEXT: bx lr 1550entry: 1551 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0) 1552 ret void 1553} 1554 1555define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(ptr %base, <8 x i16> %offset, <8 x half> %value) { 1556; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16: 1557; CHECK: @ %bb.0: @ %entry 1558; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] 1559; CHECK-NEXT: bx lr 1560entry: 1561 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1) 1562 ret void 1563} 1564 1565define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) { 1566; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16: 1567; CHECK: @ %bb.0: @ %entry 1568; CHECK-NEXT: vmsr p0, r1 1569; CHECK-NEXT: vpst 1570; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] 1571; CHECK-NEXT: bx lr 1572entry: 1573 %0 = zext i16 %p to i32 1574 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1575 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1) 1576 ret void 1577} 1578 1579define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { 1580; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16: 1581; CHECK: @ %bb.0: @ %entry 1582; CHECK-NEXT: vmsr p0, r1 1583; CHECK-NEXT: vpst 1584; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] 1585; CHECK-NEXT: bx lr 1586entry: 1587 %0 = zext i16 %p to i32 1588 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1589 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> 
%value, i32 16, i32 1, <8 x i1> %1) 1590 ret void 1591} 1592 1593define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1594; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32: 1595; CHECK: @ %bb.0: @ %entry 1596; CHECK-NEXT: vmsr p0, r1 1597; CHECK-NEXT: vpst 1598; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1] 1599; CHECK-NEXT: bx lr 1600entry: 1601 %0 = zext i16 %p to i32 1602 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1603 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1) 1604 ret void 1605} 1606 1607define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { 1608; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16: 1609; CHECK: @ %bb.0: @ %entry 1610; CHECK-NEXT: vmsr p0, r1 1611; CHECK-NEXT: vpst 1612; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] 1613; CHECK-NEXT: bx lr 1614entry: 1615 %0 = zext i16 %p to i32 1616 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1617 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1) 1618 ret void 1619} 1620 1621define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1622; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32: 1623; CHECK: @ %bb.0: @ %entry 1624; CHECK-NEXT: vmsr p0, r1 1625; CHECK-NEXT: vpst 1626; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1] 1627; CHECK-NEXT: bx lr 1628entry: 1629 %0 = zext i16 %p to i32 1630 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1631 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1) 1632 ret void 1633} 1634 1635define 
arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) { 1636; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16: 1637; CHECK: @ %bb.0: @ %entry 1638; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] 1639; CHECK-NEXT: bx lr 1640entry: 1641 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1) 1642 ret void 1643} 1644 1645define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) { 1646; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32: 1647; CHECK: @ %bb.0: @ %entry 1648; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1] 1649; CHECK-NEXT: bx lr 1650entry: 1651 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1) 1652 ret void 1653} 1654 1655define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) { 1656; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16: 1657; CHECK: @ %bb.0: @ %entry 1658; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] 1659; CHECK-NEXT: bx lr 1660entry: 1661 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1) 1662 ret void 1663} 1664 1665define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) { 1666; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32: 1667; CHECK: @ %bb.0: @ %entry 1668; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1] 1669; CHECK-NEXT: bx lr 1670entry: 1671 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1) 1672 ret void 1673} 1674 1675define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) { 1676; CHECK-LABEL: test_vstrwq_scatter_base_f32: 1677; CHECK: @ %bb.0: @ %entry 1678; CHECK-NEXT: vstrw.32 q1, [q0, 
#380] 1679; CHECK-NEXT: bx lr 1680entry: 1681 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value) 1682 ret void 1683} 1684 1685declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>) 1686 1687define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) { 1688; CHECK-LABEL: test_vstrwq_scatter_base_p_f32: 1689; CHECK: @ %bb.0: @ %entry 1690; CHECK-NEXT: vmsr p0, r0 1691; CHECK-NEXT: vpst 1692; CHECK-NEXT: vstrwt.32 q1, [q0, #-400] 1693; CHECK-NEXT: bx lr 1694entry: 1695 %0 = zext i16 %p to i32 1696 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1697 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1) 1698 ret void 1699} 1700 1701declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>) 1702 1703define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) { 1704; CHECK-LABEL: test_vstrwq_scatter_base_p_s32: 1705; CHECK: @ %bb.0: @ %entry 1706; CHECK-NEXT: vmsr p0, r0 1707; CHECK-NEXT: vpst 1708; CHECK-NEXT: vstrwt.32 q1, [q0, #48] 1709; CHECK-NEXT: bx lr 1710entry: 1711 %0 = zext i16 %p to i32 1712 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1713 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1) 1714 ret void 1715} 1716 1717declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>) 1718 1719define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) { 1720; CHECK-LABEL: test_vstrwq_scatter_base_p_u32: 1721; CHECK: @ %bb.0: @ %entry 1722; CHECK-NEXT: vmsr p0, r0 1723; CHECK-NEXT: vpst 1724; CHECK-NEXT: vstrwt.32 q1, [q0, #-376] 1725; CHECK-NEXT: bx lr 1726entry: 1727 
%0 = zext i16 %p to i32 1728 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1729 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1) 1730 ret void 1731} 1732 1733define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) { 1734; CHECK-LABEL: test_vstrwq_scatter_base_s32: 1735; CHECK: @ %bb.0: @ %entry 1736; CHECK-NEXT: vstrw.32 q1, [q0, #156] 1737; CHECK-NEXT: bx lr 1738entry: 1739 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value) 1740 ret void 1741} 1742 1743declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>) 1744 1745define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) { 1746; CHECK-LABEL: test_vstrwq_scatter_base_u32: 1747; CHECK: @ %bb.0: @ %entry 1748; CHECK-NEXT: vstrw.32 q1, [q0, #212] 1749; CHECK-NEXT: bx lr 1750entry: 1751 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value) 1752 ret void 1753} 1754 1755define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(ptr %addr, <4 x float> %value) { 1756; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32: 1757; CHECK: @ %bb.0: @ %entry 1758; CHECK-NEXT: vldrw.u32 q1, [r0] 1759; CHECK-NEXT: vstrw.32 q0, [q1, #-412]! 
1760; CHECK-NEXT: vstrw.32 q1, [r0] 1761; CHECK-NEXT: bx lr 1762entry: 1763 %0 = load <4 x i32>, ptr %addr, align 8 1764 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value) 1765 store <4 x i32> %1, ptr %addr, align 8 1766 ret void 1767} 1768 1769declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>) 1770 1771define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(ptr %addr, <4 x float> %value, i16 zeroext %p) { 1772; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32: 1773; CHECK: @ %bb.0: @ %entry 1774; CHECK-NEXT: vldrw.u32 q1, [r0] 1775; CHECK-NEXT: vmsr p0, r1 1776; CHECK-NEXT: vpst 1777; CHECK-NEXT: vstrwt.32 q0, [q1, #236]! 1778; CHECK-NEXT: vstrw.32 q1, [r0] 1779; CHECK-NEXT: bx lr 1780entry: 1781 %0 = load <4 x i32>, ptr %addr, align 8 1782 %1 = zext i16 %p to i32 1783 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1784 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2) 1785 store <4 x i32> %3, ptr %addr, align 8 1786 ret void 1787} 1788 1789declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>) 1790 1791define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(ptr %addr, <4 x i32> %value, i16 zeroext %p) { 1792; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32: 1793; CHECK: @ %bb.0: @ %entry 1794; CHECK-NEXT: vldrw.u32 q1, [r0] 1795; CHECK-NEXT: vmsr p0, r1 1796; CHECK-NEXT: vpst 1797; CHECK-NEXT: vstrwt.32 q0, [q1, #328]! 
1798; CHECK-NEXT: vstrw.32 q1, [r0] 1799; CHECK-NEXT: bx lr 1800entry: 1801 %0 = load <4 x i32>, ptr %addr, align 8 1802 %1 = zext i16 %p to i32 1803 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1804 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2) 1805 store <4 x i32> %3, ptr %addr, align 8 1806 ret void 1807} 1808 1809declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>) 1810 1811define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(ptr %addr, <4 x i32> %value, i16 zeroext %p) { 1812; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32: 1813; CHECK: @ %bb.0: @ %entry 1814; CHECK-NEXT: vldrw.u32 q1, [r0] 1815; CHECK-NEXT: vmsr p0, r1 1816; CHECK-NEXT: vpst 1817; CHECK-NEXT: vstrwt.32 q0, [q1, #412]! 1818; CHECK-NEXT: vstrw.32 q1, [r0] 1819; CHECK-NEXT: bx lr 1820entry: 1821 %0 = load <4 x i32>, ptr %addr, align 8 1822 %1 = zext i16 %p to i32 1823 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1824 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2) 1825 store <4 x i32> %3, ptr %addr, align 8 1826 ret void 1827} 1828 1829define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(ptr %addr, <4 x i32> %value) { 1830; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32: 1831; CHECK: @ %bb.0: @ %entry 1832; CHECK-NEXT: vldrw.u32 q1, [r0] 1833; CHECK-NEXT: vstrw.32 q0, [q1, #-152]! 
1834; CHECK-NEXT: vstrw.32 q1, [r0] 1835; CHECK-NEXT: bx lr 1836entry: 1837 %0 = load <4 x i32>, ptr %addr, align 8 1838 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value) 1839 store <4 x i32> %1, ptr %addr, align 8 1840 ret void 1841} 1842 1843declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>) 1844 1845define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(ptr %addr, <4 x i32> %value) { 1846; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32: 1847; CHECK: @ %bb.0: @ %entry 1848; CHECK-NEXT: vldrw.u32 q1, [r0] 1849; CHECK-NEXT: vstrw.32 q0, [q1, #64]! 1850; CHECK-NEXT: vstrw.32 q1, [r0] 1851; CHECK-NEXT: bx lr 1852entry: 1853 %0 = load <4 x i32>, ptr %addr, align 8 1854 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value) 1855 store <4 x i32> %1, ptr %addr, align 8 1856 ret void 1857} 1858 1859define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(ptr %base, <4 x i32> %offset, <4 x float> %value) { 1860; CHECK-LABEL: test_vstrwq_scatter_offset_f32: 1861; CHECK: @ %bb.0: @ %entry 1862; CHECK-NEXT: vstrw.32 q1, [r0, q0] 1863; CHECK-NEXT: bx lr 1864entry: 1865 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0) 1866 ret void 1867} 1868 1869declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr, <4 x i32>, <4 x float>, i32, i32) 1870 1871define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) { 1872; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32: 1873; CHECK: @ %bb.0: @ %entry 1874; CHECK-NEXT: vmsr p0, r1 1875; CHECK-NEXT: vpst 1876; CHECK-NEXT: vstrwt.32 q1, [r0, q0] 1877; CHECK-NEXT: bx lr 1878entry: 1879 %0 = zext i16 %p to i32 1880 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1881 call void 
@llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1) 1882 ret void 1883} 1884 1885declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr, <4 x i32>, <4 x float>, i32, i32, <4 x i1>) 1886 1887define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1888; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32: 1889; CHECK: @ %bb.0: @ %entry 1890; CHECK-NEXT: vmsr p0, r1 1891; CHECK-NEXT: vpst 1892; CHECK-NEXT: vstrwt.32 q1, [r0, q0] 1893; CHECK-NEXT: bx lr 1894entry: 1895 %0 = zext i16 %p to i32 1896 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1897 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1) 1898 ret void 1899} 1900 1901 1902define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1903; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32: 1904; CHECK: @ %bb.0: @ %entry 1905; CHECK-NEXT: vmsr p0, r1 1906; CHECK-NEXT: vpst 1907; CHECK-NEXT: vstrwt.32 q1, [r0, q0] 1908; CHECK-NEXT: bx lr 1909entry: 1910 %0 = zext i16 %p to i32 1911 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1912 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1) 1913 ret void 1914} 1915 1916define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) { 1917; CHECK-LABEL: test_vstrwq_scatter_offset_s32: 1918; CHECK: @ %bb.0: @ %entry 1919; CHECK-NEXT: vstrw.32 q1, [r0, q0] 1920; CHECK-NEXT: bx lr 1921entry: 1922 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0) 1923 ret void 1924} 1925 1926 1927define arm_aapcs_vfpcc void 
@test_vstrwq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) { 1928; CHECK-LABEL: test_vstrwq_scatter_offset_u32: 1929; CHECK: @ %bb.0: @ %entry 1930; CHECK-NEXT: vstrw.32 q1, [r0, q0] 1931; CHECK-NEXT: bx lr 1932entry: 1933 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0) 1934 ret void 1935} 1936 1937define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(ptr %base, <4 x i32> %offset, <4 x float> %value) { 1938; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32: 1939; CHECK: @ %bb.0: @ %entry 1940; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] 1941; CHECK-NEXT: bx lr 1942entry: 1943 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2) 1944 ret void 1945} 1946 1947define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) { 1948; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32: 1949; CHECK: @ %bb.0: @ %entry 1950; CHECK-NEXT: vmsr p0, r1 1951; CHECK-NEXT: vpst 1952; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] 1953; CHECK-NEXT: bx lr 1954entry: 1955 %0 = zext i16 %p to i32 1956 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1957 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1) 1958 ret void 1959} 1960 1961define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1962; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32: 1963; CHECK: @ %bb.0: @ %entry 1964; CHECK-NEXT: vmsr p0, r1 1965; CHECK-NEXT: vpst 1966; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] 1967; CHECK-NEXT: bx lr 1968entry: 1969 %0 = zext i16 %p to i32 1970 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1971 call void 
@llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1) 1972 ret void 1973} 1974 1975define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1976; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32: 1977; CHECK: @ %bb.0: @ %entry 1978; CHECK-NEXT: vmsr p0, r1 1979; CHECK-NEXT: vpst 1980; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] 1981; CHECK-NEXT: bx lr 1982entry: 1983 %0 = zext i16 %p to i32 1984 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1985 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1) 1986 ret void 1987} 1988 1989define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) { 1990; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32: 1991; CHECK: @ %bb.0: @ %entry 1992; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] 1993; CHECK-NEXT: bx lr 1994entry: 1995 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2) 1996 ret void 1997} 1998 1999define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) { 2000; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32: 2001; CHECK: @ %bb.0: @ %entry 2002; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] 2003; CHECK-NEXT: bx lr 2004entry: 2005 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2) 2006 ret void 2007} 2008