; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -stop-after=finalize-isel -o - %s | FileCheck %s

; Tests that the ARM MVE gather-load / scatter-store intrinsics
; (@llvm.arm.mve.vldr.gather.* / @llvm.arm.mve.vstr.scatter.*) select to the
; expected MVE pseudo-instructions at finalize-isel, including the
; predicated (_z / _p) and write-back (_wb) variants, and that the loads
; carry an early-clobber result operand.

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: name: test_vldrbq_gather_offset_s16
; CHECK: early-clobber %2:mqpr = MVE_VLDRBS16_rq %0, %1, 0, $noreg, $noreg :: (load (s64), align 1)
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: name: test_vldrbq_gather_offset_z_s32
; CHECK: early-clobber %4:mqpr = MVE_VLDRBS32_rq %0, %1, 1, killed %3, $noreg :: (load (s32), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
; CHECK-LABEL: name: test_vldrdq_gather_base_s64
; CHECK: early-clobber %1:mqpr = MVE_VLDRDU64_qi %0, 616, 0, $noreg, $noreg :: (load (s128), align 1)
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: name: test_vldrwq_gather_base_z_f32
; CHECK: early-clobber %3:mqpr = MVE_VLDRWU32_qi %0, -300, 1, killed %2, $noreg :: (load (s128), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(ptr %addr) {
; CHECK-LABEL: name: test_vldrdq_gather_base_wb_s64
; CHECK: %2:mqpr, early-clobber %3:mqpr = MVE_VLDRDU64_qi_pre %1, 576, 0, $noreg, $noreg :: (load (s128), align 1)
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, ptr %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: name: test_vldrwq_gather_base_wb_z_f32
; CHECK: %4:mqpr, early-clobber %5:mqpr = MVE_VLDRWU32_qi_pre %3, -352, 1, killed %2, $noreg :: (load (s128), align 1)
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
  %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
  store <4 x i32> %4, ptr %addr, align 8
  %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
  ret <4 x float> %5
}


define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: name: test_vstrbq_scatter_offset_s32
; CHECK: MVE_VSTRB32_rq %2, %0, %1, 0, $noreg, $noreg :: (store (s32), align 1)
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: name: test_vstrbq_scatter_offset_p_s8
; CHECK: MVE_VSTRB8_rq %2, %0, %1, 1, killed %4, $noreg :: (store (s128), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_u64
; CHECK: MVE_VSTRD64_qi %1, %0, -472, 0, $noreg, $noreg :: (store (s128), align 1)
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_p_s64
; CHECK: MVE_VSTRD64_qi %1, %0, 888, 1, killed %3, $noreg :: (store (s128), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <2 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(ptr %addr, <2 x i64> %value) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_wb_s64
; CHECK: %3:mqpr = MVE_VSTRD64_qi_pre %1, %2, 208, 0, $noreg, $noreg :: (store (s128), align 1)
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
  store <2 x i64> %1, ptr %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_wb_p_s64
; CHECK: %5:mqpr = MVE_VSTRD64_qi_pre %1, %3, 248, 1, killed %4, $noreg :: (store (s128), align 1)
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 248, <2 x i64> %value, <2 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)
declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr, <4 x i32>, <4 x i32>, i32, i32)
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)
declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)