; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs -o - %s | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs -early-live-intervals -o - %s | FileCheck %s

; Each function below performs a masked vector load whose mask is the signed
; "less than zero" comparison of the vector argument %a, then converts the
; loaded integers to floating point with sitofp. The two llc invocations above
; (the second one adds -early-live-intervals) are verified against the same
; expected assembly. The %blockSize argument is not referenced by any body.

; <4 x i16> masked load (alignment 2) converted to <4 x float>.
; Expected assembly: the mask operand is widened with vmovlb.s16, and the load
; is emitted as a predicated vldrht.s32 inside a VPT block, followed by a
; single vcvt.f32.s32.
define arm_aapcs_vfpcc <4 x float> @foo_v4i16(ptr nocapture readonly %pSrc, i32 %blockSize, <4 x i16> %a) {
; CHECK-LABEL: foo_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vpt.s32 lt, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r0]
; CHECK-NEXT:    vcvt.f32.s32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %active.lane.mask = icmp slt <4 x i16> %a, zeroinitializer
  %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %pSrc, i32 2, <4 x i1> %active.lane.mask, <4 x i16> undef)
  %0 = sitofp <4 x i16> %wide.masked.load to <4 x float>
  ret <4 x float> %0
}

; <8 x i8> masked load (alignment 1) converted to <8 x half>.
; Expected assembly: the mask operand is widened with vmovlb.s8, and the load
; is emitted as a predicated vldrbt.s16 inside a VPT block, followed by a
; single vcvt.f16.s16.
define arm_aapcs_vfpcc <8 x half> @foo_v8i8(ptr nocapture readonly %pSrc, i32 %blockSize, <8 x i8> %a) {
; CHECK-LABEL: foo_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vpt.s16 lt, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r0]
; CHECK-NEXT:    vcvt.f16.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %active.lane.mask = icmp slt <8 x i8> %a, zeroinitializer
  %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %pSrc, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
  %0 = sitofp <8 x i8> %wide.masked.load to <8 x half>
  ret <8 x half> %0
}

; <4 x i8> masked load (alignment 1) converted to <4 x float>.
; Expected assembly: the mask operand is widened in two steps (vmovlb.s8 then
; vmovlb.s16), and the load is emitted as a predicated vldrbt.s32 inside a VPT
; block, followed by a single vcvt.f32.s32.
define arm_aapcs_vfpcc <4 x float> @foo_v4i8(ptr nocapture readonly %pSrc, i32 %blockSize, <4 x i8> %a) {
; CHECK-LABEL: foo_v4i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vpt.s32 lt, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r0]
; CHECK-NEXT:    vcvt.f32.s32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %active.lane.mask = icmp slt <4 x i8> %a, zeroinitializer
  %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %pSrc, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
  %0 = sitofp <4 x i8> %wide.masked.load to <4 x float>
  ret <4 x float> %0
}

; <4 x i32> masked load (alignment 4) converted to <4 x double>.
; Expected assembly: the load is a predicated vldrwt.u32 inside a VPT block.
; The conversion is not a vector instruction here: for each of the four lanes
; the value is moved to a core register, its high word is produced with
; asrs #31, and __aeabi_l2d is called; the results are reassembled into q0/q1.
define arm_aapcs_vfpcc <4 x double> @foo_v4i32(ptr nocapture readonly %pSrc, i32 %blockSize, <4 x i32> %a) {
; CHECK-LABEL: foo_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vpt.s32 lt, q0, zr
; CHECK-NEXT:    vldrwt.u32 q5, [r0]
; CHECK-NEXT:    vmov.f32 s2, s23
; CHECK-NEXT:    vmov.f32 s16, s22
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    asrs r1, r0, #31
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov r2, s16
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    asrs r3, r2, #31
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov.f32 s2, s21
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    asrs r3, r2, #31
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov r2, s20
; CHECK-NEXT:    vmov d11, r0, r1
; CHECK-NEXT:    asrs r3, r2, #31
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov d10, r0, r1
; CHECK-NEXT:    vmov q1, q4
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %active.lane.mask = icmp slt <4 x i32> %a, zeroinitializer
  %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %pSrc, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
  %0 = sitofp <4 x i32> %wide.masked.load to <4 x double>
  ret <4 x double> %0
}

; Declarations of the masked-load intrinsics called by the functions above,
; one per loaded vector type.
declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)

declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)

declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>)

declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)