; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN

declare <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}
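
; Note on the bf16 cases in this file: zvfbfmin provides only conversions, not
; a bf16 vfsqrt.v, so every bf16 test widens to f32 (vfwcvtbf16.f.f.v), takes
; the square root at twice the LMUL, and narrows back (vfncvtbf16.f.f.w). The
; _unmasked variants pass an all-true mask, which lowers to the maskless forms
; of the same instructions.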

declare <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

declare <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

declare <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}
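
; At nxv16bf16 the widened f32 value already occupies a full m8 register
; group, so this is the widest bf16 type handled in one piece; the nxv32bf16
; tests below have to be split in half.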

define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

declare <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    sltu a4, a0, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    and a3, a4, a3
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB10_2:
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmset.m v16
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    sltu a4, a0, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v16, a2
; CHECK-NEXT:    and a3, a4, a3
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB11_2:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
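
; f16 tests: with ZVFH, vp.sqrt on half maps directly to a masked vfsqrt.v.
; With ZVFHMIN there is no f16 vector arithmetic, so the lowering mirrors the
; bf16 promotion above, using the generic vfwcvt.f.f.v/vfncvt.f.f.w
; conversions instead.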

declare <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vfsqrt_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfsqrt_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv1f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfsqrt_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfsqrt_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %v
}
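
; The ZVFHMIN sequences switch SEW/LMUL with "vsetvli zero, zero, ..." between
; the widen, the f32 sqrt, and the narrow; that form keeps the existing vl, so
; the EVL in a0 is only consumed by the first vsetvli.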

declare <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vfsqrt_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vfsqrt_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x half> %v
}

declare <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vfsqrt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vfsqrt_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x half> %v
}
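
; nxv16f16 still fits after widening (e32, m8 under ZVFHMIN); nxv32f16 does
; not, so its ZVFHMIN lowering below splits the vector in half, while ZVFH
; handles it with a single m8 vfsqrt.v.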

declare <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x half> @vfsqrt_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x half> %v
}

define <vscale x 16 x half> @vfsqrt_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x half> %v
}

declare <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)

define <vscale x 32 x half> @vfsqrt_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v16, v0
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    sltu a4, a0, a3
; ZVFHMIN-NEXT:    addi a4, a4, -1
; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
; ZVFHMIN-NEXT:    and a3, a4, a3
; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB22_2:
; ZVFHMIN-NEXT:    vmv1r.v v0, v16
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x half> %v
}
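
; Both nxv32 split lowerings use the same skeleton: a1 = 2*vlenb is the
; element count of one nxv16 half. The upper half is processed first with
; EVL = max(evl - a1, 0), computed branchlessly by the sltu/addi/and
; sequence, under the mask slid down by vlenb/4 e8 elements (the low half's
; mask bits); the bltu/mv then clamps the low half's EVL to at most a1.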

define <vscale x 32 x half> @vfsqrt_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv32f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT:    vmset.m v16
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    sltu a4, a0, a3
; ZVFHMIN-NEXT:    addi a4, a4, -1
; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
; ZVFHMIN-NEXT:    and a3, a4, a3
; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB23_2:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x half> %v
}

declare <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vfsqrt_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfsqrt_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x float> %v
}

declare <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfsqrt_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfsqrt_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x float> %v
}
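
; f32 and f64 vfsqrt.v is native in every configuration tested here (all RUN
; lines enable +v and +d), so the CHECK prefix is shared and each test is a
; single vsetvli plus vfsqrt.v.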

declare <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfsqrt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfsqrt_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfsqrt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfsqrt_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %v
}

declare <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfsqrt_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfsqrt_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x float> %v
}

declare <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfsqrt_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfsqrt_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfsqrt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfsqrt_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfsqrt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfsqrt_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)

define <vscale x 7 x double> @vfsqrt_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv7f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
  ret <vscale x 7 x double> %v
}

define <vscale x 7 x double> @vfsqrt_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv7f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 7 x double> %v
}
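
; nxv7f64 is a non-power-of-two element count; type legalization widens it to
; a full m8 group, so it lowers identically to nxv8f64 below.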

declare <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfsqrt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfsqrt_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x double> %v
}

; Test splitting: nxv16f64 does not fit in a single m8 register group even
; before any widening, so the operation is split into two m8 halves.
declare <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vfsqrt_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v24, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a2, a1, 3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vfsqrt_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a2, a3, a2
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    bltu a0, a1, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}