; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
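
; The ZVFH runs have a native half-precision vector sqrt. The ZVFHMIN runs
; only provide f16<->f32 conversions, so f16 sqrt is lowered by widening to
; f32, taking the sqrt there, and narrowing the result back to f16 (the
; vfwcvt.f.f.v / vfsqrt.v / vfncvt.f.f.w sequences below).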

declare <2 x half> @llvm.vp.sqrt.v2f16(<2 x half>, <2 x i1>, i32)

define <2 x half> @vfsqrt_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfsqrt_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.sqrt.v4f16(<4 x half>, <4 x i1>, i32)

define <4 x half> @vfsqrt_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfsqrt_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.sqrt.v8f16(<8 x half>, <8 x i1>, i32)

define <8 x half> @vfsqrt_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfsqrt_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.sqrt.v16f16(<16 x half>, <16 x i1>, i32)

define <16 x half> @vfsqrt_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfsqrt_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}
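
; f32 and f64 sqrt are legal in both run configurations, so from here on a
; single common CHECK prefix covers the ZVFH and ZVFHMIN runs.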

declare <2 x float> @llvm.vp.sqrt.v2f32(<2 x float>, <2 x i1>, i32)

define <2 x float> @vfsqrt_vv_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfsqrt_vv_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.sqrt.v4f32(<4 x float>, <4 x i1>, i32)

define <4 x float> @vfsqrt_vv_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfsqrt_vv_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.sqrt.v8f32(<8 x float>, <8 x i1>, i32)

define <8 x float> @vfsqrt_vv_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfsqrt_vv_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.sqrt.v16f32(<16 x float>, <16 x i1>, i32)

define <16 x float> @vfsqrt_vv_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfsqrt_vv_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.sqrt.v2f64(<2 x double>, <2 x i1>, i32)

define <2 x double> @vfsqrt_vv_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfsqrt_vv_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.sqrt.v4f64(<4 x double>, <4 x i1>, i32)

define <4 x double> @vfsqrt_vv_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfsqrt_vv_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.sqrt.v8f64(<8 x double>, <8 x i1>, i32)

define <8 x double> @vfsqrt_vv_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfsqrt_vv_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}
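
; <15 x double> is a non-power-of-two type; it still lowers to a single
; LMUL-8 vfsqrt, matching the <16 x double> lowering below.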

declare <15 x double> @llvm.vp.sqrt.v15f64(<15 x double>, <15 x i1>, i32)

define <15 x double> @vfsqrt_vv_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vfsqrt_vv_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.sqrt.v16f64(<16 x double>, <16 x i1>, i32)

define <16 x double> @vfsqrt_vv_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfsqrt_vv_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}
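
; <32 x double> is wider than the largest type handled in one operation here,
; so the sqrt is split in half: the low 16 elements run with EVL min(%evl, 16)
; and the high 16 with EVL max(%evl - 16, 0), the latter computed branchlessly
; by the sltu/addi/and sequence. In the masked case, the upper mask bits are
; slid down into v0 before the second vfsqrt.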

declare <32 x double> @llvm.vp.sqrt.v32f64(<32 x double>, <32 x i1>, i32)

define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v0, 2
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB26_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB26_2:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    addi a1, a0, -16
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfsqrt_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB27_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB27_2:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    addi a1, a0, -16
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}