; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN

; bf16 has no native vfsqrt, so every configuration widens to f32, takes the
; sqrt there, and narrows back (vfwcvtbf16 / vfsqrt / vfncvtbf16).

declare <vscale x 1 x bfloat> @llvm.experimental.constrained.sqrt.nxv1bf16(<vscale x 1 x bfloat>, metadata, metadata)

define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %r = call <vscale x 1 x bfloat> @llvm.experimental.constrained.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x bfloat> %r
}

declare <vscale x 2 x bfloat> @llvm.experimental.constrained.sqrt.nxv2bf16(<vscale x 2 x bfloat>, metadata, metadata)

define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x bfloat> @llvm.experimental.constrained.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x bfloat> %r
}

declare <vscale x 4 x bfloat> @llvm.experimental.constrained.sqrt.nxv4bf16(<vscale x 4 x bfloat>, metadata, metadata)

define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x bfloat> @llvm.experimental.constrained.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x bfloat> %r
}

declare <vscale x 8 x bfloat> @llvm.experimental.constrained.sqrt.nxv8bf16(<vscale x 8 x bfloat>, metadata, metadata)

define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x bfloat> @llvm.experimental.constrained.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x bfloat> %r
}

declare <vscale x 16 x bfloat> @llvm.experimental.constrained.sqrt.nxv16bf16(<vscale x 16 x bfloat>, metadata, metadata)

define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %r = call <vscale x 16 x bfloat> @llvm.experimental.constrained.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x bfloat> %r
}
declare <vscale x 32 x bfloat> @llvm.experimental.constrained.sqrt.nxv32bf16(<vscale x 32 x bfloat>, metadata, metadata)

; nxv32bf16 widened to f32 is nxv32f32 (LMUL 16), which exceeds the register
; group limit, so the operation is split into two m4->m8 halves.

define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT:    ret
  %r = call <vscale x 32 x bfloat> @llvm.experimental.constrained.sqrt.nxv32bf16(<vscale x 32 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x bfloat> %r
}

; f16: ZVFH has a native vfsqrt; ZVFHMIN must widen to f32 and narrow back.

declare <vscale x 1 x half> @llvm.experimental.constrained.sqrt.nxv1f16(<vscale x 1 x half>, metadata, metadata)

define <vscale x 1 x half> @vfsqrt_nxv1f16(<vscale x 1 x half> %v) strictfp {
; ZVFH-LABEL: vfsqrt_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 1 x half> @llvm.experimental.constrained.sqrt.nxv1f16(<vscale x 1 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x half> %r
}

declare <vscale x 2 x half> @llvm.experimental.constrained.sqrt.nxv2f16(<vscale x 2 x half>, metadata, metadata)

define <vscale x 2 x half> @vfsqrt_nxv2f16(<vscale x 2 x half> %v) strictfp {
; ZVFH-LABEL: vfsqrt_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 2 x half> @llvm.experimental.constrained.sqrt.nxv2f16(<vscale x 2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x half> %r
}

declare <vscale x 4 x half> @llvm.experimental.constrained.sqrt.nxv4f16(<vscale x 4 x half>, metadata, metadata)

define <vscale x 4 x half> @vfsqrt_nxv4f16(<vscale x 4 x half> %v) strictfp {
; ZVFH-LABEL: vfsqrt_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 4 x half> @llvm.experimental.constrained.sqrt.nxv4f16(<vscale x 4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x half> %r
}

declare <vscale x 8 x half> @llvm.experimental.constrained.sqrt.nxv8f16(<vscale x 8 x half>, metadata, metadata)

define <vscale x 8 x half> @vfsqrt_nxv8f16(<vscale x 8 x half> %v) strictfp {
; ZVFH-LABEL: vfsqrt_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 8 x half> @llvm.experimental.constrained.sqrt.nxv8f16(<vscale x 8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x half> %r
}

declare <vscale x 16 x half> @llvm.experimental.constrained.sqrt.nxv16f16(<vscale x 16 x half>, metadata, metadata)

define <vscale x 16 x half> @vfsqrt_nxv16f16(<vscale x 16 x half> %v) strictfp {
; ZVFH-LABEL: vfsqrt_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 16 x half> @llvm.experimental.constrained.sqrt.nxv16f16(<vscale x 16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x half> %r
}

declare <vscale x 32 x half> @llvm.experimental.constrained.sqrt.nxv32f16(<vscale x 32 x half>, metadata, metadata)

define <vscale x 32 x half> @vfsqrt_nxv32f16(<vscale x 32 x half> %v) strictfp {
; ZVFH-LABEL: vfsqrt_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 32 x half> @llvm.experimental.constrained.sqrt.nxv32f16(<vscale x 32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x half> %r
}

; f32 and f64 have native vfsqrt in all tested configurations.

declare <vscale x 1 x float> @llvm.experimental.constrained.sqrt.nxv1f32(<vscale x 1 x float>, metadata, metadata)

define <vscale x 1 x float> @vfsqrt_nxv1f32(<vscale x 1 x float> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 1 x float> @llvm.experimental.constrained.sqrt.nxv1f32(<vscale x 1 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x float> %r
}

declare <vscale x 2 x float> @llvm.experimental.constrained.sqrt.nxv2f32(<vscale x 2 x float>, metadata, metadata)

define <vscale x 2 x float> @vfsqrt_nxv2f32(<vscale x 2 x float> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x float> @llvm.experimental.constrained.sqrt.nxv2f32(<vscale x 2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x float> %r
}

declare <vscale x 4 x float> @llvm.experimental.constrained.sqrt.nxv4f32(<vscale x 4 x float>, metadata, metadata)

define <vscale x 4 x float> @vfsqrt_nxv4f32(<vscale x 4 x float> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x float> @llvm.experimental.constrained.sqrt.nxv4f32(<vscale x 4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x float> %r
}

declare <vscale x 8 x float> @llvm.experimental.constrained.sqrt.nxv8f32(<vscale x 8 x float>, metadata, metadata)

define <vscale x 8 x float> @vfsqrt_nxv8f32(<vscale x 8 x float> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x float> @llvm.experimental.constrained.sqrt.nxv8f32(<vscale x 8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x float> %r
}

declare <vscale x 16 x float> @llvm.experimental.constrained.sqrt.nxv16f32(<vscale x 16 x float>, metadata, metadata)

define <vscale x 16 x float> @vfsqrt_nxv16f32(<vscale x 16 x float> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 16 x float> @llvm.experimental.constrained.sqrt.nxv16f32(<vscale x 16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x float> %r
}

declare <vscale x 1 x double> @llvm.experimental.constrained.sqrt.nxv1f64(<vscale x 1 x double>, metadata, metadata)

define <vscale x 1 x double> @vfsqrt_nxv1f64(<vscale x 1 x double> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 1 x double> @llvm.experimental.constrained.sqrt.nxv1f64(<vscale x 1 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x double> %r
}

declare <vscale x 2 x double> @llvm.experimental.constrained.sqrt.nxv2f64(<vscale x 2 x double>, metadata, metadata)

define <vscale x 2 x double> @vfsqrt_nxv2f64(<vscale x 2 x double> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.experimental.constrained.sqrt.nxv2f64(<vscale x 2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x double> %r
}

declare <vscale x 4 x double> @llvm.experimental.constrained.sqrt.nxv4f64(<vscale x 4 x double>, metadata, metadata)

define <vscale x 4 x double> @vfsqrt_nxv4f64(<vscale x 4 x double> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x double> @llvm.experimental.constrained.sqrt.nxv4f64(<vscale x 4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x double> %r
}

declare <vscale x 8 x double> @llvm.experimental.constrained.sqrt.nxv8f64(<vscale x 8 x double>, metadata, metadata)

define <vscale x 8 x double> @vfsqrt_nxv8f64(<vscale x 8 x double> %v) strictfp {
; CHECK-LABEL: vfsqrt_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x double> @llvm.experimental.constrained.sqrt.nxv8f64(<vscale x 8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x double> %r
}