; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; Code generation test for the llvm.sqrt intrinsic on scalable vectors of
; bfloat, half, float and double.
;
; Four llc invocations are checked: riscv32 and riscv64, each once with +zvfh
; and once with +zfhmin,+zvfhmin. Functions whose expected output is the same
; in all four invocations are checked under the common prefix; the half
; functions are checked separately for the zvfh and zvfhmin invocations.

; bfloat: the expected sequence widens the input to e32 with vfwcvtbf16.f.f.v,
; runs vfsqrt.v at e32, and narrows the result with vfncvtbf16.f.f.w.

define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) {
; CHECK-LABEL: vfsqrt_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %r = call <vscale x 1 x bfloat> @llvm.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v)
  ret <vscale x 1 x bfloat> %r
}

define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) {
; CHECK-LABEL: vfsqrt_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v)
  ret <vscale x 2 x bfloat> %r
}

define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) {
; CHECK-LABEL: vfsqrt_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v)
  ret <vscale x 4 x bfloat> %r
}

define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) {
; CHECK-LABEL: vfsqrt_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v)
  ret <vscale x 8 x bfloat> %r
}

define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) {
; CHECK-LABEL: vfsqrt_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %r = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v)
  ret <vscale x 16 x bfloat> %r
}

; nxv32bf16: the expected output converts the two m4 halves of the input
; (v8 and v12) separately and performs two e32/m8 vfsqrt.v operations.
define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) {
; CHECK-LABEL: vfsqrt_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT:    ret
  %r = call <vscale x 32 x bfloat> @llvm.sqrt.nxv32bf16(<vscale x 32 x bfloat> %v)
  ret <vscale x 32 x bfloat> %r
}

; half: with +zvfh the expected output is a single e16 vfsqrt.v; with
; +zvfhmin it is widened to e32 (vfwcvt.f.f.v), square-rooted, and narrowed
; back (vfncvt.f.f.w).

declare <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half>)

define <vscale x 1 x half> @vfsqrt_nxv1f16(<vscale x 1 x half> %v) {
; ZVFH-LABEL: vfsqrt_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half> %v)
  ret <vscale x 1 x half> %r
}

declare <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half>)

define <vscale x 2 x half> @vfsqrt_nxv2f16(<vscale x 2 x half> %v) {
; ZVFH-LABEL: vfsqrt_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> %v)
  ret <vscale x 2 x half> %r
}

declare <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half>)

define <vscale x 4 x half> @vfsqrt_nxv4f16(<vscale x 4 x half> %v) {
; ZVFH-LABEL: vfsqrt_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> %v)
  ret <vscale x 4 x half> %r
}

declare <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half>)

define <vscale x 8 x half> @vfsqrt_nxv8f16(<vscale x 8 x half> %v) {
; ZVFH-LABEL: vfsqrt_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %v)
  ret <vscale x 8 x half> %r
}

declare <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half>)

define <vscale x 16 x half> @vfsqrt_nxv16f16(<vscale x 16 x half> %v) {
; ZVFH-LABEL: vfsqrt_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> %v)
  ret <vscale x 16 x half> %r
}

declare <vscale x 32 x half> @llvm.sqrt.nxv32f16(<vscale x 32 x half>)

; nxv32f16: with +zvfh this is one e16/m8 vfsqrt.v; with +zvfhmin the expected
; output handles the two m4 halves (v8 and v12) with separate e32/m8 operations.
define <vscale x 32 x half> @vfsqrt_nxv32f16(<vscale x 32 x half> %v) {
; ZVFH-LABEL: vfsqrt_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    ret
  %r = call <vscale x 32 x half> @llvm.sqrt.nxv32f16(<vscale x 32 x half> %v)
  ret <vscale x 32 x half> %r
}

; float: a single e32 vfsqrt.v is expected in every invocation.

declare <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float>)

define <vscale x 1 x float> @vfsqrt_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: vfsqrt_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float> %v)
  ret <vscale x 1 x float> %r
}

declare <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float>)

define <vscale x 2 x float> @vfsqrt_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: vfsqrt_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> %v)
  ret <vscale x 2 x float> %r
}

declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)

define <vscale x 4 x float> @vfsqrt_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: vfsqrt_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %v)
  ret <vscale x 4 x float> %r
}

declare <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float>)

define <vscale x 8 x float> @vfsqrt_nxv8f32(<vscale x 8 x float> %v) {
; CHECK-LABEL: vfsqrt_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float> %v)
  ret <vscale x 8 x float> %r
}

declare <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float>)

define <vscale x 16 x float> @vfsqrt_nxv16f32(<vscale x 16 x float> %v) {
; CHECK-LABEL: vfsqrt_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float> %v)
  ret <vscale x 16 x float> %r
}

; double: a single e64 vfsqrt.v is expected in every invocation.

declare <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double>)

define <vscale x 1 x double> @vfsqrt_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: vfsqrt_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double> %v)
  ret <vscale x 1 x double> %r
}

declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)

define <vscale x 2 x double> @vfsqrt_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: vfsqrt_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %v)
  ret <vscale x 2 x double> %r
}

declare <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double>)

define <vscale x 4 x double> @vfsqrt_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: vfsqrt_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> %v)
  ret <vscale x 4 x double> %r
}

declare <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double>)

define <vscale x 8 x double> @vfsqrt_nxv8f64(<vscale x 8 x double> %v) {
; CHECK-LABEL: vfsqrt_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double> %v)
  ret <vscale x 8 x double> %r
}