; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; Codegen tests for llvm.nearbyint on scalable vectors of bfloat, half, float
; and double, compiled for riscv32 and riscv64.
;
; Two feature sets are exercised: one with +zvfh and one with +zfhmin,+zvfhmin.
; Functions whose expected assembly is identical in all four invocations use
; the common prefix. The half tests differ between the two feature sets and
; therefore use the ZVFH and ZVFHMIN prefixes instead.
;
; In every expected sequence, frflags/fsflags bracket the vfcvt conversions so
; that the FP exception flags are restored after the rounding.

; bfloat tests. The expected code widens the input to f32 with
; vfwcvtbf16.f.f.v, performs the rounding at e32, and narrows the result back
; with vfncvtbf16.f.f.w. "lui a0, 307200" produces 0x4B000000, the f32 bit
; pattern of 2^23, which is moved to fa5 and used as the vmflt.vf threshold.

define <vscale x 1 x bfloat> @nearbyint_nxv1bf16(<vscale x 1 x bfloat> %x) {
; CHECK-LABEL: nearbyint_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x bfloat> @llvm.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %x)
  ret <vscale x 1 x bfloat> %a
}

define <vscale x 2 x bfloat> @nearbyint_nxv2bf16(<vscale x 2 x bfloat> %x) {
; CHECK-LABEL: nearbyint_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x bfloat> @llvm.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %x)
  ret <vscale x 2 x bfloat> %a
}

define <vscale x 4 x bfloat> @nearbyint_nxv4bf16(<vscale x 4 x bfloat> %x) {
; CHECK-LABEL: nearbyint_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x bfloat> @llvm.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %x)
  ret <vscale x 4 x bfloat> %a
}

define <vscale x 8 x bfloat> @nearbyint_nxv8bf16(<vscale x 8 x bfloat> %x) {
; CHECK-LABEL: nearbyint_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v12
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x bfloat> @llvm.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %x)
  ret <vscale x 8 x bfloat> %a
}

define <vscale x 16 x bfloat> @nearbyint_nxv16bf16(<vscale x 16 x bfloat> %x) {
; CHECK-LABEL: nearbyint_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x bfloat> @llvm.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %x)
  ret <vscale x 16 x bfloat> %a
}

; nxv32bf16: the input is widened as two halves (v8 and v12), each rounded at
; e32/m8. The expected code contains one whole-register spill/reload through
; the stack and a separate frflags/fsflags pair for each half.
define <vscale x 32 x bfloat> @nearbyint_nxv32bf16(<vscale x 32 x bfloat> %x) {
; CHECK-LABEL: nearbyint_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vfabs.v v8, v24
; CHECK-NEXT:    vmflt.vf v7, v8, fa5
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x bfloat> @llvm.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %x)
  ret <vscale x 32 x bfloat> %a
}

; half tests. With +zvfh the expected code rounds directly at e16 and loads the
; comparison threshold from a constant-pool entry with flh. With only
; +zfhmin,+zvfhmin the value is widened to f32 with vfwcvt.f.f.v, rounded at
; e32, and narrowed back with vfncvt.f.f.w.

define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %x) {
; ZVFH-LABEL: nearbyint_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    frflags a0
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    fsflags a0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    frflags a0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    fsflags a0
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half>)

define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %x) {
; ZVFH-LABEL: nearbyint_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI7_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    frflags a0
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    fsflags a0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    frflags a0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    fsflags a0
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half>)

define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %x) {
; ZVFH-LABEL: nearbyint_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI8_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    frflags a0
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    fsflags a0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    frflags a0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    fsflags a0
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half>)

define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %x) {
; ZVFH-LABEL: nearbyint_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI9_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    frflags a0
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    fsflags a0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    frflags a0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    fsflags a0
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half>)

define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %x) {
; ZVFH-LABEL: nearbyint_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI10_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfabs.v v12, v8
; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
; ZVFH-NEXT:    frflags a0
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    fsflags a0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    frflags a0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    fsflags a0
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half>)

; nxv32f16: with +zvfh this is a single e16/m8 sequence. Without it, the
; expected code widens the input as two halves (v8 and v12), spills and reloads
; one m8 register group, and uses a separate frflags/fsflags pair per half.
define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %x) {
; ZVFH-LABEL: nearbyint_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI11_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfabs.v v16, v8
; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
; ZVFH-NEXT:    frflags a0
; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT:    fsflags a0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    frflags a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v16
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vfabs.v v8, v24
; ZVFHMIN-NEXT:    vmflt.vf v7, v8, fa5
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    fsflags a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT:    frflags a0
; ZVFHMIN-NEXT:    vmv1r.v v0, v7
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT:    fsflags a0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.nearbyint.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.nearbyint.nxv32f16(<vscale x 32 x half>)

; float tests. The expected code rounds directly at e32; the threshold is
; materialized with "lui a0, 307200" and moved to fa5 with fmv.w.x.

define <vscale x 1 x float> @nearbyint_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: nearbyint_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.nearbyint.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.nearbyint.nxv1f32(<vscale x 1 x float>)

define <vscale x 2 x float> @nearbyint_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: nearbyint_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float>)

define <vscale x 4 x float> @nearbyint_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: nearbyint_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float>)

define <vscale x 8 x float> @nearbyint_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: nearbyint_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.nearbyint.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.nearbyint.nxv8f32(<vscale x 8 x float>)

define <vscale x 16 x float> @nearbyint_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: nearbyint_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float>)

; double tests. The expected code rounds directly at e64; the threshold is
; loaded from a constant-pool entry with fld.

define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: nearbyint_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double>)

define <vscale x 2 x double> @nearbyint_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: nearbyint_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double>)

define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: nearbyint_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double>)

define <vscale x 8 x double> @nearbyint_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: nearbyint_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    frflags a0
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    fsflags a0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double>)