; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; This file tests the code generation for `llvm.round.*` on scalable vector types.
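;
; llvm.round rounds to the nearest integer, with ties rounded away from zero.
; RVV has no single instruction for that, so, as the checks below show, it is
; lowered to a masked sequence: take |x| with vfabs.v, compare it against
; 2^(mantissa bits) with vmflt.vf (above that magnitude every value is already
; an integer) to build the mask of elements that still need rounding, round
; those by converting to integer and back (vfcvt.x.f.v / vfcvt.f.x.v) under
; the RMM rounding mode (fsrmi 4, round-to-nearest ties-to-max-magnitude,
; which matches llvm.round), restore the previous frm with fsrm, and reattach
; the original sign with vfsgnj.vv so results that round to zero keep their
; sign (round(-0.25) is -0.0). bfloat has no native RVV arithmetic even with
; Zvfbfmin (it only adds conversions), so the bf16 tests below widen to f32
; with vfwcvtbf16.f.f.v, round there, and narrow back with vfncvtbf16.f.f.w.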
define <vscale x 1 x bfloat> @round_nxv1bf16(<vscale x 1 x bfloat> %x) {
; CHECK-LABEL: round_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> %x)
  ret <vscale x 1 x bfloat> %a
}

define <vscale x 2 x bfloat> @round_nxv2bf16(<vscale x 2 x bfloat> %x) {
; CHECK-LABEL: round_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> %x)
  ret <vscale x 2 x bfloat> %a
}

define <vscale x 4 x bfloat> @round_nxv4bf16(<vscale x 4 x bfloat> %x) {
; CHECK-LABEL: round_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> %x)
  ret <vscale x 4 x bfloat> %a
}

define <vscale x 8 x bfloat> @round_nxv8bf16(<vscale x 8 x bfloat> %x) {
; CHECK-LABEL: round_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v12
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> %x)
  ret <vscale x 8 x bfloat> %a
}

define <vscale x 16 x bfloat> @round_nxv16bf16(<vscale x 16 x bfloat> %x) {
; CHECK-LABEL: round_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> %x)
  ret <vscale x 16 x bfloat> %a
}

define <vscale x 32 x bfloat> @round_nxv32bf16(<vscale x 32 x bfloat> %x) {
; CHECK-LABEL: round_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x bfloat> @llvm.round.nxv32bf16(<vscale x 32 x bfloat> %x)
  ret <vscale x 32 x bfloat> %a
}
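
; For f16 the two configurations diverge: with Zvfh the sequence runs natively
; at e16, loading the f16 threshold from the constant pool (presumably 2^10,
; the f16 analogue of 2^23; the pool contents are not shown in the checks).
; With Zvfhmin only conversions are available, so the input is widened to f32
; with vfwcvt.f.f.v and rounded there, exactly like the bf16 cases above.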
define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) {
; ZVFH-LABEL: round_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>)

define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) {
; ZVFH-LABEL: round_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI7_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>)

define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) {
; ZVFH-LABEL: round_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI8_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>)

define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) {
; ZVFH-LABEL: round_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI9_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>)

define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) {
; ZVFH-LABEL: round_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI10_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfabs.v v12, v8
; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>)

define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) {
; ZVFH-LABEL: round_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI11_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfabs.v v16, v8
; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
; ZVFH-NEXT:    fsrmi a0, 4
; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v24, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v24
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    fsrmi a0, 4
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half>)
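
; For f32 the 2^23 threshold (0x4B000000) is materialized inline with a
; lui/fmv.w.x pair instead of a constant-pool load, and no widening is
; needed: the masked convert-and-back runs at the source element width.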
define <vscale x 1 x float> @round_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: round_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float>)

define <vscale x 2 x float> @round_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: round_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float>)

define <vscale x 4 x float> @round_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: round_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float>)

define <vscale x 8 x float> @round_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: round_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float>)

define <vscale x 16 x float> @round_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: round_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float>)
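
; For f64 the threshold is loaded from the constant pool with fld (presumably
; 2^52, the f64 analogue; the pool contents are not shown in the checks).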
define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: round_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double>)

define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: round_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)

define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: round_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double>)

define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: round_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 4
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double>)