; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; Code generation test for llvm.ceil on scalable floating-point vectors
; (bfloat, half, float and double element types) for riscv32 and riscv64.
; Two feature sets are exercised: one with +zvfh and one with only
; +zfhmin/+zvfhmin. The expected assembly below is tool-generated; regenerate
; it with utils/update_llc_test_checks.py instead of editing it by hand.

; bfloat element type. In every configuration the expected assembly widens the
; input to e32 with vfwcvtbf16.f.f.v, performs the rounding sequence at e32,
; and narrows the result back with vfncvtbf16.f.f.w.

define <vscale x 1 x bfloat> @ceil_nxv1bf16(<vscale x 1 x bfloat> %x) {
; CHECK-LABEL: ceil_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> %x)
  ret <vscale x 1 x bfloat> %a
}

define <vscale x 2 x bfloat> @ceil_nxv2bf16(<vscale x 2 x bfloat> %x) {
; CHECK-LABEL: ceil_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> %x)
  ret <vscale x 2 x bfloat> %a
}

define <vscale x 4 x bfloat> @ceil_nxv4bf16(<vscale x 4 x bfloat> %x) {
; CHECK-LABEL: ceil_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> %x)
  ret <vscale x 4 x bfloat> %a
}

define <vscale x 8 x bfloat> @ceil_nxv8bf16(<vscale x 8 x bfloat> %x) {
; CHECK-LABEL: ceil_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v12
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> %x)
  ret <vscale x 8 x bfloat> %a
}

define <vscale x 16 x bfloat> @ceil_nxv16bf16(<vscale x 16 x bfloat> %x) {
; CHECK-LABEL: ceil_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> %x)
  ret <vscale x 16 x bfloat> %a
}

; The nxv32bf16 case is expected to be processed as two e16/m4 halves (v8 and
; v12), each widened to e32/m8, rounded, and narrowed back separately.
define <vscale x 32 x bfloat> @ceil_nxv32bf16(<vscale x 32 x bfloat> %x) {
; CHECK-LABEL: ceil_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x bfloat> @llvm.ceil.nxv32bf16(<vscale x 32 x bfloat> %x)
  ret <vscale x 32 x bfloat> %a
}

; half element type. With +zvfh the expected assembly rounds directly at e16
; using a threshold loaded from the constant pool; with only +zvfhmin it widens
; to e32 with vfwcvt.f.f.v, rounds there, and narrows back with vfncvt.f.f.w.

define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
; ZVFH-LABEL: ceil_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)

define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
; ZVFH-LABEL: ceil_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI7_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)

define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
; ZVFH-LABEL: ceil_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI8_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)

define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
; ZVFH-LABEL: ceil_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI9_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)

define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
; ZVFH-LABEL: ceil_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI10_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfabs.v v12, v8
; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)

; With only +zvfhmin the nxv32f16 case is expected to be processed as two
; e16/m4 halves (v8 and v12), each widened to e32/m8 and narrowed back.
define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
; ZVFH-LABEL: ceil_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI11_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfabs.v v16, v8
; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v24, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v24
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half>)

; float element type. The expected assembly is identical for all four
; configurations and materializes the comparison threshold with lui/fmv.w.x.

define <vscale x 1 x float> @ceil_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: ceil_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float>)

define <vscale x 2 x float> @ceil_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: ceil_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)

define <vscale x 4 x float> @ceil_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: ceil_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)

define <vscale x 8 x float> @ceil_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: ceil_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float>)

define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: ceil_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float>)

; double element type. The expected assembly is identical for all four
; configurations and loads the comparison threshold from the constant pool
; with fld.

define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: ceil_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)

define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: ceil_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)

define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: ceil_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)

define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: ceil_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double>)