; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | \
; RUN:   FileCheck %s -check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | \
; RUN:   FileCheck %s -check-prefix=RV64

; ================================================================================
; trunc <vscale x 1 x float>
; ================================================================================

declare <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float>)
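
; A note on the expected lowering below: the f32->i16/i32/i64 cases fold the
; trunc into a single statically rounded convert (vfncvt.rtz/vfcvt.rtz/
; vfwcvt.rtz). The i8 cases have no single-step f32->i8 convert, so trunc is
; first materialized in the float domain: |x| < 2^23 (0x4b000000, built via
; lui 307200 + fmv.w.x) masks the lanes that are not already integral, those
; lanes round-trip through the integer domain with RTZ, vfsgnj.vv restores
; the sign, and the result is then narrowed in two steps (vfncvt to i16,
; vnsrl to i8).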

define <vscale x 1 x i8> @trunc_nxv1f32_to_si8(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfabs.v v9, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v9, fa5
; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v9, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_si8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfabs.v v9, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v9, fa5
; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v9, 0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i8>
  ret <vscale x 1 x i8> %b
}

define <vscale x 1 x i8> @trunc_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfabs.v v9, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v9, fa5
; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v9, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfabs.v v9, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v9, fa5
; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v9, 0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i8>
  ret <vscale x 1 x i8> %b
}

define <vscale x 1 x i16> @trunc_nxv1f32_to_si16(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_si16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i16>
  ret <vscale x 1 x i16> %b
}

define <vscale x 1 x i16> @trunc_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i16>
  ret <vscale x 1 x i16> %b
}

define <vscale x 1 x i32> @trunc_nxv1f32_to_si32(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_si32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %b
}

define <vscale x 1 x i32> @trunc_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %b
}

define <vscale x 1 x i64> @trunc_nxv1f32_to_si64(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfwcvt.rtz.x.f.v v9, v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_si64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfwcvt.rtz.x.f.v v9, v8
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %b
}

define <vscale x 1 x i64> @trunc_nxv1f32_to_ui64(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %b
}

; ================================================================================
; trunc <vscale x 4 x float>
; ================================================================================

declare <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float>)
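
; Same lowering as the nxv1 cases above, but at LMUL=2: e32/m2 for the f32
; source, e16/m1 and e8/mf2 for the narrowed results, e64/m4 for the widened
; results.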

define <vscale x 4 x i8> @trunc_nxv4f32_to_si8(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfabs.v v10, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v10, fa5
; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_si8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfabs.v v10, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v10, fa5
; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i8>
  ret <vscale x 4 x i8> %b
}

define <vscale x 4 x i8> @trunc_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfabs.v v10, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v10, fa5
; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfabs.v v10, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v10, fa5
; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i8>
  ret <vscale x 4 x i8> %b
}

define <vscale x 4 x i16> @trunc_nxv4f32_to_si16(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_si16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %b
}

define <vscale x 4 x i16> @trunc_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %b
}

define <vscale x 4 x i32> @trunc_nxv4f32_to_si32(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_si32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i32> @trunc_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i64> @trunc_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfwcvt.rtz.x.f.v v12, v8
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_si64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfwcvt.rtz.x.f.v v12, v8
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %b
}

define <vscale x 4 x i64> @trunc_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %b
}

; ================================================================================
; ceil <vscale x 1 x float>
; ================================================================================

declare <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float>)
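
; There is no statically rounded vector convert for ceil, so the expected
; code switches the dynamic rounding mode instead: fsrmi a0, 3 selects
; round-up (RUP) and saves the old frm in a0, and fsrm a0 restores it. The
; dynamically rounded vfcvt.x.f.v/vfncvt.x.f.w/vfwcvt.x.f.v then perform
; ceil and the conversion in one step; the i8 cases reuse the float-domain
; pattern above, with the masked vfcvt.x.f.v executed under RUP.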

define <vscale x 1 x i8> @ceil_nxv1f32_to_si8(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfabs.v v9, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v9, fa5
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v9, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_si8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfabs.v v9, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v9, fa5
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v9, 0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i8>
  ret <vscale x 1 x i8> %b
}

define <vscale x 1 x i8> @ceil_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfabs.v v9, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v9, fa5
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v9, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfabs.v v9, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v9, fa5
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v9, 0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i8>
  ret <vscale x 1 x i8> %b
}

define <vscale x 1 x i16> @ceil_nxv1f32_to_si16(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si16:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.x.f.w v9, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_si16:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.x.f.w v9, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i16>
  ret <vscale x 1 x i16> %b
}

define <vscale x 1 x i16> @ceil_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui16:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; RV32-NEXT:    vfncvt.xu.f.w v9, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui16:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT:    vfncvt.xu.f.w v9, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i16>
  ret <vscale x 1 x i16> %b
}

define <vscale x 1 x i32> @ceil_nxv1f32_to_si32(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si32:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfcvt.x.f.v v8, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_si32:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfcvt.x.f.v v8, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %b
}

define <vscale x 1 x i32> @ceil_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui32:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfcvt.xu.f.v v8, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui32:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfcvt.xu.f.v v8, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %b
}

define <vscale x 1 x i64> @ceil_nxv1f32_to_si64(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si64:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfwcvt.x.f.v v9, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_si64:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfwcvt.x.f.v v9, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %b
}

define <vscale x 1 x i64> @ceil_nxv1f32_to_ui64(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui64:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV32-NEXT:    vfwcvt.xu.f.v v9, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui64:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT:    vfwcvt.xu.f.v v9, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %b
}

; ================================================================================
; ceil <vscale x 4 x float>
; ================================================================================

declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)

define <vscale x 4 x i8> @ceil_nxv4f32_to_si8(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfabs.v v10, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v10, fa5
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_si8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfabs.v v10, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v10, fa5
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i8>
  ret <vscale x 4 x i8> %b
}

define <vscale x 4 x i8> @ceil_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfabs.v v10, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v10, fa5
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfabs.v v10, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v10, fa5
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i8>
  ret <vscale x 4 x i8> %b
}

define <vscale x 4 x i16> @ceil_nxv4f32_to_si16(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si16:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.x.f.w v10, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_si16:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.x.f.w v10, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %b
}

define <vscale x 4 x i16> @ceil_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui16:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.xu.f.w v10, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui16:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.xu.f.w v10, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %b
}

define <vscale x 4 x i32> @ceil_nxv4f32_to_si32(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si32:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vfcvt.x.f.v v8, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_si32:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV64-NEXT:    vfcvt.x.f.v v8, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i32> @ceil_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui32:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vfcvt.xu.f.v v8, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui32:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV64-NEXT:    vfcvt.xu.f.v v8, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i64> @ceil_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si64:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vfwcvt.x.f.v v12, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_si64:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV64-NEXT:    vfwcvt.x.f.v v12, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %b
}

define <vscale x 4 x i64> @ceil_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui64:
; RV32:       # %bb.0:
; RV32-NEXT:    fsrmi a0, 3
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vfwcvt.xu.f.v v12, v8
; RV32-NEXT:    fsrm a0
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui64:
; RV64:       # %bb.0:
; RV64-NEXT:    fsrmi a0, 3
; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV64-NEXT:    vfwcvt.xu.f.v v12, v8
; RV64-NEXT:    fsrm a0
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %b
}

; ================================================================================
; rint <vscale x 4 x float>
; ================================================================================

declare <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float>)
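
; rint rounds according to the current dynamic rounding mode, which is
; exactly what the unsuffixed vfcvt.x.f.v/vfncvt.x.f.w/vfwcvt.x.f.v do, so
; no fsrmi/fsrm pair is needed here.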

define <vscale x 4 x i8> @rint_nxv4f32_to_si8(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_si8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfabs.v v10, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v10, fa5
; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_si8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfabs.v v10, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v10, fa5
; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i8>
  ret <vscale x 4 x i8> %b
}

define <vscale x 4 x i8> @rint_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_ui8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfabs.v v10, v8
; RV32-NEXT:    lui a0, 307200
; RV32-NEXT:    fmv.w.x fa5, a0
; RV32-NEXT:    vmflt.vf v0, v10, fa5
; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_ui8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfabs.v v10, v8
; RV64-NEXT:    lui a0, 307200
; RV64-NEXT:    fmv.w.x fa5, a0
; RV64-NEXT:    vmflt.vf v0, v10, fa5
; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i8>
  ret <vscale x 4 x i8> %b
}

define <vscale x 4 x i16> @rint_nxv4f32_to_si16(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_si16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.x.f.w v10, v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_si16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.x.f.w v10, v8
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %b
}

define <vscale x 4 x i16> @rint_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_ui16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vfncvt.xu.f.w v10, v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_ui16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vfncvt.xu.f.w v10, v8
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %b
}

define <vscale x 4 x i32> @rint_nxv4f32_to_si32(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_si32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfcvt.x.f.v v8, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_si32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfcvt.x.f.v v8, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i32> @rint_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_ui32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfcvt.xu.f.v v8, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_ui32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfcvt.xu.f.v v8, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i64> @rint_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_si64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfwcvt.x.f.v v12, v8
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_si64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfwcvt.x.f.v v12, v8
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %b
}

define <vscale x 4 x i64> @rint_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_ui64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vfwcvt.xu.f.v v12, v8
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: rint_nxv4f32_to_ui64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vfwcvt.xu.f.v v12, v8
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
  %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %b
}