1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \ 3; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ 4; RUN: --check-prefixes=CHECK,ZVFH 5; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \ 6; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ 7; RUN: --check-prefixes=CHECK,ZVFH 8; RUN: llc -mtriple=riscv32 \ 9; RUN: -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \ 10; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ 11; RUN: --check-prefixes=CHECK,ZVFHMIN 12; RUN: llc -mtriple=riscv64 \ 13; RUN: -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \ 14; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ 15; RUN: --check-prefixes=CHECK,ZVFHMIN 16 17declare <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) 18 19define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { 20; CHECK-LABEL: vfmin_vv_nxv1bf16: 21; CHECK: # %bb.0: 22; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 23; CHECK-NEXT: vmv1r.v v10, v0 24; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9, v0.t 25; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t 26; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 27; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t 28; CHECK-NEXT: vmerge.vvm v8, v9, v11, v0 29; CHECK-NEXT: vmv1r.v v0, v10 30; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t 31; CHECK-NEXT: vmerge.vvm v9, v11, v9, v0 32; CHECK-NEXT: vmv1r.v v0, v10 33; CHECK-NEXT: vfmin.vv v9, v9, v8, v0.t 34; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 35; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t 36; CHECK-NEXT: ret 37 %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) 38 ret <vscale x 1 x bfloat> %v 39} 40 41define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) { 42; CHECK-LABEL: vfmin_vv_nxv1bf16_unmasked: 43; CHECK: # %bb.0: 44; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 45; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 46; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 47; CHECK-NEXT: vmfeq.vv v0, v10, v10 48; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 49; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 50; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 51; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 52; CHECK-NEXT: vmfeq.vv v0, v8, v8 53; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 54; CHECK-NEXT: vfmin.vv v9, v8, v9 55; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 56; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 57; CHECK-NEXT: ret 58 %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) 59 ret <vscale x 1 x bfloat> %v 60} 61 62declare <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) 63 64define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { 65; CHECK-LABEL: vfmin_vv_nxv2bf16: 66; CHECK: # %bb.0: 67; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 68; CHECK-NEXT: vmv1r.v v10, v0 69; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9, v0.t 70; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t 71; CHECK-NEXT: vsetvli zero, zero, e32, m1, 
ta, ma 72; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t 73; CHECK-NEXT: vmerge.vvm v8, v9, v11, v0 74; CHECK-NEXT: vmv1r.v v0, v10 75; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t 76; CHECK-NEXT: vmerge.vvm v9, v11, v9, v0 77; CHECK-NEXT: vmv1r.v v0, v10 78; CHECK-NEXT: vfmin.vv v9, v9, v8, v0.t 79; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 80; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t 81; CHECK-NEXT: ret 82 %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) 83 ret <vscale x 2 x bfloat> %v 84} 85 86define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) { 87; CHECK-LABEL: vfmin_vv_nxv2bf16_unmasked: 88; CHECK: # %bb.0: 89; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 90; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 91; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 92; CHECK-NEXT: vmfeq.vv v0, v10, v10 93; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 94; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 95; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 96; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 97; CHECK-NEXT: vmfeq.vv v0, v8, v8 98; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 99; CHECK-NEXT: vfmin.vv v9, v8, v9 100; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 101; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 102; CHECK-NEXT: ret 103 %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) 104 ret <vscale x 2 x bfloat> %v 105} 106 107declare <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) 108 109define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { 110; CHECK-LABEL: vfmin_vv_nxv4bf16: 111; CHECK: # %bb.0: 112; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 113; CHECK-NEXT: vmv1r.v v10, v0 114; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t 115; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8, v0.t 116; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 117; CHECK-NEXT: vmfeq.vv v8, v14, v14, v0.t 118; CHECK-NEXT: vmv1r.v v0, v8 119; CHECK-NEXT: vmerge.vvm v16, v14, v12, v0 120; CHECK-NEXT: vmv1r.v v0, v10 121; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t 122; CHECK-NEXT: vmv1r.v v0, v8 123; CHECK-NEXT: vmerge.vvm v8, v12, v14, v0 124; CHECK-NEXT: vmv1r.v v0, v10 125; CHECK-NEXT: vfmin.vv v12, v8, v16, v0.t 126; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 127; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t 128; CHECK-NEXT: ret 129 %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) 130 ret <vscale x 4 x bfloat> %v 131} 132 133define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) { 134; CHECK-LABEL: vfmin_vv_nxv4bf16_unmasked: 135; CHECK: # %bb.0: 136; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 137; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 138; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 139; CHECK-NEXT: vmfeq.vv v0, v10, v10 140; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 141; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 142; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 143; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 144; CHECK-NEXT: vmfeq.vv v0, v12, v12 145; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 146; CHECK-NEXT: vfmin.vv v10, v10, v8 147; CHECK-NEXT: 
vsetvli zero, zero, e16, m1, ta, ma 148; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 149; CHECK-NEXT: ret 150 %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) 151 ret <vscale x 4 x bfloat> %v 152} 153 154declare <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) 155 156define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { 157; CHECK-LABEL: vfmin_vv_nxv8bf16: 158; CHECK: # %bb.0: 159; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 160; CHECK-NEXT: vmv1r.v v12, v0 161; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t 162; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8, v0.t 163; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 164; CHECK-NEXT: vmfeq.vv v8, v20, v20, v0.t 165; CHECK-NEXT: vmv1r.v v0, v8 166; CHECK-NEXT: vmerge.vvm v24, v20, v16, v0 167; CHECK-NEXT: vmv1r.v v0, v12 168; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t 169; CHECK-NEXT: vmv1r.v v0, v8 170; CHECK-NEXT: vmerge.vvm v8, v16, v20, v0 171; CHECK-NEXT: vmv1r.v v0, v12 172; CHECK-NEXT: vfmin.vv v16, v8, v24, v0.t 173; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 174; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t 175; CHECK-NEXT: ret 176 %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) 177 ret <vscale x 8 x bfloat> %v 178} 179 180define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) { 181; CHECK-LABEL: vfmin_vv_nxv8bf16_unmasked: 182; CHECK: # %bb.0: 183; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 184; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 185; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 186; CHECK-NEXT: vmfeq.vv v0, v12, v12 187; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 188; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 189; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 190; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 191; CHECK-NEXT: vmfeq.vv v0, v16, v16 192; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 193; CHECK-NEXT: vfmin.vv v12, v12, v8 194; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 195; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 196; CHECK-NEXT: ret 197 %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) 198 ret <vscale x 8 x bfloat> %v 199} 200 201declare <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) 202 203define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { 204; CHECK-LABEL: vfmin_vv_nxv16bf16: 205; CHECK: # %bb.0: 206; CHECK-NEXT: addi sp, sp, -16 207; CHECK-NEXT: .cfi_def_cfa_offset 16 208; CHECK-NEXT: csrr a1, vlenb 209; CHECK-NEXT: li a2, 24 210; CHECK-NEXT: mul a1, a1, a2 211; CHECK-NEXT: sub sp, sp, a1 212; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 213; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 214; CHECK-NEXT: vmv1r.v v16, v0 215; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t 216; CHECK-NEXT: addi a0, sp, 16 217; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 218; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t 219; CHECK-NEXT: csrr a0, 
vlenb 220; CHECK-NEXT: slli a0, a0, 4 221; CHECK-NEXT: add a0, sp, a0 222; CHECK-NEXT: addi a0, a0, 16 223; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 224; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 225; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t 226; CHECK-NEXT: vmv1r.v v0, v8 227; CHECK-NEXT: addi a0, sp, 16 228; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 229; CHECK-NEXT: csrr a0, vlenb 230; CHECK-NEXT: slli a0, a0, 4 231; CHECK-NEXT: add a0, sp, a0 232; CHECK-NEXT: addi a0, a0, 16 233; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 234; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 235; CHECK-NEXT: csrr a0, vlenb 236; CHECK-NEXT: slli a0, a0, 3 237; CHECK-NEXT: add a0, sp, a0 238; CHECK-NEXT: addi a0, a0, 16 239; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 240; CHECK-NEXT: vmv1r.v v0, v16 241; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t 242; CHECK-NEXT: vmv1r.v v0, v8 243; CHECK-NEXT: csrr a0, vlenb 244; CHECK-NEXT: slli a0, a0, 4 245; CHECK-NEXT: add a0, sp, a0 246; CHECK-NEXT: addi a0, a0, 16 247; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 248; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 249; CHECK-NEXT: vmv1r.v v0, v16 250; CHECK-NEXT: csrr a0, vlenb 251; CHECK-NEXT: slli a0, a0, 3 252; CHECK-NEXT: add a0, sp, a0 253; CHECK-NEXT: addi a0, a0, 16 254; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 255; CHECK-NEXT: vfmin.vv v24, v8, v24, v0.t 256; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 257; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t 258; CHECK-NEXT: csrr a0, vlenb 259; CHECK-NEXT: li a1, 24 260; CHECK-NEXT: mul a0, a0, a1 261; CHECK-NEXT: add sp, sp, a0 262; CHECK-NEXT: .cfi_def_cfa sp, 16 263; CHECK-NEXT: addi sp, sp, 16 264; CHECK-NEXT: .cfi_def_cfa_offset 0 265; CHECK-NEXT: ret 266 %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) 267 ret <vscale x 16 x bfloat> %v 268} 269 270define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) { 271; CHECK-LABEL: vfmin_vv_nxv16bf16_unmasked: 272; CHECK: # %bb.0: 273; CHECK-NEXT: addi sp, sp, -16 274; CHECK-NEXT: .cfi_def_cfa_offset 16 275; CHECK-NEXT: csrr a1, vlenb 276; CHECK-NEXT: slli a1, a1, 3 277; CHECK-NEXT: sub sp, sp, a1 278; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 279; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 280; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 281; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 282; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 283; CHECK-NEXT: vmfeq.vv v0, v24, v24 284; CHECK-NEXT: vmfeq.vv v7, v16, v16 285; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 286; CHECK-NEXT: addi a0, sp, 16 287; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 288; CHECK-NEXT: vmv1r.v v0, v7 289; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 290; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 291; CHECK-NEXT: vfmin.vv v16, v8, v16 292; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 293; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 294; CHECK-NEXT: csrr a0, vlenb 295; CHECK-NEXT: slli a0, a0, 3 296; CHECK-NEXT: add sp, sp, a0 297; CHECK-NEXT: .cfi_def_cfa sp, 16 298; CHECK-NEXT: addi sp, sp, 16 299; CHECK-NEXT: .cfi_def_cfa_offset 0 300; CHECK-NEXT: ret 301 %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 
true), i32 %evl) 302 ret <vscale x 16 x bfloat> %v 303} 304 305declare <vscale x 32 x bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) 306 307define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { 308; CHECK-LABEL: vfmin_vv_nxv32bf16: 309; CHECK: # %bb.0: 310; CHECK-NEXT: addi sp, sp, -16 311; CHECK-NEXT: .cfi_def_cfa_offset 16 312; CHECK-NEXT: csrr a1, vlenb 313; CHECK-NEXT: slli a1, a1, 5 314; CHECK-NEXT: sub sp, sp, a1 315; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 316; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma 317; CHECK-NEXT: vmv1r.v v7, v0 318; CHECK-NEXT: csrr a1, vlenb 319; CHECK-NEXT: li a2, 24 320; CHECK-NEXT: mul a1, a1, a2 321; CHECK-NEXT: add a1, sp, a1 322; CHECK-NEXT: addi a1, a1, 16 323; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 324; CHECK-NEXT: vmv8r.v v16, v8 325; CHECK-NEXT: csrr a2, vlenb 326; CHECK-NEXT: slli a1, a2, 1 327; CHECK-NEXT: srli a2, a2, 2 328; CHECK-NEXT: sub a3, a0, a1 329; CHECK-NEXT: vslidedown.vx v25, v0, a2 330; CHECK-NEXT: sltu a2, a0, a3 331; CHECK-NEXT: addi a2, a2, -1 332; CHECK-NEXT: and a2, a2, a3 333; CHECK-NEXT: vmv1r.v v0, v25 334; CHECK-NEXT: csrr a3, vlenb 335; CHECK-NEXT: slli a3, a3, 3 336; CHECK-NEXT: add a3, sp, a3 337; CHECK-NEXT: addi a3, a3, 16 338; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 339; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma 340; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t 341; CHECK-NEXT: csrr a2, vlenb 342; CHECK-NEXT: slli a2, a2, 4 343; CHECK-NEXT: add a2, sp, a2 344; CHECK-NEXT: addi a2, a2, 16 345; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 346; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 347; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t 348; CHECK-NEXT: csrr a2, vlenb 349; CHECK-NEXT: li a3, 24 350; CHECK-NEXT: mul a2, a2, a3 351; CHECK-NEXT: add a2, sp, a2 352; CHECK-NEXT: addi a2, a2, 16 353; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 354; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 355; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t 356; CHECK-NEXT: vmv1r.v v0, v24 357; CHECK-NEXT: csrr a2, vlenb 358; CHECK-NEXT: slli a2, a2, 4 359; CHECK-NEXT: add a2, sp, a2 360; CHECK-NEXT: addi a2, a2, 16 361; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 362; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 363; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 364; CHECK-NEXT: addi a2, sp, 16 365; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 366; CHECK-NEXT: vmv1r.v v0, v25 367; CHECK-NEXT: vmfeq.vv v12, v16, v16, v0.t 368; CHECK-NEXT: vmv1r.v v0, v12 369; CHECK-NEXT: csrr a2, vlenb 370; CHECK-NEXT: slli a2, a2, 4 371; CHECK-NEXT: add a2, sp, a2 372; CHECK-NEXT: addi a2, a2, 16 373; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 374; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 375; CHECK-NEXT: vmv1r.v v0, v25 376; CHECK-NEXT: addi a2, sp, 16 377; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 378; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t 379; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 380; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16, v0.t 381; CHECK-NEXT: bltu a0, a1, .LBB10_2 382; CHECK-NEXT: # %bb.1: 383; CHECK-NEXT: mv a0, a1 384; CHECK-NEXT: .LBB10_2: 385; CHECK-NEXT: vmv1r.v v0, v7 386; CHECK-NEXT: csrr a1, vlenb 387; CHECK-NEXT: slli a1, a1, 3 388; CHECK-NEXT: add a1, sp, a1 389; CHECK-NEXT: 
addi a1, a1, 16 390; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 391; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 392; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t 393; CHECK-NEXT: csrr a0, vlenb 394; CHECK-NEXT: slli a0, a0, 4 395; CHECK-NEXT: add a0, sp, a0 396; CHECK-NEXT: addi a0, a0, 16 397; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 398; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 399; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t 400; CHECK-NEXT: csrr a0, vlenb 401; CHECK-NEXT: li a1, 24 402; CHECK-NEXT: mul a0, a0, a1 403; CHECK-NEXT: add a0, sp, a0 404; CHECK-NEXT: addi a0, a0, 16 405; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 406; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 407; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t 408; CHECK-NEXT: vmv1r.v v0, v8 409; CHECK-NEXT: csrr a0, vlenb 410; CHECK-NEXT: slli a0, a0, 4 411; CHECK-NEXT: add a0, sp, a0 412; CHECK-NEXT: addi a0, a0, 16 413; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 414; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 415; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 416; CHECK-NEXT: csrr a0, vlenb 417; CHECK-NEXT: li a1, 24 418; CHECK-NEXT: mul a0, a0, a1 419; CHECK-NEXT: add a0, sp, a0 420; CHECK-NEXT: addi a0, a0, 16 421; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 422; CHECK-NEXT: vmv1r.v v0, v7 423; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t 424; CHECK-NEXT: vmv1r.v v0, v8 425; CHECK-NEXT: csrr a0, vlenb 426; CHECK-NEXT: slli a0, a0, 4 427; CHECK-NEXT: add a0, sp, a0 428; CHECK-NEXT: addi a0, a0, 16 429; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 430; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 431; CHECK-NEXT: vmv1r.v v0, v7 432; CHECK-NEXT: csrr a0, vlenb 433; CHECK-NEXT: li a1, 24 434; CHECK-NEXT: mul a0, a0, a1 435; CHECK-NEXT: add a0, sp, a0 436; CHECK-NEXT: addi a0, a0, 16 437; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 438; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t 439; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 440; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t 441; CHECK-NEXT: csrr a0, vlenb 442; CHECK-NEXT: slli a0, a0, 5 443; CHECK-NEXT: add sp, sp, a0 444; CHECK-NEXT: .cfi_def_cfa sp, 16 445; CHECK-NEXT: addi sp, sp, 16 446; CHECK-NEXT: .cfi_def_cfa_offset 0 447; CHECK-NEXT: ret 448 %v = call <vscale x 32 x bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) 449 ret <vscale x 32 x bfloat> %v 450} 451 452define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) { 453; CHECK-LABEL: vfmin_vv_nxv32bf16_unmasked: 454; CHECK: # %bb.0: 455; CHECK-NEXT: addi sp, sp, -16 456; CHECK-NEXT: .cfi_def_cfa_offset 16 457; CHECK-NEXT: csrr a1, vlenb 458; CHECK-NEXT: slli a1, a1, 5 459; CHECK-NEXT: sub sp, sp, a1 460; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 461; CHECK-NEXT: csrr a1, vlenb 462; CHECK-NEXT: li a2, 24 463; CHECK-NEXT: mul a1, a1, a2 464; CHECK-NEXT: add a1, sp, a1 465; CHECK-NEXT: addi a1, a1, 16 466; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 467; CHECK-NEXT: csrr a2, vlenb 468; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma 469; CHECK-NEXT: vmset.m v16 470; CHECK-NEXT: slli a1, a2, 1 471; CHECK-NEXT: srli a2, a2, 2 472; CHECK-NEXT: sub a3, a0, a1 473; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 474; CHECK-NEXT: vslidedown.vx v24, v16, a2 475; CHECK-NEXT: 
sltu a2, a0, a3 476; CHECK-NEXT: addi a2, a2, -1 477; CHECK-NEXT: and a2, a2, a3 478; CHECK-NEXT: vmv1r.v v0, v24 479; CHECK-NEXT: csrr a3, vlenb 480; CHECK-NEXT: slli a3, a3, 3 481; CHECK-NEXT: add a3, sp, a3 482; CHECK-NEXT: addi a3, a3, 16 483; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 484; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma 485; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t 486; CHECK-NEXT: csrr a2, vlenb 487; CHECK-NEXT: slli a2, a2, 4 488; CHECK-NEXT: add a2, sp, a2 489; CHECK-NEXT: addi a2, a2, 16 490; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill 491; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 492; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t 493; CHECK-NEXT: csrr a2, vlenb 494; CHECK-NEXT: li a3, 24 495; CHECK-NEXT: mul a2, a2, a3 496; CHECK-NEXT: add a2, sp, a2 497; CHECK-NEXT: addi a2, a2, 16 498; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 499; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 500; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t 501; CHECK-NEXT: vmv1r.v v0, v25 502; CHECK-NEXT: csrr a2, vlenb 503; CHECK-NEXT: slli a2, a2, 4 504; CHECK-NEXT: add a2, sp, a2 505; CHECK-NEXT: addi a2, a2, 16 506; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 507; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 508; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 509; CHECK-NEXT: addi a2, sp, 16 510; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 511; CHECK-NEXT: vmv1r.v v0, v24 512; CHECK-NEXT: vmfeq.vv v12, v16, v16, v0.t 513; CHECK-NEXT: vmv1r.v v0, v12 514; CHECK-NEXT: csrr a2, vlenb 515; CHECK-NEXT: slli a2, a2, 4 516; CHECK-NEXT: add a2, sp, a2 517; CHECK-NEXT: addi a2, a2, 16 518; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 519; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 520; CHECK-NEXT: vmv1r.v v0, v24 521; CHECK-NEXT: addi a2, sp, 16 522; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 523; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t 524; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 525; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16, v0.t 526; CHECK-NEXT: csrr a2, vlenb 527; CHECK-NEXT: slli a2, a2, 4 528; CHECK-NEXT: add a2, sp, a2 529; CHECK-NEXT: addi a2, a2, 16 530; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 531; CHECK-NEXT: bltu a0, a1, .LBB11_2 532; CHECK-NEXT: # %bb.1: 533; CHECK-NEXT: mv a0, a1 534; CHECK-NEXT: .LBB11_2: 535; CHECK-NEXT: csrr a1, vlenb 536; CHECK-NEXT: slli a1, a1, 3 537; CHECK-NEXT: add a1, sp, a1 538; CHECK-NEXT: addi a1, a1, 16 539; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 540; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 541; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 542; CHECK-NEXT: csrr a0, vlenb 543; CHECK-NEXT: li a1, 24 544; CHECK-NEXT: mul a0, a0, a1 545; CHECK-NEXT: add a0, sp, a0 546; CHECK-NEXT: addi a0, a0, 16 547; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 548; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 549; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 550; CHECK-NEXT: vmfeq.vv v0, v8, v8 551; CHECK-NEXT: vmfeq.vv v7, v24, v24 552; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 553; CHECK-NEXT: csrr a0, vlenb 554; CHECK-NEXT: li a1, 24 555; CHECK-NEXT: mul a0, a0, a1 556; CHECK-NEXT: add a0, sp, a0 557; CHECK-NEXT: addi a0, a0, 16 558; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 559; CHECK-NEXT: vmv1r.v v0, v7 560; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 561; CHECK-NEXT: csrr a0, vlenb 562; CHECK-NEXT: li a1, 24 563; CHECK-NEXT: mul a0, a0, a1 564; CHECK-NEXT: add a0, sp, a0 565; CHECK-NEXT: addi a0, a0, 16 566; CHECK-NEXT: vl8r.v 
v24, (a0) # Unknown-size Folded Reload 567; CHECK-NEXT: vfmin.vv v16, v16, v24 568; CHECK-NEXT: csrr a0, vlenb 569; CHECK-NEXT: slli a0, a0, 4 570; CHECK-NEXT: add a0, sp, a0 571; CHECK-NEXT: addi a0, a0, 16 572; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 573; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 574; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 575; CHECK-NEXT: csrr a0, vlenb 576; CHECK-NEXT: slli a0, a0, 5 577; CHECK-NEXT: add sp, sp, a0 578; CHECK-NEXT: .cfi_def_cfa sp, 16 579; CHECK-NEXT: addi sp, sp, 16 580; CHECK-NEXT: .cfi_def_cfa_offset 0 581; CHECK-NEXT: ret 582 %v = call <vscale x 32 x bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) 583 ret <vscale x 32 x bfloat> %v 584} 585declare <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) 586 587define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { 588; ZVFH-LABEL: vfmin_vv_nxv1f16: 589; ZVFH: # %bb.0: 590; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 591; ZVFH-NEXT: vmv1r.v v10, v0 592; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t 593; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 594; ZVFH-NEXT: vmv1r.v v0, v10 595; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t 596; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 597; ZVFH-NEXT: vmv1r.v v0, v10 598; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t 599; ZVFH-NEXT: ret 600; 601; ZVFHMIN-LABEL: vfmin_vv_nxv1f16: 602; ZVFHMIN: # %bb.0: 603; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 604; ZVFHMIN-NEXT: vmv1r.v v10, v0 605; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t 606; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t 607; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 608; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t 609; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 610; ZVFHMIN-NEXT: vmv1r.v v0, v10 611; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t 612; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0 613; ZVFHMIN-NEXT: vmv1r.v v0, v10 614; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8, v0.t 615; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 616; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t 617; ZVFHMIN-NEXT: ret 618 %v = call <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl) 619 ret <vscale x 1 x half> %v 620} 621 622define <vscale x 1 x half> @vfmin_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, i32 zeroext %evl) { 623; ZVFH-LABEL: vfmin_vv_nxv1f16_unmasked: 624; ZVFH: # %bb.0: 625; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 626; ZVFH-NEXT: vmfeq.vv v0, v8, v8 627; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 628; ZVFH-NEXT: vmfeq.vv v0, v9, v9 629; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 630; ZVFH-NEXT: vfmin.vv v8, v8, v10 631; ZVFH-NEXT: ret 632; 633; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: 634; ZVFHMIN: # %bb.0: 635; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 636; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 637; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 638; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 639; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 640; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 641; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 642; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 643; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 644; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 645; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 646; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 647; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 648; 
ZVFHMIN-NEXT: ret 649 %v = call <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) 650 ret <vscale x 1 x half> %v 651} 652 653declare <vscale x 2 x half> @llvm.vp.minimum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32) 654 655define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { 656; ZVFH-LABEL: vfmin_vv_nxv2f16: 657; ZVFH: # %bb.0: 658; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 659; ZVFH-NEXT: vmv1r.v v10, v0 660; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t 661; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 662; ZVFH-NEXT: vmv1r.v v0, v10 663; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t 664; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 665; ZVFH-NEXT: vmv1r.v v0, v10 666; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t 667; ZVFH-NEXT: ret 668; 669; ZVFHMIN-LABEL: vfmin_vv_nxv2f16: 670; ZVFHMIN: # %bb.0: 671; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 672; ZVFHMIN-NEXT: vmv1r.v v10, v0 673; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t 674; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t 675; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 676; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t 677; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 678; ZVFHMIN-NEXT: vmv1r.v v0, v10 679; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t 680; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0 681; ZVFHMIN-NEXT: vmv1r.v v0, v10 682; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8, v0.t 683; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 684; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t 685; ZVFHMIN-NEXT: ret 686 %v = call <vscale x 2 x half> @llvm.vp.minimum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl) 687 ret <vscale x 2 x half> %v 688} 689 690define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 zeroext %evl) { 691; ZVFH-LABEL: vfmin_vv_nxv2f16_unmasked: 692; ZVFH: # %bb.0: 693; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 694; ZVFH-NEXT: vmfeq.vv v0, v8, v8 695; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 696; ZVFH-NEXT: vmfeq.vv v0, v9, v9 697; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 698; ZVFH-NEXT: vfmin.vv v8, v8, v10 699; ZVFH-NEXT: ret 700; 701; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked: 702; ZVFHMIN: # %bb.0: 703; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 704; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 705; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 706; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 707; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 708; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 709; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 710; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 711; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 712; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 713; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 714; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 715; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 716; ZVFHMIN-NEXT: ret 717 %v = call <vscale x 2 x half> @llvm.vp.minimum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) 718 ret <vscale x 2 x half> %v 719} 720 721declare <vscale x 4 x half> @llvm.vp.minimum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x i1>, i32) 722 723define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { 724; ZVFH-LABEL: vfmin_vv_nxv4f16: 725; ZVFH: # %bb.0: 726; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 727; 
ZVFH-NEXT: vmv1r.v v10, v0 728; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t 729; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 730; ZVFH-NEXT: vmv1r.v v0, v10 731; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t 732; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 733; ZVFH-NEXT: vmv1r.v v0, v10 734; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t 735; ZVFH-NEXT: ret 736; 737; ZVFHMIN-LABEL: vfmin_vv_nxv4f16: 738; ZVFHMIN: # %bb.0: 739; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 740; ZVFHMIN-NEXT: vmv1r.v v10, v0 741; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t 742; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t 743; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 744; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t 745; ZVFHMIN-NEXT: vmv1r.v v0, v8 746; ZVFHMIN-NEXT: vmerge.vvm v16, v14, v12, v0 747; ZVFHMIN-NEXT: vmv1r.v v0, v10 748; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t 749; ZVFHMIN-NEXT: vmv1r.v v0, v8 750; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0 751; ZVFHMIN-NEXT: vmv1r.v v0, v10 752; ZVFHMIN-NEXT: vfmin.vv v12, v8, v16, v0.t 753; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 754; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 755; ZVFHMIN-NEXT: ret 756 %v = call <vscale x 4 x half> @llvm.vp.minimum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl) 757 ret <vscale x 4 x half> %v 758} 759 760define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, i32 zeroext %evl) { 761; ZVFH-LABEL: vfmin_vv_nxv4f16_unmasked: 762; ZVFH: # %bb.0: 763; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 764; ZVFH-NEXT: vmfeq.vv v0, v8, v8 765; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 766; ZVFH-NEXT: vmfeq.vv v0, v9, v9 767; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 768; ZVFH-NEXT: vfmin.vv v8, v8, v10 769; ZVFH-NEXT: ret 770; 771; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked: 772; ZVFHMIN: # %bb.0: 773; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 774; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 775; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 776; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 777; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 778; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 779; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 780; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 781; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 782; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 783; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 784; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 785; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 786; ZVFHMIN-NEXT: ret 787 %v = call <vscale x 4 x half> @llvm.vp.minimum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) 788 ret <vscale x 4 x half> %v 789} 790 791declare <vscale x 8 x half> @llvm.vp.minimum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, i32) 792 793define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { 794; ZVFH-LABEL: vfmin_vv_nxv8f16: 795; ZVFH: # %bb.0: 796; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 797; ZVFH-NEXT: vmv1r.v v12, v0 798; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t 799; ZVFH-NEXT: vmv1r.v v0, v13 800; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 801; ZVFH-NEXT: vmv1r.v v0, v12 802; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t 803; ZVFH-NEXT: vmv1r.v v0, v13 804; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 805; ZVFH-NEXT: vmv1r.v v0, v12 806; ZVFH-NEXT: vfmin.vv v8, v8, v14, v0.t 807; ZVFH-NEXT: ret 808; 809; ZVFHMIN-LABEL: vfmin_vv_nxv8f16: 810; ZVFHMIN: # %bb.0: 811; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 812; 
ZVFHMIN-NEXT: vmv1r.v v12, v0 813; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t 814; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t 815; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 816; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t 817; ZVFHMIN-NEXT: vmv1r.v v0, v8 818; ZVFHMIN-NEXT: vmerge.vvm v24, v20, v16, v0 819; ZVFHMIN-NEXT: vmv1r.v v0, v12 820; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t 821; ZVFHMIN-NEXT: vmv1r.v v0, v8 822; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v20, v0 823; ZVFHMIN-NEXT: vmv1r.v v0, v12 824; ZVFHMIN-NEXT: vfmin.vv v16, v8, v24, v0.t 825; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 826; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t 827; ZVFHMIN-NEXT: ret 828 %v = call <vscale x 8 x half> @llvm.vp.minimum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl) 829 ret <vscale x 8 x half> %v 830} 831 832define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, i32 zeroext %evl) { 833; ZVFH-LABEL: vfmin_vv_nxv8f16_unmasked: 834; ZVFH: # %bb.0: 835; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 836; ZVFH-NEXT: vmfeq.vv v0, v8, v8 837; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 838; ZVFH-NEXT: vmfeq.vv v0, v10, v10 839; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 840; ZVFH-NEXT: vfmin.vv v8, v8, v12 841; ZVFH-NEXT: ret 842; 843; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked: 844; ZVFHMIN: # %bb.0: 845; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 846; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 847; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 848; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 849; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 850; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 851; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 852; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 853; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 854; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 855; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 856; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 857; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 858; ZVFHMIN-NEXT: ret 859 %v = call <vscale x 8 x half> @llvm.vp.minimum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) 860 ret <vscale x 8 x half> %v 861} 862 863declare <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32) 864 865define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { 866; ZVFH-LABEL: vfmin_vv_nxv16f16: 867; ZVFH: # %bb.0: 868; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 869; ZVFH-NEXT: vmv1r.v v16, v0 870; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t 871; ZVFH-NEXT: vmv1r.v v0, v17 872; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 873; ZVFH-NEXT: vmv1r.v v0, v16 874; ZVFH-NEXT: vmfeq.vv v17, v12, v12, v0.t 875; ZVFH-NEXT: vmv1r.v v0, v17 876; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 877; ZVFH-NEXT: vmv1r.v v0, v16 878; ZVFH-NEXT: vfmin.vv v8, v8, v20, v0.t 879; ZVFH-NEXT: ret 880; 881; ZVFHMIN-LABEL: vfmin_vv_nxv16f16: 882; ZVFHMIN: # %bb.0: 883; ZVFHMIN-NEXT: addi sp, sp, -16 884; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 885; ZVFHMIN-NEXT: csrr a1, vlenb 886; ZVFHMIN-NEXT: li a2, 24 887; ZVFHMIN-NEXT: mul a1, a1, a2 888; ZVFHMIN-NEXT: sub sp, sp, a1 889; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 890; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 891; ZVFHMIN-NEXT: vmv1r.v v16, v0 892; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 
893; ZVFHMIN-NEXT: addi a0, sp, 16 894; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 895; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t 896; ZVFHMIN-NEXT: csrr a0, vlenb 897; ZVFHMIN-NEXT: slli a0, a0, 4 898; ZVFHMIN-NEXT: add a0, sp, a0 899; ZVFHMIN-NEXT: addi a0, a0, 16 900; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 901; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 902; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t 903; ZVFHMIN-NEXT: vmv1r.v v0, v8 904; ZVFHMIN-NEXT: addi a0, sp, 16 905; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 906; ZVFHMIN-NEXT: csrr a0, vlenb 907; ZVFHMIN-NEXT: slli a0, a0, 4 908; ZVFHMIN-NEXT: add a0, sp, a0 909; ZVFHMIN-NEXT: addi a0, a0, 16 910; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 911; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 912; ZVFHMIN-NEXT: csrr a0, vlenb 913; ZVFHMIN-NEXT: slli a0, a0, 3 914; ZVFHMIN-NEXT: add a0, sp, a0 915; ZVFHMIN-NEXT: addi a0, a0, 16 916; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 917; ZVFHMIN-NEXT: vmv1r.v v0, v16 918; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t 919; ZVFHMIN-NEXT: vmv1r.v v0, v8 920; ZVFHMIN-NEXT: csrr a0, vlenb 921; ZVFHMIN-NEXT: slli a0, a0, 4 922; ZVFHMIN-NEXT: add a0, sp, a0 923; ZVFHMIN-NEXT: addi a0, a0, 16 924; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 925; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 926; ZVFHMIN-NEXT: vmv1r.v v0, v16 927; ZVFHMIN-NEXT: csrr a0, vlenb 928; ZVFHMIN-NEXT: slli a0, a0, 3 929; ZVFHMIN-NEXT: add a0, sp, a0 930; ZVFHMIN-NEXT: addi a0, a0, 16 931; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 932; ZVFHMIN-NEXT: vfmin.vv v24, v8, v24, v0.t 933; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 934; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t 935; ZVFHMIN-NEXT: csrr a0, vlenb 936; ZVFHMIN-NEXT: li a1, 24 937; ZVFHMIN-NEXT: mul a0, a0, a1 938; ZVFHMIN-NEXT: add sp, sp, a0 939; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 940; ZVFHMIN-NEXT: addi sp, sp, 16 941; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 942; ZVFHMIN-NEXT: ret 943 %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl) 944 ret <vscale x 16 x half> %v 945} 946 947define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, i32 zeroext %evl) { 948; ZVFH-LABEL: vfmin_vv_nxv16f16_unmasked: 949; ZVFH: # %bb.0: 950; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 951; ZVFH-NEXT: vmfeq.vv v0, v8, v8 952; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 953; ZVFH-NEXT: vmfeq.vv v0, v12, v12 954; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 955; ZVFH-NEXT: vfmin.vv v8, v8, v16 956; ZVFH-NEXT: ret 957; 958; ZVFHMIN-LABEL: vfmin_vv_nxv16f16_unmasked: 959; ZVFHMIN: # %bb.0: 960; ZVFHMIN-NEXT: addi sp, sp, -16 961; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 962; ZVFHMIN-NEXT: csrr a1, vlenb 963; ZVFHMIN-NEXT: slli a1, a1, 3 964; ZVFHMIN-NEXT: sub sp, sp, a1 965; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 966; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 967; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 968; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 969; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 970; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 971; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 972; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 973; ZVFHMIN-NEXT: addi a0, sp, 16 974; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 975; ZVFHMIN-NEXT: vmv1r.v v0, v7 976; 
ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 977; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 978; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 979; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 980; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 981; ZVFHMIN-NEXT: csrr a0, vlenb 982; ZVFHMIN-NEXT: slli a0, a0, 3 983; ZVFHMIN-NEXT: add sp, sp, a0 984; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 985; ZVFHMIN-NEXT: addi sp, sp, 16 986; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 987; ZVFHMIN-NEXT: ret 988 %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) 989 ret <vscale x 16 x half> %v 990} 991 992declare <vscale x 32 x half> @llvm.vp.minimum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32) 993 994define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { 995; ZVFH-LABEL: vfmin_vv_nxv32f16: 996; ZVFH: # %bb.0: 997; ZVFH-NEXT: addi sp, sp, -16 998; ZVFH-NEXT: .cfi_def_cfa_offset 16 999; ZVFH-NEXT: csrr a1, vlenb 1000; ZVFH-NEXT: slli a1, a1, 3 1001; ZVFH-NEXT: sub sp, sp, a1 1002; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1003; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 1004; ZVFH-NEXT: vmv1r.v v7, v0 1005; ZVFH-NEXT: vmfeq.vv v25, v8, v8, v0.t 1006; ZVFH-NEXT: vmv1r.v v0, v25 1007; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 1008; ZVFH-NEXT: addi a0, sp, 16 1009; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 1010; ZVFH-NEXT: vmv1r.v v0, v7 1011; ZVFH-NEXT: vmfeq.vv v25, v16, v16, v0.t 1012; ZVFH-NEXT: vmv1r.v v0, v25 1013; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 1014; ZVFH-NEXT: vmv1r.v v0, v7 1015; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1016; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t 1017; ZVFH-NEXT: csrr a0, vlenb 1018; ZVFH-NEXT: slli a0, a0, 3 1019; ZVFH-NEXT: add sp, sp, a0 1020; ZVFH-NEXT: .cfi_def_cfa sp, 16 1021; ZVFH-NEXT: addi sp, sp, 16 1022; ZVFH-NEXT: .cfi_def_cfa_offset 0 1023; ZVFH-NEXT: ret 1024; 1025; ZVFHMIN-LABEL: vfmin_vv_nxv32f16: 1026; ZVFHMIN: # %bb.0: 1027; ZVFHMIN-NEXT: addi sp, sp, -16 1028; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 1029; ZVFHMIN-NEXT: csrr a1, vlenb 1030; ZVFHMIN-NEXT: slli a1, a1, 5 1031; ZVFHMIN-NEXT: sub sp, sp, a1 1032; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 1033; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma 1034; ZVFHMIN-NEXT: vmv1r.v v7, v0 1035; ZVFHMIN-NEXT: csrr a1, vlenb 1036; ZVFHMIN-NEXT: li a2, 24 1037; ZVFHMIN-NEXT: mul a1, a1, a2 1038; ZVFHMIN-NEXT: add a1, sp, a1 1039; ZVFHMIN-NEXT: addi a1, a1, 16 1040; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1041; ZVFHMIN-NEXT: vmv8r.v v16, v8 1042; ZVFHMIN-NEXT: csrr a2, vlenb 1043; ZVFHMIN-NEXT: slli a1, a2, 1 1044; ZVFHMIN-NEXT: srli a2, a2, 2 1045; ZVFHMIN-NEXT: sub a3, a0, a1 1046; ZVFHMIN-NEXT: vslidedown.vx v25, v0, a2 1047; ZVFHMIN-NEXT: sltu a2, a0, a3 1048; ZVFHMIN-NEXT: addi a2, a2, -1 1049; ZVFHMIN-NEXT: and a2, a2, a3 1050; ZVFHMIN-NEXT: vmv1r.v v0, v25 1051; ZVFHMIN-NEXT: csrr a3, vlenb 1052; ZVFHMIN-NEXT: slli a3, a3, 3 1053; ZVFHMIN-NEXT: add a3, sp, a3 1054; ZVFHMIN-NEXT: addi a3, a3, 16 1055; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1056; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1057; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t 1058; 
ZVFHMIN-NEXT: csrr a2, vlenb 1059; ZVFHMIN-NEXT: slli a2, a2, 4 1060; ZVFHMIN-NEXT: add a2, sp, a2 1061; ZVFHMIN-NEXT: addi a2, a2, 16 1062; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 1063; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1064; ZVFHMIN-NEXT: vmfeq.vv v24, v8, v8, v0.t 1065; ZVFHMIN-NEXT: csrr a2, vlenb 1066; ZVFHMIN-NEXT: li a3, 24 1067; ZVFHMIN-NEXT: mul a2, a2, a3 1068; ZVFHMIN-NEXT: add a2, sp, a2 1069; ZVFHMIN-NEXT: addi a2, a2, 16 1070; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1071; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1072; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t 1073; ZVFHMIN-NEXT: vmv1r.v v0, v24 1074; ZVFHMIN-NEXT: csrr a2, vlenb 1075; ZVFHMIN-NEXT: slli a2, a2, 4 1076; ZVFHMIN-NEXT: add a2, sp, a2 1077; ZVFHMIN-NEXT: addi a2, a2, 16 1078; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1079; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1080; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 1081; ZVFHMIN-NEXT: addi a2, sp, 16 1082; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 1083; ZVFHMIN-NEXT: vmv1r.v v0, v25 1084; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t 1085; ZVFHMIN-NEXT: vmv1r.v v0, v12 1086; ZVFHMIN-NEXT: csrr a2, vlenb 1087; ZVFHMIN-NEXT: slli a2, a2, 4 1088; ZVFHMIN-NEXT: add a2, sp, a2 1089; ZVFHMIN-NEXT: addi a2, a2, 16 1090; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1091; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 1092; ZVFHMIN-NEXT: vmv1r.v v0, v25 1093; ZVFHMIN-NEXT: addi a2, sp, 16 1094; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1095; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t 1096; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1097; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t 1098; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 1099; ZVFHMIN-NEXT: # %bb.1: 1100; ZVFHMIN-NEXT: mv a0, a1 1101; ZVFHMIN-NEXT: .LBB22_2: 1102; ZVFHMIN-NEXT: vmv1r.v v0, v7 1103; ZVFHMIN-NEXT: csrr a1, vlenb 1104; ZVFHMIN-NEXT: slli a1, a1, 3 1105; ZVFHMIN-NEXT: add a1, sp, a1 1106; ZVFHMIN-NEXT: addi a1, a1, 16 1107; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 1108; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1109; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t 1110; ZVFHMIN-NEXT: csrr a0, vlenb 1111; ZVFHMIN-NEXT: slli a0, a0, 4 1112; ZVFHMIN-NEXT: add a0, sp, a0 1113; ZVFHMIN-NEXT: addi a0, a0, 16 1114; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 1115; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1116; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t 1117; ZVFHMIN-NEXT: csrr a0, vlenb 1118; ZVFHMIN-NEXT: li a1, 24 1119; ZVFHMIN-NEXT: mul a0, a0, a1 1120; ZVFHMIN-NEXT: add a0, sp, a0 1121; ZVFHMIN-NEXT: addi a0, a0, 16 1122; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1123; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1124; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t 1125; ZVFHMIN-NEXT: vmv1r.v v0, v8 1126; ZVFHMIN-NEXT: csrr a0, vlenb 1127; ZVFHMIN-NEXT: slli a0, a0, 4 1128; ZVFHMIN-NEXT: add a0, sp, a0 1129; ZVFHMIN-NEXT: addi a0, a0, 16 1130; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1131; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1132; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 1133; ZVFHMIN-NEXT: csrr a0, vlenb 1134; ZVFHMIN-NEXT: li a1, 24 1135; ZVFHMIN-NEXT: mul a0, a0, a1 1136; ZVFHMIN-NEXT: add a0, sp, a0 1137; ZVFHMIN-NEXT: addi a0, a0, 16 1138; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1139; ZVFHMIN-NEXT: vmv1r.v v0, v7 1140; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t 1141; 
ZVFHMIN-NEXT: vmv1r.v v0, v8 1142; ZVFHMIN-NEXT: csrr a0, vlenb 1143; ZVFHMIN-NEXT: slli a0, a0, 4 1144; ZVFHMIN-NEXT: add a0, sp, a0 1145; ZVFHMIN-NEXT: addi a0, a0, 16 1146; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1147; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 1148; ZVFHMIN-NEXT: vmv1r.v v0, v7 1149; ZVFHMIN-NEXT: csrr a0, vlenb 1150; ZVFHMIN-NEXT: li a1, 24 1151; ZVFHMIN-NEXT: mul a0, a0, a1 1152; ZVFHMIN-NEXT: add a0, sp, a0 1153; ZVFHMIN-NEXT: addi a0, a0, 16 1154; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1155; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t 1156; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1157; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t 1158; ZVFHMIN-NEXT: csrr a0, vlenb 1159; ZVFHMIN-NEXT: slli a0, a0, 5 1160; ZVFHMIN-NEXT: add sp, sp, a0 1161; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1162; ZVFHMIN-NEXT: addi sp, sp, 16 1163; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1164; ZVFHMIN-NEXT: ret 1165 %v = call <vscale x 32 x half> @llvm.vp.minimum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl) 1166 ret <vscale x 32 x half> %v 1167} 1168 1169define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 zeroext %evl) { 1170; ZVFH-LABEL: vfmin_vv_nxv32f16_unmasked: 1171; ZVFH: # %bb.0: 1172; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 1173; ZVFH-NEXT: vmfeq.vv v0, v8, v8 1174; ZVFH-NEXT: vmfeq.vv v7, v16, v16 1175; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 1176; ZVFH-NEXT: vmv1r.v v0, v7 1177; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 1178; ZVFH-NEXT: vfmin.vv v8, v8, v24 1179; ZVFH-NEXT: ret 1180; 1181; ZVFHMIN-LABEL: vfmin_vv_nxv32f16_unmasked: 1182; ZVFHMIN: # %bb.0: 1183; ZVFHMIN-NEXT: addi sp, sp, -16 1184; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 1185; ZVFHMIN-NEXT: csrr a1, vlenb 1186; ZVFHMIN-NEXT: slli a1, a1, 5 1187; ZVFHMIN-NEXT: sub sp, sp, a1 1188; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 1189; ZVFHMIN-NEXT: csrr a1, vlenb 1190; ZVFHMIN-NEXT: li a2, 24 1191; ZVFHMIN-NEXT: mul a1, a1, a2 1192; ZVFHMIN-NEXT: add a1, sp, a1 1193; ZVFHMIN-NEXT: addi a1, a1, 16 1194; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1195; ZVFHMIN-NEXT: csrr a2, vlenb 1196; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma 1197; ZVFHMIN-NEXT: vmset.m v16 1198; ZVFHMIN-NEXT: slli a1, a2, 1 1199; ZVFHMIN-NEXT: srli a2, a2, 2 1200; ZVFHMIN-NEXT: sub a3, a0, a1 1201; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 1202; ZVFHMIN-NEXT: vslidedown.vx v24, v16, a2 1203; ZVFHMIN-NEXT: sltu a2, a0, a3 1204; ZVFHMIN-NEXT: addi a2, a2, -1 1205; ZVFHMIN-NEXT: and a2, a2, a3 1206; ZVFHMIN-NEXT: vmv1r.v v0, v24 1207; ZVFHMIN-NEXT: csrr a3, vlenb 1208; ZVFHMIN-NEXT: slli a3, a3, 3 1209; ZVFHMIN-NEXT: add a3, sp, a3 1210; ZVFHMIN-NEXT: addi a3, a3, 16 1211; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1212; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1213; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t 1214; ZVFHMIN-NEXT: csrr a2, vlenb 1215; ZVFHMIN-NEXT: slli a2, a2, 4 1216; ZVFHMIN-NEXT: add a2, sp, a2 1217; ZVFHMIN-NEXT: addi a2, a2, 16 1218; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill 1219; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1220; ZVFHMIN-NEXT: vmfeq.vv v25, v16, v16, v0.t 1221; ZVFHMIN-NEXT: csrr a2, vlenb 1222; ZVFHMIN-NEXT: li a3, 24 1223; ZVFHMIN-NEXT: mul a2, a2, a3 1224; ZVFHMIN-NEXT: add a2, sp, a2 1225; ZVFHMIN-NEXT: addi a2, a2, 16 
1226; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1227; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1228; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t 1229; ZVFHMIN-NEXT: vmv1r.v v0, v25 1230; ZVFHMIN-NEXT: csrr a2, vlenb 1231; ZVFHMIN-NEXT: slli a2, a2, 4 1232; ZVFHMIN-NEXT: add a2, sp, a2 1233; ZVFHMIN-NEXT: addi a2, a2, 16 1234; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1235; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1236; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 1237; ZVFHMIN-NEXT: addi a2, sp, 16 1238; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 1239; ZVFHMIN-NEXT: vmv1r.v v0, v24 1240; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t 1241; ZVFHMIN-NEXT: vmv1r.v v0, v12 1242; ZVFHMIN-NEXT: csrr a2, vlenb 1243; ZVFHMIN-NEXT: slli a2, a2, 4 1244; ZVFHMIN-NEXT: add a2, sp, a2 1245; ZVFHMIN-NEXT: addi a2, a2, 16 1246; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1247; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 1248; ZVFHMIN-NEXT: vmv1r.v v0, v24 1249; ZVFHMIN-NEXT: addi a2, sp, 16 1250; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1251; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t 1252; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1253; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t 1254; ZVFHMIN-NEXT: csrr a2, vlenb 1255; ZVFHMIN-NEXT: slli a2, a2, 4 1256; ZVFHMIN-NEXT: add a2, sp, a2 1257; ZVFHMIN-NEXT: addi a2, a2, 16 1258; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 1259; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 1260; ZVFHMIN-NEXT: # %bb.1: 1261; ZVFHMIN-NEXT: mv a0, a1 1262; ZVFHMIN-NEXT: .LBB23_2: 1263; ZVFHMIN-NEXT: csrr a1, vlenb 1264; ZVFHMIN-NEXT: slli a1, a1, 3 1265; ZVFHMIN-NEXT: add a1, sp, a1 1266; ZVFHMIN-NEXT: addi a1, a1, 16 1267; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1268; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1269; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 1270; ZVFHMIN-NEXT: csrr a0, vlenb 1271; ZVFHMIN-NEXT: li a1, 24 1272; ZVFHMIN-NEXT: mul a0, a0, a1 1273; ZVFHMIN-NEXT: add a0, sp, a0 1274; ZVFHMIN-NEXT: addi a0, a0, 16 1275; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 1276; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 1277; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1278; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 1279; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 1280; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 1281; ZVFHMIN-NEXT: csrr a0, vlenb 1282; ZVFHMIN-NEXT: li a1, 24 1283; ZVFHMIN-NEXT: mul a0, a0, a1 1284; ZVFHMIN-NEXT: add a0, sp, a0 1285; ZVFHMIN-NEXT: addi a0, a0, 16 1286; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1287; ZVFHMIN-NEXT: vmv1r.v v0, v7 1288; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 1289; ZVFHMIN-NEXT: csrr a0, vlenb 1290; ZVFHMIN-NEXT: li a1, 24 1291; ZVFHMIN-NEXT: mul a0, a0, a1 1292; ZVFHMIN-NEXT: add a0, sp, a0 1293; ZVFHMIN-NEXT: addi a0, a0, 16 1294; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1295; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24 1296; ZVFHMIN-NEXT: csrr a0, vlenb 1297; ZVFHMIN-NEXT: slli a0, a0, 4 1298; ZVFHMIN-NEXT: add a0, sp, a0 1299; ZVFHMIN-NEXT: addi a0, a0, 16 1300; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 1301; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1302; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 1303; ZVFHMIN-NEXT: csrr a0, vlenb 1304; ZVFHMIN-NEXT: slli a0, a0, 5 1305; ZVFHMIN-NEXT: add sp, sp, a0 1306; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1307; ZVFHMIN-NEXT: addi sp, sp, 16 1308; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1309; ZVFHMIN-NEXT: ret 1310 %v = call <vscale x 
32 x half> @llvm.vp.minimum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) 1311 ret <vscale x 32 x half> %v 1312} 1313 1314declare <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32) 1315 1316define <vscale x 1 x float> @vfmin_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1317; CHECK-LABEL: vfmin_vv_nxv1f32: 1318; CHECK: # %bb.0: 1319; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1320; CHECK-NEXT: vmv1r.v v10, v0 1321; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t 1322; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 1323; CHECK-NEXT: vmv1r.v v0, v10 1324; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t 1325; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 1326; CHECK-NEXT: vmv1r.v v0, v10 1327; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t 1328; CHECK-NEXT: ret 1329 %v = call <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl) 1330 ret <vscale x 1 x float> %v 1331} 1332 1333define <vscale x 1 x float> @vfmin_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, i32 zeroext %evl) { 1334; CHECK-LABEL: vfmin_vv_nxv1f32_unmasked: 1335; CHECK: # %bb.0: 1336; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1337; CHECK-NEXT: vmfeq.vv v0, v8, v8 1338; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 1339; CHECK-NEXT: vmfeq.vv v0, v9, v9 1340; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 1341; CHECK-NEXT: vfmin.vv v8, v8, v10 1342; CHECK-NEXT: ret 1343 %v = call <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1344 ret <vscale x 1 x float> %v 1345} 1346 1347declare <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32) 1348 1349define <vscale x 2 x float> @vfmin_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1350; CHECK-LABEL: vfmin_vv_nxv2f32: 1351; CHECK: # %bb.0: 1352; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1353; CHECK-NEXT: vmv1r.v v10, v0 1354; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t 1355; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 1356; CHECK-NEXT: vmv1r.v v0, v10 1357; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t 1358; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 1359; CHECK-NEXT: vmv1r.v v0, v10 1360; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t 1361; CHECK-NEXT: ret 1362 %v = call <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl) 1363 ret <vscale x 2 x float> %v 1364} 1365 1366define <vscale x 2 x float> @vfmin_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, i32 zeroext %evl) { 1367; CHECK-LABEL: vfmin_vv_nxv2f32_unmasked: 1368; CHECK: # %bb.0: 1369; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1370; CHECK-NEXT: vmfeq.vv v0, v8, v8 1371; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 1372; CHECK-NEXT: vmfeq.vv v0, v9, v9 1373; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 1374; CHECK-NEXT: vfmin.vv v8, v8, v10 1375; CHECK-NEXT: ret 1376 %v = call <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1377 ret <vscale x 2 x float> %v 1378} 1379 1380declare <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32) 1381 1382define <vscale x 4 x float> 
; CHECK-LABEL: vfmin_vv_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmin_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT: vmfeq.vv v0, v10, v10
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: ret
  %v = call <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfmin_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmin_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT: vmfeq.vv v0, v12, v12
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: ret
  %v = call <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %v
}

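; The f64 tests follow the same compare-merge-vfmin sequence at SEW=64. At m8
; (nxv8f64) the masked form also keeps one of the merged operands in a stack
; slot (vs8r.v/vl8r.v folded spill and reload), since the two m8 sources and
; the merge results do not all fit in vector registers at once.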
declare <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmin_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmin_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv1f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT: vmfeq.vv v0, v9, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: ret
  %v = call <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmin_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmin_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT: vmfeq.vv v0, v10, v10
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: ret
  %v = call <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmin_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmin_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT: vmfeq.vv v0, v12, v12
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: ret
  %v = call <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmin_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmin_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v24
; CHECK-NEXT: ret
  %v = call <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x double> %v
}

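; nxv16f64 does not fit in a single m8 register group, so the operation is
; split in two. The high half runs with an EVL of max(evl - VLMAX, 0),
; computed by the branch-free sltu/addi/and sequence, and (in the masked test)
; with the upper half of the mask slid into place via vslidedown.vx; the low
; half clamps the EVL to VLMAX with the bltu/mv branch. Intermediate results
; are spilled to the stack between the two halves.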
declare <vscale x 16 x double> @llvm.vp.minimum.nxv16f64(<vscale x 16 x double>, <vscale x 16 x double>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: srli a4, a1, 3
; CHECK-NEXT: vslidedown.vx v6, v0, a4
; CHECK-NEXT: sub a4, a2, a1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vl8re64.v v8, (a3)
; CHECK-NEXT: sltu a3, a2, a4
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a3, a3, a4
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a2, a1, .LBB40_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB40_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <vscale x 16 x double> @llvm.vp.minimum.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmin_vv_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: sub a4, a2, a1
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vl8re64.v v24, (a3)
; CHECK-NEXT: sltu a3, a2, a4
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a3, a3, a4
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: vmfeq.vv v7, v24, v24
; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
; CHECK-NEXT: vfmin.vv v8, v16, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a2, a1, .LBB41_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB41_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v7, v8, v8
; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vfmin.vv v8, v8, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <vscale x 16 x double> @llvm.vp.minimum.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}