; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>)

define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %v
}

declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)

define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFH-LABEL: vfmax_v4f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v4f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %v
}

declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)

define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFH-LABEL: vfmax_v8f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v8f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %v
}

declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)

define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFH-LABEL: vfmax_v16f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v16f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b)
  ret <16 x half> %v
}

declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)

define <2 x float> @vfmax_v2f32_vv(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: vfmax_v2f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT:    vmfeq.vv v0, v9, v9
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
  ret <2 x float> %v
}

declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)

define <4 x float> @vfmax_v4f32_vv(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vfmax_v4f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT:    vmfeq.vv v0, v9, v9
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %v
}

declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)

define <8 x float> @vfmax_v8f32_vv(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: vfmax_v8f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT:    vmfeq.vv v0, v10, v10
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v12
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.maximum.v8f32(<8 x float> %a, <8 x float> %b)
  ret <8 x float> %v
}

declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)

define <16 x float> @vfmax_v16f32_vv(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: vfmax_v16f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT:    vmfeq.vv v0, v12, v12
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %a, <16 x float> %b)
  ret <16 x float> %v
}

declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)

define <2 x double> @vfmax_v2f64_vv(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: vfmax_v2f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v10, v8, v9, v0
; CHECK-NEXT:    vmfeq.vv v0, v9, v9
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %v
}

declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)

define <4 x double> @vfmax_v4f64_vv(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: vfmax_v4f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
; CHECK-NEXT:    vmfeq.vv v0, v10, v10
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v12
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b)
  ret <4 x double> %v
}

declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>)

define <8 x double> @vfmax_v8f64_vv(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: vfmax_v8f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
; CHECK-NEXT:    vmfeq.vv v0, v12, v12
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.maximum.v8f64(<8 x double> %a, <8 x double> %b)
  ret <8 x double> %v
}

declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)

define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind {
; CHECK-LABEL: vfmax_v16f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v7, v16, v16
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.maximum.v16f64(<16 x double> %a, <16 x double> %b)
  ret <16 x double> %v
}

define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnan:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmax.vv v8, v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %v
}

; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnana:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vfadd.vv v8, v8, v8
; ZVFH-NEXT:    vmerge.vvm v10, v9, v8, v0
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v8, v8, v9, v0
; ZVFH-NEXT:    vfmax.vv v8, v10, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %c = fadd nnan <2 x half> %a, %a
  %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b)
  ret <2 x half> %v
}

; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vfadd.vv v9, v9, v9
; ZVFH-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v9, v8, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %c = fadd nnan <2 x half> %b, %b
  %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
  ret <2 x half> %v
}