1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s 5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s 6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s 7 8define half @v_minimumnum_f16(half %x, half %y) { 9; GFX8-LABEL: v_minimumnum_f16: 10; GFX8: ; %bb.0: 11; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 13; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 14; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 15; GFX8-NEXT: s_setpc_b64 s[30:31] 16; 17; GFX9-LABEL: v_minimumnum_f16: 18; GFX9: ; %bb.0: 19; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20; GFX9-NEXT: v_max_f16_e32 v1, v1, v1 21; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 22; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 23; GFX9-NEXT: s_setpc_b64 s[30:31] 24; 25; GFX10-LABEL: v_minimumnum_f16: 26; GFX10: ; %bb.0: 27; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 28; GFX10-NEXT: v_max_f16_e32 v1, v1, v1 29; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 30; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 31; GFX10-NEXT: s_setpc_b64 s[30:31] 32; 33; GFX11-LABEL: v_minimumnum_f16: 34; GFX11: ; %bb.0: 35; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 36; GFX11-NEXT: v_max_f16_e32 v1, v1, v1 37; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 38; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 39; GFX11-NEXT: v_min_f16_e32 v0, v0, v1 40; GFX11-NEXT: s_setpc_b64 s[30:31] 41; 42; GFX12-LABEL: v_minimumnum_f16: 43; GFX12: ; %bb.0: 44; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 45; GFX12-NEXT: s_wait_expcnt 0x0 46; GFX12-NEXT: s_wait_samplecnt 0x0 47; GFX12-NEXT: s_wait_bvhcnt 0x0 48; GFX12-NEXT: s_wait_kmcnt 0x0 49; GFX12-NEXT: 
v_max_num_f16_e32 v1, v1, v1 50; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 51; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 52; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 53; GFX12-NEXT: s_setpc_b64 s[30:31] 54 %result = call half @llvm.minimumnum.f16(half %x, half %y) 55 ret half %result 56} 57 58define half @v_minimumnum_f16_nnan(half %x, half %y) { 59; GFX8-LABEL: v_minimumnum_f16_nnan: 60; GFX8: ; %bb.0: 61; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 62; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 63; GFX8-NEXT: s_setpc_b64 s[30:31] 64; 65; GFX9-LABEL: v_minimumnum_f16_nnan: 66; GFX9: ; %bb.0: 67; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 68; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 69; GFX9-NEXT: s_setpc_b64 s[30:31] 70; 71; GFX10-LABEL: v_minimumnum_f16_nnan: 72; GFX10: ; %bb.0: 73; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 74; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 75; GFX10-NEXT: s_setpc_b64 s[30:31] 76; 77; GFX11-LABEL: v_minimumnum_f16_nnan: 78; GFX11: ; %bb.0: 79; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 80; GFX11-NEXT: v_min_f16_e32 v0, v0, v1 81; GFX11-NEXT: s_setpc_b64 s[30:31] 82; 83; GFX12-LABEL: v_minimumnum_f16_nnan: 84; GFX12: ; %bb.0: 85; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 86; GFX12-NEXT: s_wait_expcnt 0x0 87; GFX12-NEXT: s_wait_samplecnt 0x0 88; GFX12-NEXT: s_wait_bvhcnt 0x0 89; GFX12-NEXT: s_wait_kmcnt 0x0 90; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 91; GFX12-NEXT: s_setpc_b64 s[30:31] 92 %result = call nnan half @llvm.minimumnum.f16(half %x, half %y) 93 ret half %result 94} 95 96define half @v_minimumnum_f16_1.0(half %x) { 97; GFX8-LABEL: v_minimumnum_f16_1.0: 98; GFX8: ; %bb.0: 99; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 100; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 101; GFX8-NEXT: v_min_f16_e32 v0, 1.0, v0 102; GFX8-NEXT: s_setpc_b64 s[30:31] 103; 104; GFX9-LABEL: v_minimumnum_f16_1.0: 105; GFX9: ; %bb.0: 106; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 107; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 108; 
GFX9-NEXT: v_min_f16_e32 v0, 1.0, v0 109; GFX9-NEXT: s_setpc_b64 s[30:31] 110; 111; GFX10-LABEL: v_minimumnum_f16_1.0: 112; GFX10: ; %bb.0: 113; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 114; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 115; GFX10-NEXT: v_min_f16_e32 v0, 1.0, v0 116; GFX10-NEXT: s_setpc_b64 s[30:31] 117; 118; GFX11-LABEL: v_minimumnum_f16_1.0: 119; GFX11: ; %bb.0: 120; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 121; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 122; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 123; GFX11-NEXT: v_min_f16_e32 v0, 1.0, v0 124; GFX11-NEXT: s_setpc_b64 s[30:31] 125; 126; GFX12-LABEL: v_minimumnum_f16_1.0: 127; GFX12: ; %bb.0: 128; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 129; GFX12-NEXT: s_wait_expcnt 0x0 130; GFX12-NEXT: s_wait_samplecnt 0x0 131; GFX12-NEXT: s_wait_bvhcnt 0x0 132; GFX12-NEXT: s_wait_kmcnt 0x0 133; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 134; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 135; GFX12-NEXT: v_min_num_f16_e32 v0, 1.0, v0 136; GFX12-NEXT: s_setpc_b64 s[30:31] 137 %result = call half @llvm.minimumnum.f16(half %x, half 1.0) 138 ret half %result 139} 140 141define bfloat @v_minimumnum_bf16(bfloat %x, bfloat %y) { 142; GFX8-LABEL: v_minimumnum_bf16: 143; GFX8: ; %bb.0: 144; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 145; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v0 146; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 147; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1 148; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 149; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 150; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 151; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1 152; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0 153; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2 154; GFX8-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 155; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 156; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 157; GFX8-NEXT: v_bfe_u32 v3, v2, 16, 1 158; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v2 159; GFX8-NEXT: s_movk_i32 s4, 0x7fff 160; 
GFX8-NEXT: v_add_u32_e32 v3, vcc, s4, v3 161; GFX8-NEXT: v_or_b32_e32 v4, 0x400000, v2 162; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 163; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc 164; GFX8-NEXT: s_movk_i32 s4, 0x8000 165; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v2 166; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 167; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 168; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1 169; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 170; GFX8-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 171; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1 172; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 173; GFX8-NEXT: s_setpc_b64 s[30:31] 174; 175; GFX9-LABEL: v_minimumnum_bf16: 176; GFX9: ; %bb.0: 177; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 178; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0 179; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 180; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1 181; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 182; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 183; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 184; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0 185; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1 186; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3 187; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 188; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 189; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 190; GFX9-NEXT: v_bfe_u32 v3, v2, 16, 1 191; GFX9-NEXT: s_movk_i32 s4, 0x7fff 192; GFX9-NEXT: v_add3_u32 v3, v3, v2, s4 193; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v2 194; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 195; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc 196; GFX9-NEXT: s_movk_i32 s4, 0x8000 197; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2 198; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 199; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 200; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1 201; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 202; GFX9-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 203; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1 204; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 205; GFX9-NEXT: 
s_setpc_b64 s[30:31] 206; 207; GFX10-LABEL: v_minimumnum_bf16: 208; GFX10: ; %bb.0: 209; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 210; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0 211; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v1 212; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 213; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 214; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 215; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0 216; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo 217; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v1 218; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3 219; GFX10-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc_lo 220; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 221; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 222; GFX10-NEXT: v_bfe_u32 v3, v2, 16, 1 223; GFX10-NEXT: v_or_b32_e32 v4, 0x400000, v2 224; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 225; GFX10-NEXT: v_add3_u32 v3, v3, v2, 0x7fff 226; GFX10-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc_lo 227; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0 228; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2 229; GFX10-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 230; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo 231; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1 232; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 233; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2 234; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo 235; GFX10-NEXT: s_setpc_b64 s[30:31] 236; 237; GFX11-LABEL: v_minimumnum_bf16: 238; GFX11: ; %bb.0: 239; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 240; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v0 241; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 242; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 243; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 244; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v1 245; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v0 246; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) 247; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 248; 
GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo 249; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v1 250; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 251; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3 252; GFX11-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc_lo 253; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 254; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 255; GFX11-NEXT: v_max_f32_e32 v2, v2, v2 256; GFX11-NEXT: v_bfe_u32 v3, v2, 16, 1 257; GFX11-NEXT: v_or_b32_e32 v4, 0x400000, v2 258; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 259; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 260; GFX11-NEXT: v_add3_u32 v3, v3, v2, 0x7fff 261; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc_lo 262; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0 263; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 264; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v2 265; GFX11-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 266; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo 267; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1 268; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 269; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 270; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2 271; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 272; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo 273; GFX11-NEXT: s_setpc_b64 s[30:31] 274; 275; GFX12-LABEL: v_minimumnum_bf16: 276; GFX12: ; %bb.0: 277; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 278; GFX12-NEXT: s_wait_expcnt 0x0 279; GFX12-NEXT: s_wait_samplecnt 0x0 280; GFX12-NEXT: s_wait_bvhcnt 0x0 281; GFX12-NEXT: s_wait_kmcnt 0x0 282; GFX12-NEXT: v_lshlrev_b32_e32 v2, 16, v0 283; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 284; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 285; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 286; GFX12-NEXT: v_lshlrev_b32_e32 v3, 16, v1 
287; GFX12-NEXT: v_lshlrev_b32_e32 v2, 16, v0 288; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) 289; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 290; GFX12-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo 291; GFX12-NEXT: v_lshlrev_b32_e32 v3, 16, v1 292; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 293; GFX12-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3 294; GFX12-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc_lo 295; GFX12-NEXT: v_lshlrev_b32_e32 v2, 16, v2 296; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 297; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v2 298; GFX12-NEXT: v_bfe_u32 v3, v2, 16, 1 299; GFX12-NEXT: v_or_b32_e32 v4, 0x400000, v2 300; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2 301; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 302; GFX12-NEXT: v_add3_u32 v3, v3, v2, 0x7fff 303; GFX12-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc_lo 304; GFX12-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0 305; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 306; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v2 307; GFX12-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 308; GFX12-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo 309; GFX12-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1 310; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 311; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 312; GFX12-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2 313; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 314; GFX12-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo 315; GFX12-NEXT: s_setpc_b64 s[30:31] 316 %result = call bfloat @llvm.minimumnum.bf16(bfloat %x, bfloat %y) 317 ret bfloat %result 318} 319 320define bfloat @v_minimumnum_bf16_nnan(bfloat %x, bfloat %y) { 321; GFX8-LABEL: v_minimumnum_bf16_nnan: 322; GFX8: ; %bb.0: 323; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 324; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1 325; 
GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0 326; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2 327; GFX8-NEXT: s_movk_i32 s4, 0x8000 328; GFX8-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 329; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 330; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 331; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1 332; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 333; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v2 334; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1 335; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 336; GFX8-NEXT: s_setpc_b64 s[30:31] 337; 338; GFX9-LABEL: v_minimumnum_bf16_nnan: 339; GFX9: ; %bb.0: 340; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 341; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v1 342; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 343; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2 344; GFX9-NEXT: s_movk_i32 s4, 0x8000 345; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 346; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 347; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 348; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v1 349; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 350; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v2 351; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1 352; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 353; GFX9-NEXT: s_setpc_b64 s[30:31] 354; 355; GFX10-LABEL: v_minimumnum_bf16_nnan: 356; GFX10: ; %bb.0: 357; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 358; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v1 359; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v0 360; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2 361; GFX10-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc_lo 362; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0 363; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v2 364; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo 365; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1 366; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 367; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v3 368; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo 369; GFX10-NEXT: s_setpc_b64 s[30:31] 370; 371; GFX11-LABEL: 
v_minimumnum_bf16_nnan: 372; GFX11: ; %bb.0: 373; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 374; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v1 375; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v0 376; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 377; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2 378; GFX11-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc_lo 379; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0 380; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_lshlrev_b32 v3, 16, v2 381; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1 382; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) 383; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 384; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v3 385; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 386; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo 387; GFX11-NEXT: s_setpc_b64 s[30:31] 388; 389; GFX12-LABEL: v_minimumnum_bf16_nnan: 390; GFX12: ; %bb.0: 391; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 392; GFX12-NEXT: s_wait_expcnt 0x0 393; GFX12-NEXT: s_wait_samplecnt 0x0 394; GFX12-NEXT: s_wait_bvhcnt 0x0 395; GFX12-NEXT: s_wait_kmcnt 0x0 396; GFX12-NEXT: v_lshlrev_b32_e32 v2, 16, v1 397; GFX12-NEXT: v_lshlrev_b32_e32 v3, 16, v0 398; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 399; GFX12-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2 400; GFX12-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc_lo 401; GFX12-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0 402; GFX12-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_lshlrev_b32 v3, 16, v2 403; GFX12-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1 404; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) 405; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 406; GFX12-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v3 407; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 408; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo 409; GFX12-NEXT: s_setpc_b64 s[30:31] 410 %result = call nnan bfloat 
@llvm.minimumnum.bf16(bfloat %x, bfloat %y) 411 ret bfloat %result 412} 413 414define float @v_minimumnum_f32(float %x, float %y) { 415; GFX8-LABEL: v_minimumnum_f32: 416; GFX8: ; %bb.0: 417; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 418; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 419; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 420; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 421; GFX8-NEXT: s_setpc_b64 s[30:31] 422; 423; GFX9-LABEL: v_minimumnum_f32: 424; GFX9: ; %bb.0: 425; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 426; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 427; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 428; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 429; GFX9-NEXT: s_setpc_b64 s[30:31] 430; 431; GFX10-LABEL: v_minimumnum_f32: 432; GFX10: ; %bb.0: 433; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 434; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 435; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 436; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 437; GFX10-NEXT: s_setpc_b64 s[30:31] 438; 439; GFX11-LABEL: v_minimumnum_f32: 440; GFX11: ; %bb.0: 441; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 442; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 443; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 444; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 445; GFX11-NEXT: s_setpc_b64 s[30:31] 446; 447; GFX12-LABEL: v_minimumnum_f32: 448; GFX12: ; %bb.0: 449; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 450; GFX12-NEXT: s_wait_expcnt 0x0 451; GFX12-NEXT: s_wait_samplecnt 0x0 452; GFX12-NEXT: s_wait_bvhcnt 0x0 453; GFX12-NEXT: s_wait_kmcnt 0x0 454; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 455; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 456; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 457; GFX12-NEXT: s_setpc_b64 s[30:31] 458 %result = call float @llvm.minimumnum.f32(float %x, float %y) 459 ret float %result 460} 461 462define float @v_minimumnum_f32_nnan(float %x, float %y) { 463; GFX8-LABEL: v_minimumnum_f32_nnan: 464; GFX8: ; %bb.0: 465; GFX8-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 466; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 467; GFX8-NEXT: s_setpc_b64 s[30:31] 468; 469; GFX9-LABEL: v_minimumnum_f32_nnan: 470; GFX9: ; %bb.0: 471; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 472; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 473; GFX9-NEXT: s_setpc_b64 s[30:31] 474; 475; GFX10-LABEL: v_minimumnum_f32_nnan: 476; GFX10: ; %bb.0: 477; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 478; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 479; GFX10-NEXT: s_setpc_b64 s[30:31] 480; 481; GFX11-LABEL: v_minimumnum_f32_nnan: 482; GFX11: ; %bb.0: 483; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 484; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 485; GFX11-NEXT: s_setpc_b64 s[30:31] 486; 487; GFX12-LABEL: v_minimumnum_f32_nnan: 488; GFX12: ; %bb.0: 489; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 490; GFX12-NEXT: s_wait_expcnt 0x0 491; GFX12-NEXT: s_wait_samplecnt 0x0 492; GFX12-NEXT: s_wait_bvhcnt 0x0 493; GFX12-NEXT: s_wait_kmcnt 0x0 494; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 495; GFX12-NEXT: s_setpc_b64 s[30:31] 496 %result = call nnan float @llvm.minimumnum.f32(float %x, float %y) 497 ret float %result 498} 499 500define double @v_minimumnum_f64(double %x, double %y) { 501; GFX8-LABEL: v_minimumnum_f64: 502; GFX8: ; %bb.0: 503; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 504; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 505; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 506; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 507; GFX8-NEXT: s_setpc_b64 s[30:31] 508; 509; GFX9-LABEL: v_minimumnum_f64: 510; GFX9: ; %bb.0: 511; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 512; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 513; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 514; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 515; GFX9-NEXT: s_setpc_b64 s[30:31] 516; 517; GFX10-LABEL: v_minimumnum_f64: 518; GFX10: ; %bb.0: 519; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 520; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 521; GFX10-NEXT: v_max_f64 
v[0:1], v[0:1], v[0:1] 522; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 523; GFX10-NEXT: s_setpc_b64 s[30:31] 524; 525; GFX11-LABEL: v_minimumnum_f64: 526; GFX11: ; %bb.0: 527; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 528; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 529; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 530; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 531; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 532; GFX11-NEXT: s_setpc_b64 s[30:31] 533; 534; GFX12-LABEL: v_minimumnum_f64: 535; GFX12: ; %bb.0: 536; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 537; GFX12-NEXT: s_wait_expcnt 0x0 538; GFX12-NEXT: s_wait_samplecnt 0x0 539; GFX12-NEXT: s_wait_bvhcnt 0x0 540; GFX12-NEXT: s_wait_kmcnt 0x0 541; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] 542; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 543; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 544; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] 545; GFX12-NEXT: s_setpc_b64 s[30:31] 546 %result = call double @llvm.minimumnum.f64(double %x, double %y) 547 ret double %result 548} 549 550define double @v_minimumnum_f64_nnan(double %x, double %y) { 551; GFX8-LABEL: v_minimumnum_f64_nnan: 552; GFX8: ; %bb.0: 553; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 555; GFX8-NEXT: s_setpc_b64 s[30:31] 556; 557; GFX9-LABEL: v_minimumnum_f64_nnan: 558; GFX9: ; %bb.0: 559; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 560; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 561; GFX9-NEXT: s_setpc_b64 s[30:31] 562; 563; GFX10-LABEL: v_minimumnum_f64_nnan: 564; GFX10: ; %bb.0: 565; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 566; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 567; GFX10-NEXT: s_setpc_b64 s[30:31] 568; 569; GFX11-LABEL: v_minimumnum_f64_nnan: 570; GFX11: ; %bb.0: 571; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 572; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 573; GFX11-NEXT: s_setpc_b64 s[30:31] 574; 575; GFX12-LABEL: 
v_minimumnum_f64_nnan: 576; GFX12: ; %bb.0: 577; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 578; GFX12-NEXT: s_wait_expcnt 0x0 579; GFX12-NEXT: s_wait_samplecnt 0x0 580; GFX12-NEXT: s_wait_bvhcnt 0x0 581; GFX12-NEXT: s_wait_kmcnt 0x0 582; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] 583; GFX12-NEXT: s_setpc_b64 s[30:31] 584 %result = call nnan double @llvm.minimumnum.f64(double %x, double %y) 585 ret double %result 586} 587 588define float @v_minimumnum_f32_1.0(float %x) { 589; GFX8-LABEL: v_minimumnum_f32_1.0: 590; GFX8: ; %bb.0: 591; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 592; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 593; GFX8-NEXT: v_min_f32_e32 v0, 1.0, v0 594; GFX8-NEXT: s_setpc_b64 s[30:31] 595; 596; GFX9-LABEL: v_minimumnum_f32_1.0: 597; GFX9: ; %bb.0: 598; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 599; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 600; GFX9-NEXT: v_min_f32_e32 v0, 1.0, v0 601; GFX9-NEXT: s_setpc_b64 s[30:31] 602; 603; GFX10-LABEL: v_minimumnum_f32_1.0: 604; GFX10: ; %bb.0: 605; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 606; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 607; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 608; GFX10-NEXT: s_setpc_b64 s[30:31] 609; 610; GFX11-LABEL: v_minimumnum_f32_1.0: 611; GFX11: ; %bb.0: 612; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 613; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 614; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 615; GFX11-NEXT: v_min_f32_e32 v0, 1.0, v0 616; GFX11-NEXT: s_setpc_b64 s[30:31] 617; 618; GFX12-LABEL: v_minimumnum_f32_1.0: 619; GFX12: ; %bb.0: 620; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 621; GFX12-NEXT: s_wait_expcnt 0x0 622; GFX12-NEXT: s_wait_samplecnt 0x0 623; GFX12-NEXT: s_wait_bvhcnt 0x0 624; GFX12-NEXT: s_wait_kmcnt 0x0 625; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 626; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 627; GFX12-NEXT: v_min_num_f32_e32 v0, 1.0, v0 628; GFX12-NEXT: s_setpc_b64 s[30:31] 629 %result = call float @llvm.minimumnum.f32(float %x, float 1.0) 630 
ret float %result 631} 632 633define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) { 634; GFX8-LABEL: v_minimumnum_f32_rhs_not_snan: 635; GFX8: ; %bb.0: 636; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 637; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 638; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 639; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 640; GFX8-NEXT: s_setpc_b64 s[30:31] 641; 642; GFX9-LABEL: v_minimumnum_f32_rhs_not_snan: 643; GFX9: ; %bb.0: 644; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 645; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 646; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 647; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 648; GFX9-NEXT: s_setpc_b64 s[30:31] 649; 650; GFX10-LABEL: v_minimumnum_f32_rhs_not_snan: 651; GFX10: ; %bb.0: 652; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 653; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 654; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 655; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 656; GFX10-NEXT: s_setpc_b64 s[30:31] 657; 658; GFX11-LABEL: v_minimumnum_f32_rhs_not_snan: 659; GFX11: ; %bb.0: 660; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 661; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 662; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 663; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 664; GFX11-NEXT: s_setpc_b64 s[30:31] 665; 666; GFX12-LABEL: v_minimumnum_f32_rhs_not_snan: 667; GFX12: ; %bb.0: 668; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 669; GFX12-NEXT: s_wait_expcnt 0x0 670; GFX12-NEXT: s_wait_samplecnt 0x0 671; GFX12-NEXT: s_wait_bvhcnt 0x0 672; GFX12-NEXT: s_wait_kmcnt 0x0 673; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 674; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 675; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 676; GFX12-NEXT: s_setpc_b64 s[30:31] 677 %canon.y = call float @llvm.canonicalize.f32(float %y) 678 %result = call float @llvm.minimumnum.f32(float %x, float %canon.y) 679 ret float %result 680} 681 682define float @v_minimumnum_f32_lhs_not_snan(float %x, float 
%y) { 683; GFX8-LABEL: v_minimumnum_f32_lhs_not_snan: 684; GFX8: ; %bb.0: 685; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 686; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 687; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 688; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 689; GFX8-NEXT: s_setpc_b64 s[30:31] 690; 691; GFX9-LABEL: v_minimumnum_f32_lhs_not_snan: 692; GFX9: ; %bb.0: 693; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 694; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 695; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 696; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 697; GFX9-NEXT: s_setpc_b64 s[30:31] 698; 699; GFX10-LABEL: v_minimumnum_f32_lhs_not_snan: 700; GFX10: ; %bb.0: 701; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 702; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 703; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 704; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 705; GFX10-NEXT: s_setpc_b64 s[30:31] 706; 707; GFX11-LABEL: v_minimumnum_f32_lhs_not_snan: 708; GFX11: ; %bb.0: 709; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 710; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 711; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 712; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 713; GFX11-NEXT: s_setpc_b64 s[30:31] 714; 715; GFX12-LABEL: v_minimumnum_f32_lhs_not_snan: 716; GFX12: ; %bb.0: 717; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 718; GFX12-NEXT: s_wait_expcnt 0x0 719; GFX12-NEXT: s_wait_samplecnt 0x0 720; GFX12-NEXT: s_wait_bvhcnt 0x0 721; GFX12-NEXT: s_wait_kmcnt 0x0 722; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 723; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 724; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 725; GFX12-NEXT: s_setpc_b64 s[30:31] 726 %canon.x = call float @llvm.canonicalize.f32(float %x) 727 %result = call float @llvm.minimumnum.f32(float %canon.x, float %y) 728 ret float %result 729} 730 731define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) { 732; GFX8-LABEL: v_minimumnum_f32_both_operands_not_snan: 733; GFX8: ; 
%bb.0: 734; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 735; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 736; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 737; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 738; GFX8-NEXT: s_setpc_b64 s[30:31] 739; 740; GFX9-LABEL: v_minimumnum_f32_both_operands_not_snan: 741; GFX9: ; %bb.0: 742; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 743; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 744; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 745; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 746; GFX9-NEXT: s_setpc_b64 s[30:31] 747; 748; GFX10-LABEL: v_minimumnum_f32_both_operands_not_snan: 749; GFX10: ; %bb.0: 750; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 751; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 752; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 753; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 754; GFX10-NEXT: s_setpc_b64 s[30:31] 755; 756; GFX11-LABEL: v_minimumnum_f32_both_operands_not_snan: 757; GFX11: ; %bb.0: 758; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 759; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 760; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 761; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 762; GFX11-NEXT: s_setpc_b64 s[30:31] 763; 764; GFX12-LABEL: v_minimumnum_f32_both_operands_not_snan: 765; GFX12: ; %bb.0: 766; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 767; GFX12-NEXT: s_wait_expcnt 0x0 768; GFX12-NEXT: s_wait_samplecnt 0x0 769; GFX12-NEXT: s_wait_bvhcnt 0x0 770; GFX12-NEXT: s_wait_kmcnt 0x0 771; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 772; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 773; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 774; GFX12-NEXT: s_setpc_b64 s[30:31] 775 %canon.x = call float @llvm.canonicalize.f32(float %x) 776 %canon.y = call float @llvm.canonicalize.f32(float %y) 777 %result = call float @llvm.minimumnum.f32(float %canon.x, float %canon.y) 778 ret float %result 779} 780 781define double @v_minimumnum_f64_1.0(double %x) { 782; GFX8-LABEL: v_minimumnum_f64_1.0: 783; GFX8: ; %bb.0: 784; 
GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 785; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 786; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 787; GFX8-NEXT: s_setpc_b64 s[30:31] 788; 789; GFX9-LABEL: v_minimumnum_f64_1.0: 790; GFX9: ; %bb.0: 791; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 792; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 793; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 794; GFX9-NEXT: s_setpc_b64 s[30:31] 795; 796; GFX10-LABEL: v_minimumnum_f64_1.0: 797; GFX10: ; %bb.0: 798; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 799; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 800; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 801; GFX10-NEXT: s_setpc_b64 s[30:31] 802; 803; GFX11-LABEL: v_minimumnum_f64_1.0: 804; GFX11: ; %bb.0: 805; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 806; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 807; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 808; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 809; GFX11-NEXT: s_setpc_b64 s[30:31] 810; 811; GFX12-LABEL: v_minimumnum_f64_1.0: 812; GFX12: ; %bb.0: 813; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 814; GFX12-NEXT: s_wait_expcnt 0x0 815; GFX12-NEXT: s_wait_samplecnt 0x0 816; GFX12-NEXT: s_wait_bvhcnt 0x0 817; GFX12-NEXT: s_wait_kmcnt 0x0 818; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 819; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 820; GFX12-NEXT: v_min_num_f64_e32 v[0:1], 1.0, v[0:1] 821; GFX12-NEXT: s_setpc_b64 s[30:31] 822 %result = call double @llvm.minimumnum.f64(double %x, double 1.0) 823 ret double %result 824} 825 826define half @v_minimumnum_f16_v_s(half %x, half inreg %y) { 827; GFX8-LABEL: v_minimumnum_f16_v_s: 828; GFX8: ; %bb.0: 829; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 830; GFX8-NEXT: v_max_f16_e64 v1, s16, s16 831; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 832; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 833; GFX8-NEXT: s_setpc_b64 s[30:31] 834; 835; GFX9-LABEL: v_minimumnum_f16_v_s: 836; GFX9: ; %bb.0: 837; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 838; GFX9-NEXT: v_max_f16_e64 v1, s16, s16 839; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 840; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 841; GFX9-NEXT: s_setpc_b64 s[30:31] 842; 843; GFX10-LABEL: v_minimumnum_f16_v_s: 844; GFX10: ; %bb.0: 845; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 846; GFX10-NEXT: v_max_f16_e64 v1, s16, s16 847; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 848; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 849; GFX10-NEXT: s_setpc_b64 s[30:31] 850; 851; GFX11-LABEL: v_minimumnum_f16_v_s: 852; GFX11: ; %bb.0: 853; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 854; GFX11-NEXT: v_max_f16_e64 v1, s0, s0 855; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 856; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 857; GFX11-NEXT: v_min_f16_e32 v0, v0, v1 858; GFX11-NEXT: s_setpc_b64 s[30:31] 859; 860; GFX12-LABEL: v_minimumnum_f16_v_s: 861; GFX12: ; %bb.0: 862; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 863; GFX12-NEXT: s_wait_expcnt 0x0 864; GFX12-NEXT: s_wait_samplecnt 0x0 865; GFX12-NEXT: s_wait_bvhcnt 0x0 866; GFX12-NEXT: s_wait_kmcnt 0x0 867; GFX12-NEXT: v_max_num_f16_e64 v1, s0, s0 868; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 869; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 870; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 871; GFX12-NEXT: s_setpc_b64 s[30:31] 872 %result = call half @llvm.minimumnum.f16(half %x, half %y) 873 ret half %result 874} 875 876define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) { 877; GFX8-LABEL: v_minimumnum_f16_s_s: 878; GFX8: ; %bb.0: 879; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 880; GFX8-NEXT: v_max_f16_e64 v0, s17, s17 881; GFX8-NEXT: v_max_f16_e64 v1, s16, s16 882; GFX8-NEXT: v_min_f16_e32 v0, v1, v0 883; GFX8-NEXT: s_setpc_b64 s[30:31] 884; 885; GFX9-LABEL: v_minimumnum_f16_s_s: 886; GFX9: ; %bb.0: 887; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 888; GFX9-NEXT: v_max_f16_e64 v0, s17, s17 889; GFX9-NEXT: v_max_f16_e64 v1, s16, s16 890; GFX9-NEXT: v_min_f16_e32 v0, v1, v0 891; GFX9-NEXT: s_setpc_b64 
s[30:31] 892; 893; GFX10-LABEL: v_minimumnum_f16_s_s: 894; GFX10: ; %bb.0: 895; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 896; GFX10-NEXT: v_max_f16_e64 v0, s17, s17 897; GFX10-NEXT: v_max_f16_e64 v1, s16, s16 898; GFX10-NEXT: v_min_f16_e32 v0, v1, v0 899; GFX10-NEXT: s_setpc_b64 s[30:31] 900; 901; GFX11-LABEL: v_minimumnum_f16_s_s: 902; GFX11: ; %bb.0: 903; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 904; GFX11-NEXT: v_max_f16_e64 v0, s1, s1 905; GFX11-NEXT: v_max_f16_e64 v1, s0, s0 906; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 907; GFX11-NEXT: v_min_f16_e32 v0, v1, v0 908; GFX11-NEXT: s_setpc_b64 s[30:31] 909; 910; GFX12-LABEL: v_minimumnum_f16_s_s: 911; GFX12: ; %bb.0: 912; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 913; GFX12-NEXT: s_wait_expcnt 0x0 914; GFX12-NEXT: s_wait_samplecnt 0x0 915; GFX12-NEXT: s_wait_bvhcnt 0x0 916; GFX12-NEXT: s_wait_kmcnt 0x0 917; GFX12-NEXT: v_max_num_f16_e64 v0, s1, s1 918; GFX12-NEXT: v_max_num_f16_e64 v1, s0, s0 919; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 920; GFX12-NEXT: v_min_num_f16_e32 v0, v1, v0 921; GFX12-NEXT: s_setpc_b64 s[30:31] 922 %result = call half @llvm.minimumnum.f16(half %x, half %y) 923 ret half %result 924} 925 926define float @v_minimumnum_f32_s_v(float inreg %x, float %y) { 927; GFX8-LABEL: v_minimumnum_f32_s_v: 928; GFX8: ; %bb.0: 929; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 930; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 931; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 932; GFX8-NEXT: v_min_f32_e32 v0, v1, v0 933; GFX8-NEXT: s_setpc_b64 s[30:31] 934; 935; GFX9-LABEL: v_minimumnum_f32_s_v: 936; GFX9: ; %bb.0: 937; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 938; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 939; GFX9-NEXT: v_max_f32_e64 v1, s16, s16 940; GFX9-NEXT: v_min_f32_e32 v0, v1, v0 941; GFX9-NEXT: s_setpc_b64 s[30:31] 942; 943; GFX10-LABEL: v_minimumnum_f32_s_v: 944; GFX10: ; %bb.0: 945; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 946; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 947; 
GFX10-NEXT: v_max_f32_e64 v1, s16, s16 948; GFX10-NEXT: v_min_f32_e32 v0, v1, v0 949; GFX10-NEXT: s_setpc_b64 s[30:31] 950; 951; GFX11-LABEL: v_minimumnum_f32_s_v: 952; GFX11: ; %bb.0: 953; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 954; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 955; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 956; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 957; GFX11-NEXT: v_min_f32_e32 v0, v1, v0 958; GFX11-NEXT: s_setpc_b64 s[30:31] 959; 960; GFX12-LABEL: v_minimumnum_f32_s_v: 961; GFX12: ; %bb.0: 962; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 963; GFX12-NEXT: s_wait_expcnt 0x0 964; GFX12-NEXT: s_wait_samplecnt 0x0 965; GFX12-NEXT: s_wait_bvhcnt 0x0 966; GFX12-NEXT: s_wait_kmcnt 0x0 967; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 968; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 969; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 970; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0 971; GFX12-NEXT: s_setpc_b64 s[30:31] 972 %result = call float @llvm.minimumnum.f32(float %x, float %y) 973 ret float %result 974} 975 976define float @v_minimumnum_f32_v_s(float %x, float inreg %y) { 977; GFX8-LABEL: v_minimumnum_f32_v_s: 978; GFX8: ; %bb.0: 979; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 980; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 981; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 982; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 983; GFX8-NEXT: s_setpc_b64 s[30:31] 984; 985; GFX9-LABEL: v_minimumnum_f32_v_s: 986; GFX9: ; %bb.0: 987; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 988; GFX9-NEXT: v_max_f32_e64 v1, s16, s16 989; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 990; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 991; GFX9-NEXT: s_setpc_b64 s[30:31] 992; 993; GFX10-LABEL: v_minimumnum_f32_v_s: 994; GFX10: ; %bb.0: 995; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 996; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 997; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 998; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 999; GFX10-NEXT: s_setpc_b64 s[30:31] 1000; 1001; GFX11-LABEL: v_minimumnum_f32_v_s: 1002; 
GFX11: ; %bb.0: 1003; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1004; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 1005; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 1006; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1007; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 1008; GFX11-NEXT: s_setpc_b64 s[30:31] 1009; 1010; GFX12-LABEL: v_minimumnum_f32_v_s: 1011; GFX12: ; %bb.0: 1012; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1013; GFX12-NEXT: s_wait_expcnt 0x0 1014; GFX12-NEXT: s_wait_samplecnt 0x0 1015; GFX12-NEXT: s_wait_bvhcnt 0x0 1016; GFX12-NEXT: s_wait_kmcnt 0x0 1017; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 1018; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 1019; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1020; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 1021; GFX12-NEXT: s_setpc_b64 s[30:31] 1022 %result = call float @llvm.minimumnum.f32(float %x, float %y) 1023 ret float %result 1024} 1025 1026define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) { 1027; GFX8-LABEL: v_minimumnum_f32_s_s: 1028; GFX8: ; %bb.0: 1029; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1030; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, s17 1031; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 1032; GFX8-NEXT: v_min_f32_e32 v0, v1, v0 1033; GFX8-NEXT: s_setpc_b64 s[30:31] 1034; 1035; GFX9-LABEL: v_minimumnum_f32_s_s: 1036; GFX9: ; %bb.0: 1037; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1038; GFX9-NEXT: v_max_f32_e64 v0, s17, s17 1039; GFX9-NEXT: v_max_f32_e64 v1, s16, s16 1040; GFX9-NEXT: v_min_f32_e32 v0, v1, v0 1041; GFX9-NEXT: s_setpc_b64 s[30:31] 1042; 1043; GFX10-LABEL: v_minimumnum_f32_s_s: 1044; GFX10: ; %bb.0: 1045; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1046; GFX10-NEXT: v_max_f32_e64 v0, s17, s17 1047; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 1048; GFX10-NEXT: v_min_f32_e32 v0, v1, v0 1049; GFX10-NEXT: s_setpc_b64 s[30:31] 1050; 1051; GFX11-LABEL: v_minimumnum_f32_s_s: 1052; GFX11: ; %bb.0: 1053; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1054; GFX11-NEXT: v_max_f32_e64 v0, s1, 
s1 1055; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 1056; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1057; GFX11-NEXT: v_min_f32_e32 v0, v1, v0 1058; GFX11-NEXT: s_setpc_b64 s[30:31] 1059; 1060; GFX12-LABEL: v_minimumnum_f32_s_s: 1061; GFX12: ; %bb.0: 1062; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1063; GFX12-NEXT: s_wait_expcnt 0x0 1064; GFX12-NEXT: s_wait_samplecnt 0x0 1065; GFX12-NEXT: s_wait_bvhcnt 0x0 1066; GFX12-NEXT: s_wait_kmcnt 0x0 1067; GFX12-NEXT: v_max_num_f32_e64 v0, s1, s1 1068; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 1069; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1070; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0 1071; GFX12-NEXT: s_setpc_b64 s[30:31] 1072 %result = call float @llvm.minimumnum.f32(float %x, float %y) 1073 ret float %result 1074} 1075 1076define double @v_minimumnum_f64_s_v(double inreg %x, double %y) { 1077; GFX8-LABEL: v_minimumnum_f64_s_v: 1078; GFX8: ; %bb.0: 1079; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1080; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1081; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1082; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1083; GFX8-NEXT: s_setpc_b64 s[30:31] 1084; 1085; GFX9-LABEL: v_minimumnum_f64_s_v: 1086; GFX9: ; %bb.0: 1087; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1088; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1089; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1090; GFX9-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1091; GFX9-NEXT: s_setpc_b64 s[30:31] 1092; 1093; GFX10-LABEL: v_minimumnum_f64_s_v: 1094; GFX10: ; %bb.0: 1095; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1096; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1097; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1098; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1099; GFX10-NEXT: s_setpc_b64 s[30:31] 1100; 1101; GFX11-LABEL: v_minimumnum_f64_s_v: 1102; GFX11: ; %bb.0: 1103; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1104; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] 1105; GFX11-NEXT: v_max_f64 v[0:1], 
v[0:1], v[0:1] 1106; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1107; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1108; GFX11-NEXT: s_setpc_b64 s[30:31] 1109; 1110; GFX12-LABEL: v_minimumnum_f64_s_v: 1111; GFX12: ; %bb.0: 1112; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1113; GFX12-NEXT: s_wait_expcnt 0x0 1114; GFX12-NEXT: s_wait_samplecnt 0x0 1115; GFX12-NEXT: s_wait_bvhcnt 0x0 1116; GFX12-NEXT: s_wait_kmcnt 0x0 1117; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] 1118; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 1119; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1120; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1] 1121; GFX12-NEXT: s_setpc_b64 s[30:31] 1122 %result = call double @llvm.minimumnum.f64(double %x, double %y) 1123 ret double %result 1124} 1125 1126define double @v_minimumnum_f64_v_s(double %x, double inreg %y) { 1127; GFX8-LABEL: v_minimumnum_f64_v_s: 1128; GFX8: ; %bb.0: 1129; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1130; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1131; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1132; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 1133; GFX8-NEXT: s_setpc_b64 s[30:31] 1134; 1135; GFX9-LABEL: v_minimumnum_f64_v_s: 1136; GFX9: ; %bb.0: 1137; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1138; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1139; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1140; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 1141; GFX9-NEXT: s_setpc_b64 s[30:31] 1142; 1143; GFX10-LABEL: v_minimumnum_f64_v_s: 1144; GFX10: ; %bb.0: 1145; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1146; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1147; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1148; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 1149; GFX10-NEXT: s_setpc_b64 s[30:31] 1150; 1151; GFX11-LABEL: v_minimumnum_f64_v_s: 1152; GFX11: ; %bb.0: 1153; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1154; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] 1155; GFX11-NEXT: 
v_max_f64 v[0:1], v[0:1], v[0:1] 1156; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1157; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] 1158; GFX11-NEXT: s_setpc_b64 s[30:31] 1159; 1160; GFX12-LABEL: v_minimumnum_f64_v_s: 1161; GFX12: ; %bb.0: 1162; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1163; GFX12-NEXT: s_wait_expcnt 0x0 1164; GFX12-NEXT: s_wait_samplecnt 0x0 1165; GFX12-NEXT: s_wait_bvhcnt 0x0 1166; GFX12-NEXT: s_wait_kmcnt 0x0 1167; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] 1168; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 1169; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1170; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] 1171; GFX12-NEXT: s_setpc_b64 s[30:31] 1172 %result = call double @llvm.minimumnum.f64(double %x, double %y) 1173 ret double %result 1174} 1175 1176define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) { 1177; GFX8-LABEL: v_minimumnum_f64_s_s: 1178; GFX8: ; %bb.0: 1179; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1180; GFX8-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] 1181; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1182; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1183; GFX8-NEXT: s_setpc_b64 s[30:31] 1184; 1185; GFX9-LABEL: v_minimumnum_f64_s_s: 1186; GFX9: ; %bb.0: 1187; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1188; GFX9-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] 1189; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1190; GFX9-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1191; GFX9-NEXT: s_setpc_b64 s[30:31] 1192; 1193; GFX10-LABEL: v_minimumnum_f64_s_s: 1194; GFX10: ; %bb.0: 1195; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1196; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] 1197; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1198; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1199; GFX10-NEXT: s_setpc_b64 s[30:31] 1200; 1201; GFX11-LABEL: v_minimumnum_f64_s_s: 1202; GFX11: ; %bb.0: 1203; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1204; GFX11-NEXT: v_max_f64 
v[0:1], s[2:3], s[2:3] 1205; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] 1206; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1207; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] 1208; GFX11-NEXT: s_setpc_b64 s[30:31] 1209; 1210; GFX12-LABEL: v_minimumnum_f64_s_s: 1211; GFX12: ; %bb.0: 1212; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1213; GFX12-NEXT: s_wait_expcnt 0x0 1214; GFX12-NEXT: s_wait_samplecnt 0x0 1215; GFX12-NEXT: s_wait_bvhcnt 0x0 1216; GFX12-NEXT: s_wait_kmcnt 0x0 1217; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3] 1218; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] 1219; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1220; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1] 1221; GFX12-NEXT: s_setpc_b64 s[30:31] 1222 %result = call double @llvm.minimumnum.f64(double %x, double %y) 1223 ret double %result 1224} 1225 1226define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) { 1227; GFX8-LABEL: v_minimumnum_f32_fabs_rhs: 1228; GFX8: ; %bb.0: 1229; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1230; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| 1231; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 1232; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 1233; GFX8-NEXT: s_setpc_b64 s[30:31] 1234; 1235; GFX9-LABEL: v_minimumnum_f32_fabs_rhs: 1236; GFX9: ; %bb.0: 1237; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1238; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1239; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 1240; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 1241; GFX9-NEXT: s_setpc_b64 s[30:31] 1242; 1243; GFX10-LABEL: v_minimumnum_f32_fabs_rhs: 1244; GFX10: ; %bb.0: 1245; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1246; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1247; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 1248; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 1249; GFX10-NEXT: s_setpc_b64 s[30:31] 1250; 1251; GFX11-LABEL: v_minimumnum_f32_fabs_rhs: 1252; GFX11: ; %bb.0: 1253; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1254; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1255; GFX11-NEXT: 
v_max_f32_e32 v0, v0, v0 1256; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1257; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 1258; GFX11-NEXT: s_setpc_b64 s[30:31] 1259; 1260; GFX12-LABEL: v_minimumnum_f32_fabs_rhs: 1261; GFX12: ; %bb.0: 1262; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1263; GFX12-NEXT: s_wait_expcnt 0x0 1264; GFX12-NEXT: s_wait_samplecnt 0x0 1265; GFX12-NEXT: s_wait_bvhcnt 0x0 1266; GFX12-NEXT: s_wait_kmcnt 0x0 1267; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| 1268; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 1269; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1270; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 1271; GFX12-NEXT: s_setpc_b64 s[30:31] 1272 %fabs.y = call float @llvm.fabs.f32(float %y) 1273 %result = call float @llvm.minimumnum.f32(float %x, float %fabs.y) 1274 ret float %result 1275} 1276 1277define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) { 1278; GFX8-LABEL: v_minimumnum_f32_fneg_fabs_rhs: 1279; GFX8: ; %bb.0: 1280; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1281; GFX8-NEXT: v_mul_f32_e64 v1, -1.0, |v1| 1282; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 1283; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 1284; GFX8-NEXT: s_setpc_b64 s[30:31] 1285; 1286; GFX9-LABEL: v_minimumnum_f32_fneg_fabs_rhs: 1287; GFX9: ; %bb.0: 1288; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1289; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| 1290; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 1291; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 1292; GFX9-NEXT: s_setpc_b64 s[30:31] 1293; 1294; GFX10-LABEL: v_minimumnum_f32_fneg_fabs_rhs: 1295; GFX10: ; %bb.0: 1296; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1297; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| 1298; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 1299; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 1300; GFX10-NEXT: s_setpc_b64 s[30:31] 1301; 1302; GFX11-LABEL: v_minimumnum_f32_fneg_fabs_rhs: 1303; GFX11: ; %bb.0: 1304; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1305; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| 1306; GFX11-NEXT: 
v_max_f32_e32 v0, v0, v0 1307; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1308; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 1309; GFX11-NEXT: s_setpc_b64 s[30:31] 1310; 1311; GFX12-LABEL: v_minimumnum_f32_fneg_fabs_rhs: 1312; GFX12: ; %bb.0: 1313; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1314; GFX12-NEXT: s_wait_expcnt 0x0 1315; GFX12-NEXT: s_wait_samplecnt 0x0 1316; GFX12-NEXT: s_wait_bvhcnt 0x0 1317; GFX12-NEXT: s_wait_kmcnt 0x0 1318; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| 1319; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 1320; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1321; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 1322; GFX12-NEXT: s_setpc_b64 s[30:31] 1323 %fabs.y = call float @llvm.fabs.f32(float %y) 1324 %fneg.fabs.y = fneg float %fabs.y 1325 %result = call float @llvm.minimumnum.f32(float %x, float %fneg.fabs.y) 1326 ret float %result 1327} 1328 1329define float @v_minimumnum_f32_fabs(float %x, float %y) { 1330; GFX8-LABEL: v_minimumnum_f32_fabs: 1331; GFX8: ; %bb.0: 1332; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1333; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| 1334; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, |v0| 1335; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 1336; GFX8-NEXT: s_setpc_b64 s[30:31] 1337; 1338; GFX9-LABEL: v_minimumnum_f32_fabs: 1339; GFX9: ; %bb.0: 1340; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1341; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1342; GFX9-NEXT: v_max_f32_e64 v0, |v0|, |v0| 1343; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 1344; GFX9-NEXT: s_setpc_b64 s[30:31] 1345; 1346; GFX10-LABEL: v_minimumnum_f32_fabs: 1347; GFX10: ; %bb.0: 1348; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1349; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1350; GFX10-NEXT: v_max_f32_e64 v0, |v0|, |v0| 1351; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 1352; GFX10-NEXT: s_setpc_b64 s[30:31] 1353; 1354; GFX11-LABEL: v_minimumnum_f32_fabs: 1355; GFX11: ; %bb.0: 1356; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1357; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1358; 
GFX11-NEXT: v_max_f32_e64 v0, |v0|, |v0| 1359; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1360; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 1361; GFX11-NEXT: s_setpc_b64 s[30:31] 1362; 1363; GFX12-LABEL: v_minimumnum_f32_fabs: 1364; GFX12: ; %bb.0: 1365; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1366; GFX12-NEXT: s_wait_expcnt 0x0 1367; GFX12-NEXT: s_wait_samplecnt 0x0 1368; GFX12-NEXT: s_wait_bvhcnt 0x0 1369; GFX12-NEXT: s_wait_kmcnt 0x0 1370; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| 1371; GFX12-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| 1372; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1373; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 1374; GFX12-NEXT: s_setpc_b64 s[30:31] 1375 %fabs.x = call float @llvm.fabs.f32(float %x) 1376 %fabs.y = call float @llvm.fabs.f32(float %y) 1377 %result = call float @llvm.minimumnum.f32(float %fabs.x, float %fabs.y) 1378 ret float %result 1379} 1380 1381define float @v_minimumnum_f32_fneg(float %x, float %y) { 1382; GFX8-LABEL: v_minimumnum_f32_fneg: 1383; GFX8: ; %bb.0: 1384; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX8-NEXT: v_mul_f32_e32 v1, -1.0, v1 1386; GFX8-NEXT: v_mul_f32_e32 v0, -1.0, v0 1387; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 1388; GFX8-NEXT: s_setpc_b64 s[30:31] 1389; 1390; GFX9-LABEL: v_minimumnum_f32_fneg: 1391; GFX9: ; %bb.0: 1392; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1393; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 1394; GFX9-NEXT: v_max_f32_e64 v0, -v0, -v0 1395; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 1396; GFX9-NEXT: s_setpc_b64 s[30:31] 1397; 1398; GFX10-LABEL: v_minimumnum_f32_fneg: 1399; GFX10: ; %bb.0: 1400; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1401; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 1402; GFX10-NEXT: v_max_f32_e64 v0, -v0, -v0 1403; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 1404; GFX10-NEXT: s_setpc_b64 s[30:31] 1405; 1406; GFX11-LABEL: v_minimumnum_f32_fneg: 1407; GFX11: ; %bb.0: 1408; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1409; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 
1410; GFX11-NEXT: v_max_f32_e64 v0, -v0, -v0 1411; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1412; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 1413; GFX11-NEXT: s_setpc_b64 s[30:31] 1414; 1415; GFX12-LABEL: v_minimumnum_f32_fneg: 1416; GFX12: ; %bb.0: 1417; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1418; GFX12-NEXT: s_wait_expcnt 0x0 1419; GFX12-NEXT: s_wait_samplecnt 0x0 1420; GFX12-NEXT: s_wait_bvhcnt 0x0 1421; GFX12-NEXT: s_wait_kmcnt 0x0 1422; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1 1423; GFX12-NEXT: v_max_num_f32_e64 v0, -v0, -v0 1424; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1425; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 1426; GFX12-NEXT: s_setpc_b64 s[30:31] 1427 %fneg.x = fneg float %x 1428 %fneg.y = fneg float %y 1429 %result = call float @llvm.minimumnum.f32(float %fneg.x, float %fneg.y) 1430 ret float %result 1431} 1432 1433define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) { 1434; GFX8-LABEL: v_minimumnum_f16_fabs_rhs: 1435; GFX8: ; %bb.0: 1436; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1437; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1438; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 1439; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 1440; GFX8-NEXT: s_setpc_b64 s[30:31] 1441; 1442; GFX9-LABEL: v_minimumnum_f16_fabs_rhs: 1443; GFX9: ; %bb.0: 1444; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1445; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1446; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 1447; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 1448; GFX9-NEXT: s_setpc_b64 s[30:31] 1449; 1450; GFX10-LABEL: v_minimumnum_f16_fabs_rhs: 1451; GFX10: ; %bb.0: 1452; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1453; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1454; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 1455; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 1456; GFX10-NEXT: s_setpc_b64 s[30:31] 1457; 1458; GFX11-LABEL: v_minimumnum_f16_fabs_rhs: 1459; GFX11: ; %bb.0: 1460; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1461; GFX11-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1462; GFX11-NEXT: 
v_max_f16_e32 v0, v0, v0 1463; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1464; GFX11-NEXT: v_min_f16_e32 v0, v0, v1 1465; GFX11-NEXT: s_setpc_b64 s[30:31] 1466; 1467; GFX12-LABEL: v_minimumnum_f16_fabs_rhs: 1468; GFX12: ; %bb.0: 1469; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1470; GFX12-NEXT: s_wait_expcnt 0x0 1471; GFX12-NEXT: s_wait_samplecnt 0x0 1472; GFX12-NEXT: s_wait_bvhcnt 0x0 1473; GFX12-NEXT: s_wait_kmcnt 0x0 1474; GFX12-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| 1475; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 1476; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1477; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 1478; GFX12-NEXT: s_setpc_b64 s[30:31] 1479 %fabs.y = call half @llvm.fabs.f16(half %y) 1480 %result = call half @llvm.minimumnum.f16(half %x, half %fabs.y) 1481 ret half %result 1482} 1483 1484define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) { 1485; GFX8-LABEL: v_minimumnum_f16_fneg_fabs_rhs: 1486; GFX8: ; %bb.0: 1487; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1488; GFX8-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1489; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 1490; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 1491; GFX8-NEXT: s_setpc_b64 s[30:31] 1492; 1493; GFX9-LABEL: v_minimumnum_f16_fneg_fabs_rhs: 1494; GFX9: ; %bb.0: 1495; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1496; GFX9-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1497; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 1498; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 1499; GFX9-NEXT: s_setpc_b64 s[30:31] 1500; 1501; GFX10-LABEL: v_minimumnum_f16_fneg_fabs_rhs: 1502; GFX10: ; %bb.0: 1503; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1504; GFX10-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1505; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 1506; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 1507; GFX10-NEXT: s_setpc_b64 s[30:31] 1508; 1509; GFX11-LABEL: v_minimumnum_f16_fneg_fabs_rhs: 1510; GFX11: ; %bb.0: 1511; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1512; GFX11-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1513; GFX11-NEXT: 
v_max_f16_e32 v0, v0, v0 1514; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1515; GFX11-NEXT: v_min_f16_e32 v0, v0, v1 1516; GFX11-NEXT: s_setpc_b64 s[30:31] 1517; 1518; GFX12-LABEL: v_minimumnum_f16_fneg_fabs_rhs: 1519; GFX12: ; %bb.0: 1520; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1521; GFX12-NEXT: s_wait_expcnt 0x0 1522; GFX12-NEXT: s_wait_samplecnt 0x0 1523; GFX12-NEXT: s_wait_bvhcnt 0x0 1524; GFX12-NEXT: s_wait_kmcnt 0x0 1525; GFX12-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| 1526; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 1527; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1528; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 1529; GFX12-NEXT: s_setpc_b64 s[30:31] 1530 %fabs.y = call half @llvm.fabs.f16(half %y) 1531 %fneg.fabs.y = fneg half %fabs.y 1532 %result = call half @llvm.minimumnum.f16(half %x, half %fneg.fabs.y) 1533 ret half %result 1534} 1535 1536define half @v_minimumnum_f16_fabs(half %x, half %y) { 1537; GFX8-LABEL: v_minimumnum_f16_fabs: 1538; GFX8: ; %bb.0: 1539; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1540; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1541; GFX8-NEXT: v_max_f16_e64 v0, |v0|, |v0| 1542; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 1543; GFX8-NEXT: s_setpc_b64 s[30:31] 1544; 1545; GFX9-LABEL: v_minimumnum_f16_fabs: 1546; GFX9: ; %bb.0: 1547; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1548; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1549; GFX9-NEXT: v_max_f16_e64 v0, |v0|, |v0| 1550; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 1551; GFX9-NEXT: s_setpc_b64 s[30:31] 1552; 1553; GFX10-LABEL: v_minimumnum_f16_fabs: 1554; GFX10: ; %bb.0: 1555; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1556; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1557; GFX10-NEXT: v_max_f16_e64 v0, |v0|, |v0| 1558; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 1559; GFX10-NEXT: s_setpc_b64 s[30:31] 1560; 1561; GFX11-LABEL: v_minimumnum_f16_fabs: 1562; GFX11: ; %bb.0: 1563; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1564; GFX11-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1565; GFX11-NEXT: 
v_max_f16_e64 v0, |v0|, |v0| 1566; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1567; GFX11-NEXT: v_min_f16_e32 v0, v0, v1 1568; GFX11-NEXT: s_setpc_b64 s[30:31] 1569; 1570; GFX12-LABEL: v_minimumnum_f16_fabs: 1571; GFX12: ; %bb.0: 1572; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1573; GFX12-NEXT: s_wait_expcnt 0x0 1574; GFX12-NEXT: s_wait_samplecnt 0x0 1575; GFX12-NEXT: s_wait_bvhcnt 0x0 1576; GFX12-NEXT: s_wait_kmcnt 0x0 1577; GFX12-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| 1578; GFX12-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| 1579; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1580; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 1581; GFX12-NEXT: s_setpc_b64 s[30:31] 1582 %fabs.x = call half @llvm.fabs.f16(half %x) 1583 %fabs.y = call half @llvm.fabs.f16(half %y) 1584 %result = call half @llvm.minimumnum.f16(half %fabs.x, half %fabs.y) 1585 ret half %result 1586} 1587 1588define half @v_minimumnum_f16_fneg(half %x, half %y) { 1589; GFX8-LABEL: v_minimumnum_f16_fneg: 1590; GFX8: ; %bb.0: 1591; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1592; GFX8-NEXT: v_max_f16_e64 v1, -v1, -v1 1593; GFX8-NEXT: v_max_f16_e64 v0, -v0, -v0 1594; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 1595; GFX8-NEXT: s_setpc_b64 s[30:31] 1596; 1597; GFX9-LABEL: v_minimumnum_f16_fneg: 1598; GFX9: ; %bb.0: 1599; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1600; GFX9-NEXT: v_max_f16_e64 v1, -v1, -v1 1601; GFX9-NEXT: v_max_f16_e64 v0, -v0, -v0 1602; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 1603; GFX9-NEXT: s_setpc_b64 s[30:31] 1604; 1605; GFX10-LABEL: v_minimumnum_f16_fneg: 1606; GFX10: ; %bb.0: 1607; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1608; GFX10-NEXT: v_max_f16_e64 v1, -v1, -v1 1609; GFX10-NEXT: v_max_f16_e64 v0, -v0, -v0 1610; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 1611; GFX10-NEXT: s_setpc_b64 s[30:31] 1612; 1613; GFX11-LABEL: v_minimumnum_f16_fneg: 1614; GFX11: ; %bb.0: 1615; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1616; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1 1617; GFX11-NEXT: 
v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f16_fneg:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f16_e64 v1, -v1, -v1
; GFX12-NEXT: v_max_num_f16_e64 v0, -v0, -v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %fneg.x = fneg half %x
  %fneg.y = fneg half %y
  %result = call half @llvm.minimumnum.f16(half %fneg.x, half %fneg.y)
  ret half %result
}

; minimumnum of two negated doubles. The expected output carries the negation
; directly on the operands of the self-max instructions.
define double @v_minimumnum_f64_fneg(double %x, double %y) {
; GFX8-LABEL: v_minimumnum_f64_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX8-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_f64_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX10-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_f64_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_f64_fneg:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %neg.y = fneg double %y
  %min = call double @llvm.minimumnum.f64(double %neg.x, double %neg.y)
  ret double %min
}

; <2 x half> minimumnum. The gfx803 expectations use SDWA max/min on the high
; halves plus a v_or to recombine; the remaining targets expect packed v_pk_*.
define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX8-LABEL: v_minimumnum_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %min
}

; Same as v_minimumnum_v2f16 but with nnan on the call: the expected output has
; only the min instructions, without a preceding self-max of each operand.
define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX8-LABEL: v_minimumnum_v2f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f16_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f16_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call nnan <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %min
}

; <3 x half> minimumnum; the expected output processes two registers per operand.
define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-LABEL: v_minimumnum_v3f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: v_pk_min_f16 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y)
  ret <3 x half> %min
}

; nnan variant of v_minimumnum_v3f16: only min instructions are expected.
define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX8-LABEL: v_minimumnum_v3f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f16_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f16_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call nnan <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y)
  ret <3 x half> %min
}

; <4 x half> minimumnum.
define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
; GFX8-LABEL: v_minimumnum_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: v_pk_min_f16 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y)
  ret <4 x half> %min
}

; nnan variant of v_minimumnum_v4f16: only min instructions are expected.
define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX8-LABEL: v_minimumnum_v4f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f16_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f16_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call nnan <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y)
  ret <4 x half> %min
}

; <6 x half> minimumnum.
define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
; GFX8-LABEL: v_minimumnum_v6f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v2, v2, v5
; GFX8-NEXT: v_min_f16_e32 v1, v1, v4
; GFX8-NEXT: v_min_f16_e32 v0, v0, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v6f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v3
; GFX9-NEXT: v_pk_max_f16 v3, v4, v4
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: v_pk_max_f16 v3, v5, v5
; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
; GFX9-NEXT: v_pk_min_f16 v2, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v6f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_min_f16 v0, v0, v3
; GFX10-NEXT: v_pk_min_f16 v1, v1, v4
; GFX10-NEXT: v_pk_min_f16 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v6f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_min_f16 v0, v0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v4
; GFX11-NEXT: v_pk_min_f16 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v6f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v4
; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <6 x half> @llvm.minimumnum.v6f16(<6 x half> %x, <6 x half> %y)
  ret <6 x half> %min
}

; <8 x half> minimumnum.
define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
; GFX8-LABEL: v_minimumnum_v8f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v7, v7, v7
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v6, v6, v6
; GFX8-NEXT: v_max_f16_e32 v2, v2, v2
; GFX8-NEXT: v_max_f16_e32 v5, v5, v5
; GFX8-NEXT: v_max_f16_e32 v1, v1, v1
; GFX8-NEXT: v_max_f16_e32 v4, v4, v4
; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
; GFX8-NEXT: v_min_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_min_f16_e32 v3, v3, v7
; GFX8-NEXT: v_min_f16_e32 v2, v2, v6
; GFX8-NEXT: v_min_f16_e32 v1, v1, v5
; GFX8-NEXT: v_min_f16_e32 v0, v0, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v11
; GFX8-NEXT: v_or_b32_e32 v1, v1, v10
; GFX8-NEXT: v_or_b32_e32 v2, v2, v9
; GFX8-NEXT: v_or_b32_e32 v3, v3, v8
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v8f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v4, v4, v4
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v4
; GFX9-NEXT: v_pk_max_f16 v4, v5, v5
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: v_pk_min_f16 v1, v1, v4
; GFX9-NEXT: v_pk_max_f16 v4, v6, v6
; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
; GFX9-NEXT: v_pk_min_f16 v2, v2, v4
; GFX9-NEXT: v_pk_max_f16 v4, v7, v7
; GFX9-NEXT: v_pk_max_f16 v3, v3, v3
; GFX9-NEXT: v_pk_min_f16 v3, v3, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v8f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_max_f16 v4, v4, v4
; GFX10-NEXT: v_pk_max_f16 v0, v0, v0
; GFX10-NEXT: v_pk_max_f16 v5, v5, v5
; GFX10-NEXT: v_pk_max_f16 v1, v1, v1
; GFX10-NEXT: v_pk_max_f16 v6, v6, v6
; GFX10-NEXT: v_pk_max_f16 v2, v2, v2
; GFX10-NEXT: v_pk_max_f16 v7, v7, v7
; GFX10-NEXT: v_pk_max_f16 v3, v3, v3
; GFX10-NEXT: v_pk_min_f16 v0, v0, v4
; GFX10-NEXT: v_pk_min_f16 v1, v1, v5
; GFX10-NEXT: v_pk_min_f16 v2, v2, v6
; GFX10-NEXT: v_pk_min_f16 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v8f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v4, v4, v4
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v5, v5, v5
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v6, v6, v6
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-NEXT: v_pk_max_f16 v7, v7, v7
; GFX11-NEXT: v_pk_max_f16 v3, v3, v3
; GFX11-NEXT: v_pk_min_f16 v0, v0, v4
; GFX11-NEXT: v_pk_min_f16 v1, v1, v5
; GFX11-NEXT: v_pk_min_f16 v2, v2, v6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_pk_min_f16 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v8f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v4
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v5
; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %x, <8 x half> %y)
  ret <8 x half> %min
}

; <2 x float> minimumnum. The gfx803 expectations multiply each operand by 1.0
; before the min; the other targets expect a self-max instead.
define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX8-LABEL: v_minimumnum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
; GFX9-NEXT: v_max_f32_e32 v2, v3, v3
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %min
}

; nnan variant of v_minimumnum_v2f32: only min instructions are expected.
define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX8-LABEL: v_minimumnum_v2f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v2f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v2f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v2f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call nnan <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %min
}

; <3 x float> minimumnum.
define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX8-LABEL: v_minimumnum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v5
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_min_f32_e32 v2, v2, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
; GFX9-NEXT: v_max_f32_e32 v3, v4, v4
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: v_max_f32_e32 v3, v5, v5
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_min_f32_e32 v2, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
; GFX12-NEXT: v_min_num_f32_e32 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y)
  ret <3 x float> %min
}

; nnan variant of v_minimumnum_v3f32: only min instructions are expected.
define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX8-LABEL: v_minimumnum_v3f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v3f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v3f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v3f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
; GFX12-NEXT: v_min_num_f32_e32 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call nnan <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y)
  ret <3 x float> %min
}

; <4 x float> minimumnum.
define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX8-LABEL: v_minimumnum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v5
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v6
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_min_f32_e32 v2, v2, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v7
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX8-NEXT: v_min_f32_e32 v3, v3, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v4, v4, v4
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
; GFX9-NEXT: v_max_f32_e32 v4, v5, v5
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
; GFX9-NEXT: v_max_f32_e32 v4, v6, v6
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_min_f32_e32 v2, v2, v4
; GFX9-NEXT: v_max_f32_e32 v4, v7, v7
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
; GFX9-NEXT: v_min_f32_e32 v3, v3, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v6, v6, v6
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_max_f32_e32 v7, v7, v7
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimumnum_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimumnum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
; GFX12-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %min = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %min
}

define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX8-LABEL: v_minimumnum_v4f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
; GFX8-NEXT: v_min_f32_e32 v1, v1, v5
; GFX8-NEXT: v_min_f32_e32 v2, v2, v6
; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimumnum_v4f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f32_nnan:
;
GFX10: ; %bb.0: 2545; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2546; GFX10-NEXT: v_min_f32_e32 v0, v0, v4 2547; GFX10-NEXT: v_min_f32_e32 v1, v1, v5 2548; GFX10-NEXT: v_min_f32_e32 v2, v2, v6 2549; GFX10-NEXT: v_min_f32_e32 v3, v3, v7 2550; GFX10-NEXT: s_setpc_b64 s[30:31] 2551; 2552; GFX11-LABEL: v_minimumnum_v4f32_nnan: 2553; GFX11: ; %bb.0: 2554; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2555; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5 2556; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 2557; GFX11-NEXT: s_setpc_b64 s[30:31] 2558; 2559; GFX12-LABEL: v_minimumnum_v4f32_nnan: 2560; GFX12: ; %bb.0: 2561; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2562; GFX12-NEXT: s_wait_expcnt 0x0 2563; GFX12-NEXT: s_wait_samplecnt 0x0 2564; GFX12-NEXT: s_wait_bvhcnt 0x0 2565; GFX12-NEXT: s_wait_kmcnt 0x0 2566; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 2567; GFX12-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 2568; GFX12-NEXT: s_setpc_b64 s[30:31] 2569 %result = call nnan <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y) 2570 ret <4 x float> %result 2571} 2572 2573define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) { 2574; GFX8-LABEL: v_minimumnum_v2f64: 2575; GFX8: ; %bb.0: 2576; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2577; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2578; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2579; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2580; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2581; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2582; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2583; GFX8-NEXT: s_setpc_b64 s[30:31] 2584; 2585; GFX9-LABEL: v_minimumnum_v2f64: 2586; GFX9: ; %bb.0: 2587; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2588; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2589; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2590; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 
2591; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2592; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2593; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2594; GFX9-NEXT: s_setpc_b64 s[30:31] 2595; 2596; GFX10-LABEL: v_minimumnum_v2f64: 2597; GFX10: ; %bb.0: 2598; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2599; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2600; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2601; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2602; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2603; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2604; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2605; GFX10-NEXT: s_setpc_b64 s[30:31] 2606; 2607; GFX11-LABEL: v_minimumnum_v2f64: 2608; GFX11: ; %bb.0: 2609; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2610; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2611; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2612; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2613; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2614; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2615; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2616; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2617; GFX11-NEXT: s_setpc_b64 s[30:31] 2618; 2619; GFX12-LABEL: v_minimumnum_v2f64: 2620; GFX12: ; %bb.0: 2621; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2622; GFX12-NEXT: s_wait_expcnt 0x0 2623; GFX12-NEXT: s_wait_samplecnt 0x0 2624; GFX12-NEXT: s_wait_bvhcnt 0x0 2625; GFX12-NEXT: s_wait_kmcnt 0x0 2626; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] 2627; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 2628; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] 2629; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] 2630; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2631; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] 2632; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] 2633; GFX12-NEXT: s_setpc_b64 s[30:31] 2634 %result = call <2 x double> 
@llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y) 2635 ret <2 x double> %result 2636} 2637 2638define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) { 2639; GFX8-LABEL: v_minimumnum_v2f64_nnan: 2640; GFX8: ; %bb.0: 2641; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2642; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2643; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2644; GFX8-NEXT: s_setpc_b64 s[30:31] 2645; 2646; GFX9-LABEL: v_minimumnum_v2f64_nnan: 2647; GFX9: ; %bb.0: 2648; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2649; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2650; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2651; GFX9-NEXT: s_setpc_b64 s[30:31] 2652; 2653; GFX10-LABEL: v_minimumnum_v2f64_nnan: 2654; GFX10: ; %bb.0: 2655; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2656; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2657; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2658; GFX10-NEXT: s_setpc_b64 s[30:31] 2659; 2660; GFX11-LABEL: v_minimumnum_v2f64_nnan: 2661; GFX11: ; %bb.0: 2662; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2663; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] 2664; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] 2665; GFX11-NEXT: s_setpc_b64 s[30:31] 2666; 2667; GFX12-LABEL: v_minimumnum_v2f64_nnan: 2668; GFX12: ; %bb.0: 2669; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2670; GFX12-NEXT: s_wait_expcnt 0x0 2671; GFX12-NEXT: s_wait_samplecnt 0x0 2672; GFX12-NEXT: s_wait_bvhcnt 0x0 2673; GFX12-NEXT: s_wait_kmcnt 0x0 2674; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] 2675; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] 2676; GFX12-NEXT: s_setpc_b64 s[30:31] 2677 %result = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y) 2678 ret <2 x double> %result 2679} 2680 2681define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) { 2682; GFX8-LABEL: v_minimumnum_v3f64: 2683; GFX8: ; %bb.0: 2684; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2685; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2686; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2687; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 2688; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2689; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2690; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2691; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2692; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2693; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2694; GFX8-NEXT: s_setpc_b64 s[30:31] 2695; 2696; GFX9-LABEL: v_minimumnum_v3f64: 2697; GFX9: ; %bb.0: 2698; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2699; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2700; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2701; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 2702; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2703; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2704; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2705; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2706; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2707; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2708; GFX9-NEXT: s_setpc_b64 s[30:31] 2709; 2710; GFX10-LABEL: v_minimumnum_v3f64: 2711; GFX10: ; %bb.0: 2712; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2713; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2714; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2715; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 2716; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2717; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2718; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2719; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2720; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2721; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2722; GFX10-NEXT: s_setpc_b64 s[30:31] 2723; 2724; GFX11-LABEL: v_minimumnum_v3f64: 2725; GFX11: ; %bb.0: 2726; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2727; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2728; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2729; GFX11-NEXT: v_max_f64 
v[8:9], v[8:9], v[8:9] 2730; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2731; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2732; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2733; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2734; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 2735; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2736; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2737; GFX11-NEXT: s_setpc_b64 s[30:31] 2738; 2739; GFX12-LABEL: v_minimumnum_v3f64: 2740; GFX12: ; %bb.0: 2741; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2742; GFX12-NEXT: s_wait_expcnt 0x0 2743; GFX12-NEXT: s_wait_samplecnt 0x0 2744; GFX12-NEXT: s_wait_bvhcnt 0x0 2745; GFX12-NEXT: s_wait_kmcnt 0x0 2746; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] 2747; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 2748; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] 2749; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] 2750; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] 2751; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] 2752; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] 2753; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 2754; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] 2755; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] 2756; GFX12-NEXT: s_setpc_b64 s[30:31] 2757 %result = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y) 2758 ret <3 x double> %result 2759} 2760 2761define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { 2762; GFX8-LABEL: v_minimumnum_v3f64_nnan: 2763; GFX8: ; %bb.0: 2764; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2765; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2766; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2767; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2768; GFX8-NEXT: s_setpc_b64 s[30:31] 2769; 2770; GFX9-LABEL: v_minimumnum_v3f64_nnan: 2771; GFX9: ; %bb.0: 2772; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2773; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2774; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2775; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2776; GFX9-NEXT: s_setpc_b64 s[30:31] 2777; 2778; GFX10-LABEL: v_minimumnum_v3f64_nnan: 2779; GFX10: ; %bb.0: 2780; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2781; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2782; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2783; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2784; GFX10-NEXT: s_setpc_b64 s[30:31] 2785; 2786; GFX11-LABEL: v_minimumnum_v3f64_nnan: 2787; GFX11: ; %bb.0: 2788; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2789; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] 2790; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] 2791; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] 2792; GFX11-NEXT: s_setpc_b64 s[30:31] 2793; 2794; GFX12-LABEL: v_minimumnum_v3f64_nnan: 2795; GFX12: ; %bb.0: 2796; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2797; GFX12-NEXT: s_wait_expcnt 0x0 2798; GFX12-NEXT: s_wait_samplecnt 0x0 2799; GFX12-NEXT: s_wait_bvhcnt 0x0 2800; GFX12-NEXT: s_wait_kmcnt 0x0 2801; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] 2802; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] 2803; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] 2804; GFX12-NEXT: s_setpc_b64 s[30:31] 2805 %result = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y) 2806 ret <3 x double> %result 2807} 2808 2809define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) { 2810; GFX8-LABEL: v_minimumnum_v4f64: 2811; GFX8: ; %bb.0: 2812; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2813; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 2814; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2815; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2816; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2817; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 2818; GFX8-NEXT: v_max_f64 v[4:5], 
v[4:5], v[4:5] 2819; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] 2820; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2821; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2822; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2823; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2824; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2825; GFX8-NEXT: s_setpc_b64 s[30:31] 2826; 2827; GFX9-LABEL: v_minimumnum_v4f64: 2828; GFX9: ; %bb.0: 2829; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2830; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 2831; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2832; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2833; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2834; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 2835; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2836; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] 2837; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2838; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2839; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2840; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2841; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2842; GFX9-NEXT: s_setpc_b64 s[30:31] 2843; 2844; GFX10-LABEL: v_minimumnum_v4f64: 2845; GFX10: ; %bb.0: 2846; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2847; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 2848; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2849; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2850; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2851; GFX10-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 2852; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2853; GFX10-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] 2854; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2855; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2856; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2857; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2858; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2859; GFX10-NEXT: s_setpc_b64 s[30:31] 2860; 2861; GFX11-LABEL: v_minimumnum_v4f64: 2862; 
GFX11: ; %bb.0: 2863; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2864; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] 2865; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 2866; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] 2867; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] 2868; GFX11-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] 2869; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] 2870; GFX11-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] 2871; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] 2872; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2873; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2874; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2875; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 2876; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2877; GFX11-NEXT: s_setpc_b64 s[30:31] 2878; 2879; GFX12-LABEL: v_minimumnum_v4f64: 2880; GFX12: ; %bb.0: 2881; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2882; GFX12-NEXT: s_wait_expcnt 0x0 2883; GFX12-NEXT: s_wait_samplecnt 0x0 2884; GFX12-NEXT: s_wait_bvhcnt 0x0 2885; GFX12-NEXT: s_wait_kmcnt 0x0 2886; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] 2887; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 2888; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] 2889; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] 2890; GFX12-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] 2891; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] 2892; GFX12-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] 2893; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] 2894; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] 2895; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] 2896; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] 2897; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) 2898; GFX12-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] 2899; GFX12-NEXT: s_setpc_b64 s[30:31] 2900 %result = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y) 2901 ret <4 x double> %result 
2902} 2903 2904define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { 2905; GFX8-LABEL: v_minimumnum_v4f64_nnan: 2906; GFX8: ; %bb.0: 2907; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2908; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2909; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2910; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2911; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2912; GFX8-NEXT: s_setpc_b64 s[30:31] 2913; 2914; GFX9-LABEL: v_minimumnum_v4f64_nnan: 2915; GFX9: ; %bb.0: 2916; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2917; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2918; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2919; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2920; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2921; GFX9-NEXT: s_setpc_b64 s[30:31] 2922; 2923; GFX10-LABEL: v_minimumnum_v4f64_nnan: 2924; GFX10: ; %bb.0: 2925; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2926; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2927; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2928; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2929; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2930; GFX10-NEXT: s_setpc_b64 s[30:31] 2931; 2932; GFX11-LABEL: v_minimumnum_v4f64_nnan: 2933; GFX11: ; %bb.0: 2934; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2935; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] 2936; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] 2937; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] 2938; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] 2939; GFX11-NEXT: s_setpc_b64 s[30:31] 2940; 2941; GFX12-LABEL: v_minimumnum_v4f64_nnan: 2942; GFX12: ; %bb.0: 2943; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2944; GFX12-NEXT: s_wait_expcnt 0x0 2945; GFX12-NEXT: s_wait_samplecnt 0x0 2946; GFX12-NEXT: s_wait_bvhcnt 0x0 2947; GFX12-NEXT: s_wait_kmcnt 0x0 2948; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] 2949; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] 2950; GFX12-NEXT: 
v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] 2951; GFX12-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] 2952; GFX12-NEXT: s_setpc_b64 s[30:31] 2953 %result = call nnan <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y) 2954 ret <4 x double> %result 2955} 2956