; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s

; Scalar llvm.maximumnum on half operands, call carries no fast-math flags.
define half @v_maximumnum_f16(half %x, half %y) {
; GFX8-LABEL: v_maximumnum_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e32 v1, v1, v1
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f16_e32 v1, v1, v1
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call half @llvm.maximumnum.f16(half %x, half %y)
  ret half %max
}

; Same half operation, but the call is tagged with the nnan fast-math flag.
define half @v_maximumnum_f16_nnan(half %x, half %y) {
; GFX8-LABEL: v_maximumnum_f16_nnan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f16_nnan:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f16_nnan:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f16_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call nnan half @llvm.maximumnum.f16(half %x, half %y)
  ret half %max
}

; Half operation whose second operand is the literal constant 1.0.
define half @v_maximumnum_f16_1.0(half %x) {
; GFX8-LABEL: v_maximumnum_f16_1.0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX8-NEXT:    v_max_f16_e32 v0, 1.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f16_1.0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f16_e32 v0, 1.0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_1.0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f16_e32 v0, 1.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f16_1.0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, 1.0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f16_1.0:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f16_e32 v0, 1.0, v0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call half @llvm.maximumnum.f16(half %x, half 1.0)
  ret half %max
}

; Scalar llvm.maximumnum on bfloat operands, call carries no fast-math flags.
define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
; GFX8-LABEL: v_maximumnum_bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT:    v_bfe_u32 v3, v2, 16, 1
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v2
; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, s4, v3
; GFX8-NEXT:    v_or_b32_e32 v4, 0x400000, v2
; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
; GFX8-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v3
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
; GFX9-NEXT:    v_bfe_u32 v3, v2, 16, 1
; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
; GFX9-NEXT:    v_add3_u32 v3, v3, v2, s4
; GFX9-NEXT:    v_or_b32_e32 v4, 0x400000, v2
; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v2, v3
; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
; GFX10-NEXT:    v_bfe_u32 v3, v2, 16, 1
; GFX10-NEXT:    v_or_b32_e32 v4, 0x400000, v2
; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX10-NEXT:    v_add3_u32 v3, v3, v2, 0x7fff
; GFX10-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v2, v3
; GFX11-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
; GFX11-NEXT:    v_bfe_u32 v3, v2, 16, 1
; GFX11-NEXT:    v_or_b32_e32 v4, 0x400000, v2
; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_add3_u32 v3, v3, v2, 0x7fff
; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX11-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_bf16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX12-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v2, v3
; GFX12-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
; GFX12-NEXT:    v_bfe_u32 v3, v2, 16, 1
; GFX12-NEXT:    v_or_b32_e32 v4, 0x400000, v2
; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_add3_u32 v3, v3, v2, 0x7fff
; GFX12-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc_lo
; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
; GFX12-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX12-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call bfloat @llvm.maximumnum.bf16(bfloat %x, bfloat %y)
  ret bfloat %max
}

; Same bfloat operation, but the call is tagged with the nnan fast-math flag.
define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
; GFX8-LABEL: v_maximumnum_bf16_nnan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
; GFX8-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_bf16_nnan:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_bf16_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v2
; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v3
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_bf16_nnan:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v2
; GFX11-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v0 :: v_dual_lshlrev_b32 v3, 16, v2
; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_bf16_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
; GFX12-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v2
; GFX12-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX12-NEXT:    v_dual_cndmask_b32 v0, v2, v0 :: v_dual_lshlrev_b32 v3, 16, v2
; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX12-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v3
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call nnan bfloat @llvm.maximumnum.bf16(bfloat %x, bfloat %y)
  ret bfloat %max
}

; Scalar llvm.maximumnum on float operands, call carries no fast-math flags.
define float @v_maximumnum_f32(float %x, float %y) {
; GFX8-LABEL: v_maximumnum_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call float @llvm.maximumnum.f32(float %x, float %y)
  ret float %max
}

; Same float operation, but the call is tagged with the nnan fast-math flag.
define float @v_maximumnum_f32_nnan(float %x, float %y) {
; GFX8-LABEL: v_maximumnum_f32_nnan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f32_nnan:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f32_nnan:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f32_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call nnan float @llvm.maximumnum.f32(float %x, float %y)
  ret float %max
}

; Scalar llvm.maximumnum on double operands, call carries no fast-math flags.
define double @v_maximumnum_f64(double %x, double %y) {
; GFX8-LABEL: v_maximumnum_f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f64:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call double @llvm.maximumnum.f64(double %x, double %y)
  ret double %max
}

; Same double operation, but the call is tagged with the nnan fast-math flag.
define double @v_maximumnum_f64_nnan(double %x, double %y) {
; GFX8-LABEL: v_maximumnum_f64_nnan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f64_nnan:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f64_nnan:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f64_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call nnan double @llvm.maximumnum.f64(double %x, double %y)
  ret double %max
}

; Float operation whose second operand is the literal constant 1.0.
define float @v_maximumnum_f32_1.0(float %x) {
; GFX8-LABEL: v_maximumnum_f32_1.0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT:    v_max_f32_e32 v0, 1.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f32_1.0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f32_e32 v0, 1.0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_1.0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f32_1.0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f32_e32 v0, 1.0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f32_1.0:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f32_e32 v0, 1.0, v0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call float @llvm.maximumnum.f32(float %x, float 1.0)
  ret float %max
}

; Float operation where only %y is routed through llvm.canonicalize first.
define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX8-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %y.canon = call float @llvm.canonicalize.f32(float %y)
  %max = call float @llvm.maximumnum.f32(float %x, float %y.canon)
  ret float %max
}

; Float operation where only %x is routed through llvm.canonicalize first.
define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX8-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %x.canon = call float @llvm.canonicalize.f32(float %x)
  %max = call float @llvm.maximumnum.f32(float %x.canon, float %y)
  ret float %max
}

; Float operation where both %x and %y are routed through llvm.canonicalize first.
define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX8-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %x.canon = call float @llvm.canonicalize.f32(float %x)
  %y.canon = call float @llvm.canonicalize.f32(float %y)
  %max = call float @llvm.maximumnum.f32(float %x.canon, float %y.canon)
  ret float %max
}

; Double operation whose second operand is the literal constant 1.0.
define double @v_maximumnum_f64_1.0(double %x) {
; GFX8-LABEL: v_maximumnum_f64_1.0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f64_1.0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_1.0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f64_1.0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f64_1.0:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], 1.0, v[0:1]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call double @llvm.maximumnum.f64(double %x, double 1.0)
  ret double %max
}

; Half operation with %x marked inreg and %y a plain argument.
define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
; GFX8-LABEL: v_maximumnum_f16_s_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX8-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX8-NEXT:    v_max_f16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f16_s_v:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX9-NEXT:    v_max_f16_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_s_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX10-NEXT:    v_max_f16_e32 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f16_s_v:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX11-NEXT:    v_max_f16_e64 v1, s0, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f16_s_v:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; GFX12-NEXT:    v_max_num_f16_e64 v1, s0, s0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f16_e32 v0, v1, v0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call half @llvm.maximumnum.f16(half %x, half %y)
  ret half %max
}

; Half operation with %x a plain argument and %y marked inreg.
define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
; GFX8-LABEL: v_maximumnum_f16_v_s:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f16_v_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_v_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f16_v_s:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v1, s0, s0
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_f16_v_s:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f16_e64 v1, s0, s0
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call half @llvm.maximumnum.f16(half %x, half %y)
  ret half %max
}

define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
; GFX8-LABEL: v_maximumnum_f16_s_s:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e64 v0, s17, s17
; GFX8-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX8-NEXT:    v_max_f16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_f16_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_max_f16_e64 v0, s17, s17
; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX9-NEXT:    v_max_f16_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f16_e64 v0, s17, s17
; GFX10-NEXT:    v_max_f16_e64 v1, s16, s16
; GFX10-NEXT:    v_max_f16_e32 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_f16_s_s:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0)
expcnt(0) lgkmcnt(0) 950; GFX11-NEXT: v_max_f16_e64 v0, s1, s1 951; GFX11-NEXT: v_max_f16_e64 v1, s0, s0 952; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 953; GFX11-NEXT: v_max_f16_e32 v0, v1, v0 954; GFX11-NEXT: s_setpc_b64 s[30:31] 955; 956; GFX12-LABEL: v_maximumnum_f16_s_s: 957; GFX12: ; %bb.0: 958; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 959; GFX12-NEXT: s_wait_expcnt 0x0 960; GFX12-NEXT: s_wait_samplecnt 0x0 961; GFX12-NEXT: s_wait_bvhcnt 0x0 962; GFX12-NEXT: s_wait_kmcnt 0x0 963; GFX12-NEXT: v_max_num_f16_e64 v0, s1, s1 964; GFX12-NEXT: v_max_num_f16_e64 v1, s0, s0 965; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 966; GFX12-NEXT: v_max_num_f16_e32 v0, v1, v0 967; GFX12-NEXT: s_setpc_b64 s[30:31] 968 %result = call half @llvm.maximumnum.f16(half %x, half %y) 969 ret half %result 970} 971 972define float @v_maximumnum_f32_s_v(float inreg %x, float %y) { 973; GFX8-LABEL: v_maximumnum_f32_s_v: 974; GFX8: ; %bb.0: 975; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 976; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 977; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 978; GFX8-NEXT: v_max_f32_e32 v0, v1, v0 979; GFX8-NEXT: s_setpc_b64 s[30:31] 980; 981; GFX9-LABEL: v_maximumnum_f32_s_v: 982; GFX9: ; %bb.0: 983; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 984; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 985; GFX9-NEXT: v_max_f32_e64 v1, s16, s16 986; GFX9-NEXT: v_max_f32_e32 v0, v1, v0 987; GFX9-NEXT: s_setpc_b64 s[30:31] 988; 989; GFX10-LABEL: v_maximumnum_f32_s_v: 990; GFX10: ; %bb.0: 991; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 992; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 993; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 994; GFX10-NEXT: v_max_f32_e32 v0, v1, v0 995; GFX10-NEXT: s_setpc_b64 s[30:31] 996; 997; GFX11-LABEL: v_maximumnum_f32_s_v: 998; GFX11: ; %bb.0: 999; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1000; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 1001; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 1002; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1003; GFX11-NEXT: 
v_max_f32_e32 v0, v1, v0 1004; GFX11-NEXT: s_setpc_b64 s[30:31] 1005; 1006; GFX12-LABEL: v_maximumnum_f32_s_v: 1007; GFX12: ; %bb.0: 1008; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1009; GFX12-NEXT: s_wait_expcnt 0x0 1010; GFX12-NEXT: s_wait_samplecnt 0x0 1011; GFX12-NEXT: s_wait_bvhcnt 0x0 1012; GFX12-NEXT: s_wait_kmcnt 0x0 1013; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 1014; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 1015; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1016; GFX12-NEXT: v_max_num_f32_e32 v0, v1, v0 1017; GFX12-NEXT: s_setpc_b64 s[30:31] 1018 %result = call float @llvm.maximumnum.f32(float %x, float %y) 1019 ret float %result 1020} 1021 1022define float @v_maximumnum_f32_v_s(float %x, float inreg %y) { 1023; GFX8-LABEL: v_maximumnum_f32_v_s: 1024; GFX8: ; %bb.0: 1025; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1026; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 1027; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 1028; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 1029; GFX8-NEXT: s_setpc_b64 s[30:31] 1030; 1031; GFX9-LABEL: v_maximumnum_f32_v_s: 1032; GFX9: ; %bb.0: 1033; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1034; GFX9-NEXT: v_max_f32_e64 v1, s16, s16 1035; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 1036; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 1037; GFX9-NEXT: s_setpc_b64 s[30:31] 1038; 1039; GFX10-LABEL: v_maximumnum_f32_v_s: 1040; GFX10: ; %bb.0: 1041; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1042; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 1043; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 1044; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 1045; GFX10-NEXT: s_setpc_b64 s[30:31] 1046; 1047; GFX11-LABEL: v_maximumnum_f32_v_s: 1048; GFX11: ; %bb.0: 1049; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1050; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 1051; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 1052; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1053; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 1054; GFX11-NEXT: s_setpc_b64 s[30:31] 1055; 1056; GFX12-LABEL: v_maximumnum_f32_v_s: 1057; GFX12: ; 
%bb.0: 1058; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1059; GFX12-NEXT: s_wait_expcnt 0x0 1060; GFX12-NEXT: s_wait_samplecnt 0x0 1061; GFX12-NEXT: s_wait_bvhcnt 0x0 1062; GFX12-NEXT: s_wait_kmcnt 0x0 1063; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 1064; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 1065; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1066; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 1067; GFX12-NEXT: s_setpc_b64 s[30:31] 1068 %result = call float @llvm.maximumnum.f32(float %x, float %y) 1069 ret float %result 1070} 1071 1072define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) { 1073; GFX8-LABEL: v_maximumnum_f32_s_s: 1074; GFX8: ; %bb.0: 1075; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1076; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, s17 1077; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 1078; GFX8-NEXT: v_max_f32_e32 v0, v1, v0 1079; GFX8-NEXT: s_setpc_b64 s[30:31] 1080; 1081; GFX9-LABEL: v_maximumnum_f32_s_s: 1082; GFX9: ; %bb.0: 1083; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1084; GFX9-NEXT: v_max_f32_e64 v0, s17, s17 1085; GFX9-NEXT: v_max_f32_e64 v1, s16, s16 1086; GFX9-NEXT: v_max_f32_e32 v0, v1, v0 1087; GFX9-NEXT: s_setpc_b64 s[30:31] 1088; 1089; GFX10-LABEL: v_maximumnum_f32_s_s: 1090; GFX10: ; %bb.0: 1091; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1092; GFX10-NEXT: v_max_f32_e64 v0, s17, s17 1093; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 1094; GFX10-NEXT: v_max_f32_e32 v0, v1, v0 1095; GFX10-NEXT: s_setpc_b64 s[30:31] 1096; 1097; GFX11-LABEL: v_maximumnum_f32_s_s: 1098; GFX11: ; %bb.0: 1099; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1100; GFX11-NEXT: v_max_f32_e64 v0, s1, s1 1101; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 1102; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1103; GFX11-NEXT: v_max_f32_e32 v0, v1, v0 1104; GFX11-NEXT: s_setpc_b64 s[30:31] 1105; 1106; GFX12-LABEL: v_maximumnum_f32_s_s: 1107; GFX12: ; %bb.0: 1108; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1109; GFX12-NEXT: s_wait_expcnt 0x0 1110; GFX12-NEXT: 
s_wait_samplecnt 0x0 1111; GFX12-NEXT: s_wait_bvhcnt 0x0 1112; GFX12-NEXT: s_wait_kmcnt 0x0 1113; GFX12-NEXT: v_max_num_f32_e64 v0, s1, s1 1114; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 1115; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1116; GFX12-NEXT: v_max_num_f32_e32 v0, v1, v0 1117; GFX12-NEXT: s_setpc_b64 s[30:31] 1118 %result = call float @llvm.maximumnum.f32(float %x, float %y) 1119 ret float %result 1120} 1121 1122define double @v_maximumnum_f64_s_v(double inreg %x, double %y) { 1123; GFX8-LABEL: v_maximumnum_f64_s_v: 1124; GFX8: ; %bb.0: 1125; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1126; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1127; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1128; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1129; GFX8-NEXT: s_setpc_b64 s[30:31] 1130; 1131; GFX9-LABEL: v_maximumnum_f64_s_v: 1132; GFX9: ; %bb.0: 1133; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1134; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1135; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1136; GFX9-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1137; GFX9-NEXT: s_setpc_b64 s[30:31] 1138; 1139; GFX10-LABEL: v_maximumnum_f64_s_v: 1140; GFX10: ; %bb.0: 1141; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1142; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1143; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1144; GFX10-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1145; GFX10-NEXT: s_setpc_b64 s[30:31] 1146; 1147; GFX11-LABEL: v_maximumnum_f64_s_v: 1148; GFX11: ; %bb.0: 1149; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1150; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] 1151; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1152; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1153; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1154; GFX11-NEXT: s_setpc_b64 s[30:31] 1155; 1156; GFX12-LABEL: v_maximumnum_f64_s_v: 1157; GFX12: ; %bb.0: 1158; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1159; GFX12-NEXT: s_wait_expcnt 0x0 1160; GFX12-NEXT: s_wait_samplecnt 0x0 
1161; GFX12-NEXT: s_wait_bvhcnt 0x0 1162; GFX12-NEXT: s_wait_kmcnt 0x0 1163; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] 1164; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 1165; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1166; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[0:1] 1167; GFX12-NEXT: s_setpc_b64 s[30:31] 1168 %result = call double @llvm.maximumnum.f64(double %x, double %y) 1169 ret double %result 1170} 1171 1172define double @v_maximumnum_f64_v_s(double %x, double inreg %y) { 1173; GFX8-LABEL: v_maximumnum_f64_v_s: 1174; GFX8: ; %bb.0: 1175; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1176; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1177; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1178; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1179; GFX8-NEXT: s_setpc_b64 s[30:31] 1180; 1181; GFX9-LABEL: v_maximumnum_f64_v_s: 1182; GFX9: ; %bb.0: 1183; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1184; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1185; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1186; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1187; GFX9-NEXT: s_setpc_b64 s[30:31] 1188; 1189; GFX10-LABEL: v_maximumnum_f64_v_s: 1190; GFX10: ; %bb.0: 1191; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1192; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1193; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1194; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1195; GFX10-NEXT: s_setpc_b64 s[30:31] 1196; 1197; GFX11-LABEL: v_maximumnum_f64_v_s: 1198; GFX11: ; %bb.0: 1199; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1200; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] 1201; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 1202; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1203; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1204; GFX11-NEXT: s_setpc_b64 s[30:31] 1205; 1206; GFX12-LABEL: v_maximumnum_f64_v_s: 1207; GFX12: ; %bb.0: 1208; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1209; GFX12-NEXT: s_wait_expcnt 0x0 1210; GFX12-NEXT: 
s_wait_samplecnt 0x0 1211; GFX12-NEXT: s_wait_bvhcnt 0x0 1212; GFX12-NEXT: s_wait_kmcnt 0x0 1213; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] 1214; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] 1215; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1216; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] 1217; GFX12-NEXT: s_setpc_b64 s[30:31] 1218 %result = call double @llvm.maximumnum.f64(double %x, double %y) 1219 ret double %result 1220} 1221 1222define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) { 1223; GFX8-LABEL: v_maximumnum_f64_s_s: 1224; GFX8: ; %bb.0: 1225; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1226; GFX8-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] 1227; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1228; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1229; GFX8-NEXT: s_setpc_b64 s[30:31] 1230; 1231; GFX9-LABEL: v_maximumnum_f64_s_s: 1232; GFX9: ; %bb.0: 1233; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1234; GFX9-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] 1235; GFX9-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1236; GFX9-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1237; GFX9-NEXT: s_setpc_b64 s[30:31] 1238; 1239; GFX10-LABEL: v_maximumnum_f64_s_s: 1240; GFX10: ; %bb.0: 1241; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1242; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] 1243; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] 1244; GFX10-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1245; GFX10-NEXT: s_setpc_b64 s[30:31] 1246; 1247; GFX11-LABEL: v_maximumnum_f64_s_s: 1248; GFX11: ; %bb.0: 1249; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1250; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] 1251; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] 1252; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1253; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] 1254; GFX11-NEXT: s_setpc_b64 s[30:31] 1255; 1256; GFX12-LABEL: v_maximumnum_f64_s_s: 1257; GFX12: ; %bb.0: 1258; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1259; GFX12-NEXT: 
s_wait_expcnt 0x0 1260; GFX12-NEXT: s_wait_samplecnt 0x0 1261; GFX12-NEXT: s_wait_bvhcnt 0x0 1262; GFX12-NEXT: s_wait_kmcnt 0x0 1263; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3] 1264; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] 1265; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1266; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[0:1] 1267; GFX12-NEXT: s_setpc_b64 s[30:31] 1268 %result = call double @llvm.maximumnum.f64(double %x, double %y) 1269 ret double %result 1270} 1271 1272define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) { 1273; GFX8-LABEL: v_maximumnum_f32_fabs_rhs: 1274; GFX8: ; %bb.0: 1275; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1276; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| 1277; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 1278; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 1279; GFX8-NEXT: s_setpc_b64 s[30:31] 1280; 1281; GFX9-LABEL: v_maximumnum_f32_fabs_rhs: 1282; GFX9: ; %bb.0: 1283; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1284; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1285; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 1286; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 1287; GFX9-NEXT: s_setpc_b64 s[30:31] 1288; 1289; GFX10-LABEL: v_maximumnum_f32_fabs_rhs: 1290; GFX10: ; %bb.0: 1291; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1292; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1293; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 1294; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 1295; GFX10-NEXT: s_setpc_b64 s[30:31] 1296; 1297; GFX11-LABEL: v_maximumnum_f32_fabs_rhs: 1298; GFX11: ; %bb.0: 1299; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1300; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1301; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 1302; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1303; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 1304; GFX11-NEXT: s_setpc_b64 s[30:31] 1305; 1306; GFX12-LABEL: v_maximumnum_f32_fabs_rhs: 1307; GFX12: ; %bb.0: 1308; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1309; GFX12-NEXT: s_wait_expcnt 0x0 1310; GFX12-NEXT: s_wait_samplecnt 
0x0 1311; GFX12-NEXT: s_wait_bvhcnt 0x0 1312; GFX12-NEXT: s_wait_kmcnt 0x0 1313; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| 1314; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 1315; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1316; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 1317; GFX12-NEXT: s_setpc_b64 s[30:31] 1318 %fabs.y = call float @llvm.fabs.f32(float %y) 1319 %result = call float @llvm.maximumnum.f32(float %x, float %fabs.y) 1320 ret float %result 1321} 1322 1323define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) { 1324; GFX8-LABEL: v_maximumnum_f32_fneg_fabs_rhs: 1325; GFX8: ; %bb.0: 1326; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1327; GFX8-NEXT: v_mul_f32_e64 v1, -1.0, |v1| 1328; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 1329; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 1330; GFX8-NEXT: s_setpc_b64 s[30:31] 1331; 1332; GFX9-LABEL: v_maximumnum_f32_fneg_fabs_rhs: 1333; GFX9: ; %bb.0: 1334; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1335; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| 1336; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 1337; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 1338; GFX9-NEXT: s_setpc_b64 s[30:31] 1339; 1340; GFX10-LABEL: v_maximumnum_f32_fneg_fabs_rhs: 1341; GFX10: ; %bb.0: 1342; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| 1344; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 1345; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 1346; GFX10-NEXT: s_setpc_b64 s[30:31] 1347; 1348; GFX11-LABEL: v_maximumnum_f32_fneg_fabs_rhs: 1349; GFX11: ; %bb.0: 1350; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1351; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| 1352; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 1353; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1354; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 1355; GFX11-NEXT: s_setpc_b64 s[30:31] 1356; 1357; GFX12-LABEL: v_maximumnum_f32_fneg_fabs_rhs: 1358; GFX12: ; %bb.0: 1359; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1360; GFX12-NEXT: s_wait_expcnt 0x0 1361; GFX12-NEXT: 
s_wait_samplecnt 0x0 1362; GFX12-NEXT: s_wait_bvhcnt 0x0 1363; GFX12-NEXT: s_wait_kmcnt 0x0 1364; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| 1365; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 1366; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1367; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 1368; GFX12-NEXT: s_setpc_b64 s[30:31] 1369 %fabs.y = call float @llvm.fabs.f32(float %y) 1370 %fneg.fabs.y = fneg float %fabs.y 1371 %result = call float @llvm.maximumnum.f32(float %x, float %fneg.fabs.y) 1372 ret float %result 1373} 1374 1375define float @v_maximumnum_f32_fabs(float %x, float %y) { 1376; GFX8-LABEL: v_maximumnum_f32_fabs: 1377; GFX8: ; %bb.0: 1378; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1379; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| 1380; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, |v0| 1381; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 1382; GFX8-NEXT: s_setpc_b64 s[30:31] 1383; 1384; GFX9-LABEL: v_maximumnum_f32_fabs: 1385; GFX9: ; %bb.0: 1386; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1387; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1388; GFX9-NEXT: v_max_f32_e64 v0, |v0|, |v0| 1389; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 1390; GFX9-NEXT: s_setpc_b64 s[30:31] 1391; 1392; GFX10-LABEL: v_maximumnum_f32_fabs: 1393; GFX10: ; %bb.0: 1394; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1395; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1396; GFX10-NEXT: v_max_f32_e64 v0, |v0|, |v0| 1397; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 1398; GFX10-NEXT: s_setpc_b64 s[30:31] 1399; 1400; GFX11-LABEL: v_maximumnum_f32_fabs: 1401; GFX11: ; %bb.0: 1402; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1403; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| 1404; GFX11-NEXT: v_max_f32_e64 v0, |v0|, |v0| 1405; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1406; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 1407; GFX11-NEXT: s_setpc_b64 s[30:31] 1408; 1409; GFX12-LABEL: v_maximumnum_f32_fabs: 1410; GFX12: ; %bb.0: 1411; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1412; GFX12-NEXT: s_wait_expcnt 0x0 1413; 
GFX12-NEXT: s_wait_samplecnt 0x0 1414; GFX12-NEXT: s_wait_bvhcnt 0x0 1415; GFX12-NEXT: s_wait_kmcnt 0x0 1416; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| 1417; GFX12-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| 1418; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1419; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 1420; GFX12-NEXT: s_setpc_b64 s[30:31] 1421 %fabs.x = call float @llvm.fabs.f32(float %x) 1422 %fabs.y = call float @llvm.fabs.f32(float %y) 1423 %result = call float @llvm.maximumnum.f32(float %fabs.x, float %fabs.y) 1424 ret float %result 1425} 1426 1427define float @v_maximumnum_f32_fneg(float %x, float %y) { 1428; GFX8-LABEL: v_maximumnum_f32_fneg: 1429; GFX8: ; %bb.0: 1430; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1431; GFX8-NEXT: v_mul_f32_e32 v1, -1.0, v1 1432; GFX8-NEXT: v_mul_f32_e32 v0, -1.0, v0 1433; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 1434; GFX8-NEXT: s_setpc_b64 s[30:31] 1435; 1436; GFX9-LABEL: v_maximumnum_f32_fneg: 1437; GFX9: ; %bb.0: 1438; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1439; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 1440; GFX9-NEXT: v_max_f32_e64 v0, -v0, -v0 1441; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 1442; GFX9-NEXT: s_setpc_b64 s[30:31] 1443; 1444; GFX10-LABEL: v_maximumnum_f32_fneg: 1445; GFX10: ; %bb.0: 1446; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1447; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 1448; GFX10-NEXT: v_max_f32_e64 v0, -v0, -v0 1449; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 1450; GFX10-NEXT: s_setpc_b64 s[30:31] 1451; 1452; GFX11-LABEL: v_maximumnum_f32_fneg: 1453; GFX11: ; %bb.0: 1454; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1455; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 1456; GFX11-NEXT: v_max_f32_e64 v0, -v0, -v0 1457; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1458; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 1459; GFX11-NEXT: s_setpc_b64 s[30:31] 1460; 1461; GFX12-LABEL: v_maximumnum_f32_fneg: 1462; GFX12: ; %bb.0: 1463; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1464; GFX12-NEXT: s_wait_expcnt 0x0 
1465; GFX12-NEXT: s_wait_samplecnt 0x0 1466; GFX12-NEXT: s_wait_bvhcnt 0x0 1467; GFX12-NEXT: s_wait_kmcnt 0x0 1468; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1 1469; GFX12-NEXT: v_max_num_f32_e64 v0, -v0, -v0 1470; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1471; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 1472; GFX12-NEXT: s_setpc_b64 s[30:31] 1473 %fneg.x = fneg float %x 1474 %fneg.y = fneg float %y 1475 %result = call float @llvm.maximumnum.f32(float %fneg.x, float %fneg.y) 1476 ret float %result 1477} 1478 1479define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) { 1480; GFX8-LABEL: v_maximumnum_f16_fabs_rhs: 1481; GFX8: ; %bb.0: 1482; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1483; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1484; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 1485; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 1486; GFX8-NEXT: s_setpc_b64 s[30:31] 1487; 1488; GFX9-LABEL: v_maximumnum_f16_fabs_rhs: 1489; GFX9: ; %bb.0: 1490; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1492; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 1493; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 1494; GFX9-NEXT: s_setpc_b64 s[30:31] 1495; 1496; GFX10-LABEL: v_maximumnum_f16_fabs_rhs: 1497; GFX10: ; %bb.0: 1498; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1499; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1500; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 1501; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 1502; GFX10-NEXT: s_setpc_b64 s[30:31] 1503; 1504; GFX11-LABEL: v_maximumnum_f16_fabs_rhs: 1505; GFX11: ; %bb.0: 1506; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1507; GFX11-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1508; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 1509; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1510; GFX11-NEXT: v_max_f16_e32 v0, v0, v1 1511; GFX11-NEXT: s_setpc_b64 s[30:31] 1512; 1513; GFX12-LABEL: v_maximumnum_f16_fabs_rhs: 1514; GFX12: ; %bb.0: 1515; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1516; GFX12-NEXT: s_wait_expcnt 0x0 1517; GFX12-NEXT: 
s_wait_samplecnt 0x0 1518; GFX12-NEXT: s_wait_bvhcnt 0x0 1519; GFX12-NEXT: s_wait_kmcnt 0x0 1520; GFX12-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| 1521; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 1522; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1523; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1 1524; GFX12-NEXT: s_setpc_b64 s[30:31] 1525 %fabs.y = call half @llvm.fabs.f16(half %y) 1526 %result = call half @llvm.maximumnum.f16(half %x, half %fabs.y) 1527 ret half %result 1528} 1529 1530define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) { 1531; GFX8-LABEL: v_maximumnum_f16_fneg_fabs_rhs: 1532; GFX8: ; %bb.0: 1533; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1534; GFX8-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1535; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 1536; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 1537; GFX8-NEXT: s_setpc_b64 s[30:31] 1538; 1539; GFX9-LABEL: v_maximumnum_f16_fneg_fabs_rhs: 1540; GFX9: ; %bb.0: 1541; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1542; GFX9-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1543; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 1544; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 1545; GFX9-NEXT: s_setpc_b64 s[30:31] 1546; 1547; GFX10-LABEL: v_maximumnum_f16_fneg_fabs_rhs: 1548; GFX10: ; %bb.0: 1549; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1550; GFX10-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1551; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 1552; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 1553; GFX10-NEXT: s_setpc_b64 s[30:31] 1554; 1555; GFX11-LABEL: v_maximumnum_f16_fneg_fabs_rhs: 1556; GFX11: ; %bb.0: 1557; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1558; GFX11-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| 1559; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 1560; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1561; GFX11-NEXT: v_max_f16_e32 v0, v0, v1 1562; GFX11-NEXT: s_setpc_b64 s[30:31] 1563; 1564; GFX12-LABEL: v_maximumnum_f16_fneg_fabs_rhs: 1565; GFX12: ; %bb.0: 1566; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1567; GFX12-NEXT: s_wait_expcnt 0x0 1568; GFX12-NEXT: 
s_wait_samplecnt 0x0 1569; GFX12-NEXT: s_wait_bvhcnt 0x0 1570; GFX12-NEXT: s_wait_kmcnt 0x0 1571; GFX12-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| 1572; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 1573; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1574; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1 1575; GFX12-NEXT: s_setpc_b64 s[30:31] 1576 %fabs.y = call half @llvm.fabs.f16(half %y) 1577 %fneg.fabs.y = fneg half %fabs.y 1578 %result = call half @llvm.maximumnum.f16(half %x, half %fneg.fabs.y) 1579 ret half %result 1580} 1581 1582define half @v_maximumnum_f16_fabs(half %x, half %y) { 1583; GFX8-LABEL: v_maximumnum_f16_fabs: 1584; GFX8: ; %bb.0: 1585; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1586; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1587; GFX8-NEXT: v_max_f16_e64 v0, |v0|, |v0| 1588; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 1589; GFX8-NEXT: s_setpc_b64 s[30:31] 1590; 1591; GFX9-LABEL: v_maximumnum_f16_fabs: 1592; GFX9: ; %bb.0: 1593; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1594; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1595; GFX9-NEXT: v_max_f16_e64 v0, |v0|, |v0| 1596; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 1597; GFX9-NEXT: s_setpc_b64 s[30:31] 1598; 1599; GFX10-LABEL: v_maximumnum_f16_fabs: 1600; GFX10: ; %bb.0: 1601; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1602; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1603; GFX10-NEXT: v_max_f16_e64 v0, |v0|, |v0| 1604; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 1605; GFX10-NEXT: s_setpc_b64 s[30:31] 1606; 1607; GFX11-LABEL: v_maximumnum_f16_fabs: 1608; GFX11: ; %bb.0: 1609; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1610; GFX11-NEXT: v_max_f16_e64 v1, |v1|, |v1| 1611; GFX11-NEXT: v_max_f16_e64 v0, |v0|, |v0| 1612; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1613; GFX11-NEXT: v_max_f16_e32 v0, v0, v1 1614; GFX11-NEXT: s_setpc_b64 s[30:31] 1615; 1616; GFX12-LABEL: v_maximumnum_f16_fabs: 1617; GFX12: ; %bb.0: 1618; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1619; GFX12-NEXT: s_wait_expcnt 0x0 1620; GFX12-NEXT: 
s_wait_samplecnt 0x0 1621; GFX12-NEXT: s_wait_bvhcnt 0x0 1622; GFX12-NEXT: s_wait_kmcnt 0x0 1623; GFX12-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| 1624; GFX12-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| 1625; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1626; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1 1627; GFX12-NEXT: s_setpc_b64 s[30:31] 1628 %fabs.x = call half @llvm.fabs.f16(half %x) 1629 %fabs.y = call half @llvm.fabs.f16(half %y) 1630 %result = call half @llvm.maximumnum.f16(half %fabs.x, half %fabs.y) 1631 ret half %result 1632} 1633 1634define half @v_maximumnum_f16_fneg(half %x, half %y) { 1635; GFX8-LABEL: v_maximumnum_f16_fneg: 1636; GFX8: ; %bb.0: 1637; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; GFX8-NEXT: v_max_f16_e64 v1, -v1, -v1 1639; GFX8-NEXT: v_max_f16_e64 v0, -v0, -v0 1640; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 1641; GFX8-NEXT: s_setpc_b64 s[30:31] 1642; 1643; GFX9-LABEL: v_maximumnum_f16_fneg: 1644; GFX9: ; %bb.0: 1645; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1646; GFX9-NEXT: v_max_f16_e64 v1, -v1, -v1 1647; GFX9-NEXT: v_max_f16_e64 v0, -v0, -v0 1648; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 1649; GFX9-NEXT: s_setpc_b64 s[30:31] 1650; 1651; GFX10-LABEL: v_maximumnum_f16_fneg: 1652; GFX10: ; %bb.0: 1653; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1654; GFX10-NEXT: v_max_f16_e64 v1, -v1, -v1 1655; GFX10-NEXT: v_max_f16_e64 v0, -v0, -v0 1656; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 1657; GFX10-NEXT: s_setpc_b64 s[30:31] 1658; 1659; GFX11-LABEL: v_maximumnum_f16_fneg: 1660; GFX11: ; %bb.0: 1661; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1662; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1 1663; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 1664; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1665; GFX11-NEXT: v_max_f16_e32 v0, v0, v1 1666; GFX11-NEXT: s_setpc_b64 s[30:31] 1667; 1668; GFX12-LABEL: v_maximumnum_f16_fneg: 1669; GFX12: ; %bb.0: 1670; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1671; GFX12-NEXT: s_wait_expcnt 0x0 1672; GFX12-NEXT: 
s_wait_samplecnt 0x0 1673; GFX12-NEXT: s_wait_bvhcnt 0x0 1674; GFX12-NEXT: s_wait_kmcnt 0x0 1675; GFX12-NEXT: v_max_num_f16_e64 v1, -v1, -v1 1676; GFX12-NEXT: v_max_num_f16_e64 v0, -v0, -v0 1677; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1678; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1 1679; GFX12-NEXT: s_setpc_b64 s[30:31] 1680 %fneg.x = fneg half %x 1681 %fneg.y = fneg half %y 1682 %result = call half @llvm.maximumnum.f16(half %fneg.x, half %fneg.y) 1683 ret half %result 1684} 1685 1686define double @v_maximumnum_f64_fneg(double %x, double %y) { 1687; GFX8-LABEL: v_maximumnum_f64_fneg: 1688; GFX8: ; %bb.0: 1689; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1690; GFX8-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] 1691; GFX8-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] 1692; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1693; GFX8-NEXT: s_setpc_b64 s[30:31] 1694; 1695; GFX9-LABEL: v_maximumnum_f64_fneg: 1696; GFX9: ; %bb.0: 1697; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1698; GFX9-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] 1699; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] 1700; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1701; GFX9-NEXT: s_setpc_b64 s[30:31] 1702; 1703; GFX10-LABEL: v_maximumnum_f64_fneg: 1704; GFX10: ; %bb.0: 1705; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1706; GFX10-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] 1707; GFX10-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] 1708; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1709; GFX10-NEXT: s_setpc_b64 s[30:31] 1710; 1711; GFX11-LABEL: v_maximumnum_f64_fneg: 1712; GFX11: ; %bb.0: 1713; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1714; GFX11-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] 1715; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] 1716; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1717; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] 1718; GFX11-NEXT: s_setpc_b64 s[30:31] 1719; 1720; GFX12-LABEL: v_maximumnum_f64_fneg: 1721; GFX12: ; %bb.0: 1722; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1723; 
GFX12-NEXT: s_wait_expcnt 0x0 1724; GFX12-NEXT: s_wait_samplecnt 0x0 1725; GFX12-NEXT: s_wait_bvhcnt 0x0 1726; GFX12-NEXT: s_wait_kmcnt 0x0 1727; GFX12-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] 1728; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] 1729; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1730; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] 1731; GFX12-NEXT: s_setpc_b64 s[30:31] 1732 %fneg.x = fneg double %x 1733 %fneg.y = fneg double %y 1734 %result = call double @llvm.maximumnum.f64(double %fneg.x, double %fneg.y) 1735 ret double %result 1736} 1737 1738define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) { 1739; GFX8-LABEL: v_maximumnum_v2f16: 1740; GFX8: ; %bb.0: 1741; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1742; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1743; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1744; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 1745; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 1746; GFX8-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1747; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 1748; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 1749; GFX8-NEXT: s_setpc_b64 s[30:31] 1750; 1751; GFX9-LABEL: v_maximumnum_v2f16: 1752; GFX9: ; %bb.0: 1753; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1754; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 1755; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 1756; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 1757; GFX9-NEXT: s_setpc_b64 s[30:31] 1758; 1759; GFX10-LABEL: v_maximumnum_v2f16: 1760; GFX10: ; %bb.0: 1761; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1762; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 1763; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 1764; GFX10-NEXT: v_pk_max_f16 v0, v0, v1 1765; GFX10-NEXT: s_setpc_b64 s[30:31] 1766; 1767; GFX11-LABEL: v_maximumnum_v2f16: 1768; GFX11: ; %bb.0: 1769; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1770; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 1771; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 1772; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1773; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 1774; GFX11-NEXT: s_setpc_b64 s[30:31] 1775; 1776; GFX12-LABEL: v_maximumnum_v2f16: 1777; GFX12: ; %bb.0: 1778; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1779; GFX12-NEXT: s_wait_expcnt 0x0 1780; GFX12-NEXT: s_wait_samplecnt 0x0 1781; GFX12-NEXT: s_wait_bvhcnt 0x0 1782; GFX12-NEXT: s_wait_kmcnt 0x0 1783; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 1784; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 1785; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1786; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 1787; GFX12-NEXT: s_setpc_b64 s[30:31] 1788 %result = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %x, <2 x half> %y) 1789 ret <2 x half> %result 1790} 1791 1792define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { 1793; GFX8-LABEL: v_maximumnum_v2f16_nnan: 1794; GFX8: ; %bb.0: 1795; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1796; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1797; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 1798; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 1799; GFX8-NEXT: s_setpc_b64 s[30:31] 1800; 1801; GFX9-LABEL: v_maximumnum_v2f16_nnan: 1802; GFX9: ; %bb.0: 1803; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1804; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 1805; GFX9-NEXT: s_setpc_b64 s[30:31] 1806; 1807; GFX10-LABEL: v_maximumnum_v2f16_nnan: 1808; GFX10: ; %bb.0: 1809; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1810; GFX10-NEXT: v_pk_max_f16 v0, v0, v1 1811; GFX10-NEXT: s_setpc_b64 s[30:31] 1812; 1813; GFX11-LABEL: v_maximumnum_v2f16_nnan: 1814; GFX11: ; %bb.0: 1815; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1816; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 1817; GFX11-NEXT: s_setpc_b64 s[30:31] 1818; 1819; GFX12-LABEL: v_maximumnum_v2f16_nnan: 1820; GFX12: ; %bb.0: 1821; GFX12-NEXT: 
s_wait_loadcnt_dscnt 0x0 1822; GFX12-NEXT: s_wait_expcnt 0x0 1823; GFX12-NEXT: s_wait_samplecnt 0x0 1824; GFX12-NEXT: s_wait_bvhcnt 0x0 1825; GFX12-NEXT: s_wait_kmcnt 0x0 1826; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 1827; GFX12-NEXT: s_setpc_b64 s[30:31] 1828 %result = call nnan <2 x half> @llvm.maximumnum.v2f16(<2 x half> %x, <2 x half> %y) 1829 ret <2 x half> %result 1830} 1831 1832define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) { 1833; GFX8-LABEL: v_maximumnum_v3f16: 1834; GFX8: ; %bb.0: 1835; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1836; GFX8-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1837; GFX8-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1838; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 1839; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 1840; GFX8-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1841; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 1842; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 1843; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 1844; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 1845; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 1846; GFX8-NEXT: s_setpc_b64 s[30:31] 1847; 1848; GFX9-LABEL: v_maximumnum_v3f16: 1849; GFX9: ; %bb.0: 1850; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1851; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 1852; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 1853; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 1854; GFX9-NEXT: v_pk_max_f16 v2, v3, v3 1855; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 1856; GFX9-NEXT: v_pk_max_f16 v1, v1, v2 1857; GFX9-NEXT: s_setpc_b64 s[30:31] 1858; 1859; GFX10-LABEL: v_maximumnum_v3f16: 1860; GFX10: ; %bb.0: 1861; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1862; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 1863; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 1864; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 1865; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 1866; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 1867; GFX10-NEXT: 
v_pk_max_f16 v1, v1, v3 1868; GFX10-NEXT: s_setpc_b64 s[30:31] 1869; 1870; GFX11-LABEL: v_maximumnum_v3f16: 1871; GFX11: ; %bb.0: 1872; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1873; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 1874; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 1875; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 1876; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 1877; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 1878; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 1879; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 1880; GFX11-NEXT: s_setpc_b64 s[30:31] 1881; 1882; GFX12-LABEL: v_maximumnum_v3f16: 1883; GFX12: ; %bb.0: 1884; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1885; GFX12-NEXT: s_wait_expcnt 0x0 1886; GFX12-NEXT: s_wait_samplecnt 0x0 1887; GFX12-NEXT: s_wait_bvhcnt 0x0 1888; GFX12-NEXT: s_wait_kmcnt 0x0 1889; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 1890; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 1891; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 1892; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 1893; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 1894; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 1895; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 1896; GFX12-NEXT: s_setpc_b64 s[30:31] 1897 %result = call <3 x half> @llvm.maximumnum.v3f16(<3 x half> %x, <3 x half> %y) 1898 ret <3 x half> %result 1899} 1900 1901define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { 1902; GFX8-LABEL: v_maximumnum_v3f16_nnan: 1903; GFX8: ; %bb.0: 1904; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1905; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1906; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 1907; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 1908; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 1909; GFX8-NEXT: s_setpc_b64 s[30:31] 1910; 1911; GFX9-LABEL: v_maximumnum_v3f16_nnan: 1912; GFX9: ; %bb.0: 1913; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1914; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 1915; 
GFX9-NEXT: v_pk_max_f16 v1, v1, v3 1916; GFX9-NEXT: s_setpc_b64 s[30:31] 1917; 1918; GFX10-LABEL: v_maximumnum_v3f16_nnan: 1919; GFX10: ; %bb.0: 1920; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1921; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 1922; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 1923; GFX10-NEXT: s_setpc_b64 s[30:31] 1924; 1925; GFX11-LABEL: v_maximumnum_v3f16_nnan: 1926; GFX11: ; %bb.0: 1927; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1928; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 1929; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 1930; GFX11-NEXT: s_setpc_b64 s[30:31] 1931; 1932; GFX12-LABEL: v_maximumnum_v3f16_nnan: 1933; GFX12: ; %bb.0: 1934; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1935; GFX12-NEXT: s_wait_expcnt 0x0 1936; GFX12-NEXT: s_wait_samplecnt 0x0 1937; GFX12-NEXT: s_wait_bvhcnt 0x0 1938; GFX12-NEXT: s_wait_kmcnt 0x0 1939; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 1940; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 1941; GFX12-NEXT: s_setpc_b64 s[30:31] 1942 %result = call nnan <3 x half> @llvm.maximumnum.v3f16(<3 x half> %x, <3 x half> %y) 1943 ret <3 x half> %result 1944} 1945 1946define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) { 1947; GFX8-LABEL: v_maximumnum_v4f16: 1948; GFX8: ; %bb.0: 1949; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1950; GFX8-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1951; GFX8-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1952; GFX8-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1953; GFX8-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1954; GFX8-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1955; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 1956; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 1957; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 1958; GFX8-NEXT: v_max_f16_e32 
v0, v0, v0 1959; GFX8-NEXT: v_max_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1960; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 1961; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 1962; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 1963; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1964; GFX8-NEXT: s_setpc_b64 s[30:31] 1965; 1966; GFX9-LABEL: v_maximumnum_v4f16: 1967; GFX9: ; %bb.0: 1968; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1969; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 1970; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 1971; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 1972; GFX9-NEXT: v_pk_max_f16 v2, v3, v3 1973; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 1974; GFX9-NEXT: v_pk_max_f16 v1, v1, v2 1975; GFX9-NEXT: s_setpc_b64 s[30:31] 1976; 1977; GFX10-LABEL: v_maximumnum_v4f16: 1978; GFX10: ; %bb.0: 1979; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1980; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 1981; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 1982; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 1983; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 1984; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 1985; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 1986; GFX10-NEXT: s_setpc_b64 s[30:31] 1987; 1988; GFX11-LABEL: v_maximumnum_v4f16: 1989; GFX11: ; %bb.0: 1990; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1991; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 1992; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 1993; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 1994; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 1995; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 1996; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 1997; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 1998; GFX11-NEXT: s_setpc_b64 s[30:31] 1999; 2000; GFX12-LABEL: v_maximumnum_v4f16: 2001; GFX12: ; %bb.0: 2002; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2003; GFX12-NEXT: s_wait_expcnt 0x0 2004; GFX12-NEXT: s_wait_samplecnt 0x0 2005; GFX12-NEXT: s_wait_bvhcnt 0x0 2006; GFX12-NEXT: s_wait_kmcnt 0x0 2007; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 2008; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 
2009; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 2010; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 2011; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2012; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 2013; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 2014; GFX12-NEXT: s_setpc_b64 s[30:31] 2015 %result = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %x, <4 x half> %y) 2016 ret <4 x half> %result 2017} 2018 2019define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { 2020; GFX8-LABEL: v_maximumnum_v4f16_nnan: 2021; GFX8: ; %bb.0: 2022; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2023; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2024; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2025; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 2026; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 2027; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 2028; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 2029; GFX8-NEXT: s_setpc_b64 s[30:31] 2030; 2031; GFX9-LABEL: v_maximumnum_v4f16_nnan: 2032; GFX9: ; %bb.0: 2033; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2034; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 2035; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 2036; GFX9-NEXT: s_setpc_b64 s[30:31] 2037; 2038; GFX10-LABEL: v_maximumnum_v4f16_nnan: 2039; GFX10: ; %bb.0: 2040; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2041; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 2042; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 2043; GFX10-NEXT: s_setpc_b64 s[30:31] 2044; 2045; GFX11-LABEL: v_maximumnum_v4f16_nnan: 2046; GFX11: ; %bb.0: 2047; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2048; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 2049; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 2050; GFX11-NEXT: s_setpc_b64 s[30:31] 2051; 2052; GFX12-LABEL: v_maximumnum_v4f16_nnan: 2053; GFX12: ; %bb.0: 2054; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2055; GFX12-NEXT: s_wait_expcnt 0x0 2056; GFX12-NEXT: 
s_wait_samplecnt 0x0 2057; GFX12-NEXT: s_wait_bvhcnt 0x0 2058; GFX12-NEXT: s_wait_kmcnt 0x0 2059; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 2060; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 2061; GFX12-NEXT: s_setpc_b64 s[30:31] 2062 %result = call nnan <4 x half> @llvm.maximumnum.v4f16(<4 x half> %x, <4 x half> %y) 2063 ret <4 x half> %result 2064} 2065 2066define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) { 2067; GFX8-LABEL: v_maximumnum_v6f16: 2068; GFX8: ; %bb.0: 2069; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2070; GFX8-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2071; GFX8-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2072; GFX8-NEXT: v_max_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2073; GFX8-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2074; GFX8-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2075; GFX8-NEXT: v_max_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2076; GFX8-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2077; GFX8-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2078; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 2079; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 2080; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 2081; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 2082; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 2083; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 2084; GFX8-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2085; GFX8-NEXT: v_max_f16_e32 v2, v2, v5 2086; GFX8-NEXT: v_max_f16_e32 v1, v1, v4 2087; GFX8-NEXT: v_max_f16_e32 v0, v0, v3 2088; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 2089; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 
2090; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 2091; GFX8-NEXT: s_setpc_b64 s[30:31] 2092; 2093; GFX9-LABEL: v_maximumnum_v6f16: 2094; GFX9: ; %bb.0: 2095; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2096; GFX9-NEXT: v_pk_max_f16 v3, v3, v3 2097; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 2098; GFX9-NEXT: v_pk_max_f16 v0, v0, v3 2099; GFX9-NEXT: v_pk_max_f16 v3, v4, v4 2100; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 2101; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 2102; GFX9-NEXT: v_pk_max_f16 v3, v5, v5 2103; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 2104; GFX9-NEXT: v_pk_max_f16 v2, v2, v3 2105; GFX9-NEXT: s_setpc_b64 s[30:31] 2106; 2107; GFX10-LABEL: v_maximumnum_v6f16: 2108; GFX10: ; %bb.0: 2109; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2110; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 2111; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 2112; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 2113; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 2114; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 2115; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 2116; GFX10-NEXT: v_pk_max_f16 v0, v0, v3 2117; GFX10-NEXT: v_pk_max_f16 v1, v1, v4 2118; GFX10-NEXT: v_pk_max_f16 v2, v2, v5 2119; GFX10-NEXT: s_setpc_b64 s[30:31] 2120; 2121; GFX11-LABEL: v_maximumnum_v6f16: 2122; GFX11: ; %bb.0: 2123; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2124; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 2125; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 2126; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 2127; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 2128; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 2129; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 2130; GFX11-NEXT: v_pk_max_f16 v0, v0, v3 2131; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 2132; GFX11-NEXT: v_pk_max_f16 v1, v1, v4 2133; GFX11-NEXT: v_pk_max_f16 v2, v2, v5 2134; GFX11-NEXT: s_setpc_b64 s[30:31] 2135; 2136; GFX12-LABEL: v_maximumnum_v6f16: 2137; GFX12: ; %bb.0: 2138; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2139; GFX12-NEXT: s_wait_expcnt 0x0 2140; GFX12-NEXT: s_wait_samplecnt 0x0 2141; GFX12-NEXT: s_wait_bvhcnt 
0x0 2142; GFX12-NEXT: s_wait_kmcnt 0x0 2143; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 2144; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 2145; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 2146; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 2147; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 2148; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 2149; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v3 2150; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 2151; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v4 2152; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v5 2153; GFX12-NEXT: s_setpc_b64 s[30:31] 2154 %result = call <6 x half> @llvm.maximumnum.v6f16(<6 x half> %x, <6 x half> %y) 2155 ret <6 x half> %result 2156} 2157 2158define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) { 2159; GFX8-LABEL: v_maximumnum_v8f16: 2160; GFX8: ; %bb.0: 2161; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2162; GFX8-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2163; GFX8-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2164; GFX8-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2165; GFX8-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2166; GFX8-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2167; GFX8-NEXT: v_max_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2168; GFX8-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2169; GFX8-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2170; GFX8-NEXT: v_max_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2171; GFX8-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:WORD_1 2172; GFX8-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2173; GFX8-NEXT: v_max_f16_e32 v7, v7, v7 2174; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 2175; GFX8-NEXT: v_max_f16_e32 v6, v6, v6 2176; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 2177; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 2178; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 2179; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 2180; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 2181; GFX8-NEXT: v_max_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2182; GFX8-NEXT: v_max_f16_e32 v3, v3, v7 2183; GFX8-NEXT: v_max_f16_e32 v2, v2, v6 2184; GFX8-NEXT: v_max_f16_e32 v1, v1, v5 2185; GFX8-NEXT: v_max_f16_e32 v0, v0, v4 2186; GFX8-NEXT: v_or_b32_e32 v0, v0, v11 2187; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 2188; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 2189; GFX8-NEXT: v_or_b32_e32 v3, v3, v8 2190; GFX8-NEXT: s_setpc_b64 s[30:31] 2191; 2192; GFX9-LABEL: v_maximumnum_v8f16: 2193; GFX9: ; %bb.0: 2194; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2195; GFX9-NEXT: v_pk_max_f16 v4, v4, v4 2196; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 2197; GFX9-NEXT: v_pk_max_f16 v0, v0, v4 2198; GFX9-NEXT: v_pk_max_f16 v4, v5, v5 2199; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 2200; GFX9-NEXT: v_pk_max_f16 v1, v1, v4 2201; GFX9-NEXT: v_pk_max_f16 v4, v6, v6 2202; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 2203; GFX9-NEXT: v_pk_max_f16 v2, v2, v4 2204; GFX9-NEXT: v_pk_max_f16 v4, v7, v7 2205; GFX9-NEXT: v_pk_max_f16 v3, v3, v3 2206; GFX9-NEXT: v_pk_max_f16 v3, v3, v4 2207; GFX9-NEXT: s_setpc_b64 s[30:31] 2208; 2209; GFX10-LABEL: v_maximumnum_v8f16: 2210; GFX10: ; %bb.0: 2211; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2212; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 2213; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 2214; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 2215; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 2216; GFX10-NEXT: v_pk_max_f16 v6, v6, v6 2217; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 2218; GFX10-NEXT: 
v_pk_max_f16 v7, v7, v7 2219; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 2220; GFX10-NEXT: v_pk_max_f16 v0, v0, v4 2221; GFX10-NEXT: v_pk_max_f16 v1, v1, v5 2222; GFX10-NEXT: v_pk_max_f16 v2, v2, v6 2223; GFX10-NEXT: v_pk_max_f16 v3, v3, v7 2224; GFX10-NEXT: s_setpc_b64 s[30:31] 2225; 2226; GFX11-LABEL: v_maximumnum_v8f16: 2227; GFX11: ; %bb.0: 2228; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2229; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 2230; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 2231; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 2232; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 2233; GFX11-NEXT: v_pk_max_f16 v6, v6, v6 2234; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 2235; GFX11-NEXT: v_pk_max_f16 v7, v7, v7 2236; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 2237; GFX11-NEXT: v_pk_max_f16 v0, v0, v4 2238; GFX11-NEXT: v_pk_max_f16 v1, v1, v5 2239; GFX11-NEXT: v_pk_max_f16 v2, v2, v6 2240; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 2241; GFX11-NEXT: v_pk_max_f16 v3, v3, v7 2242; GFX11-NEXT: s_setpc_b64 s[30:31] 2243; 2244; GFX12-LABEL: v_maximumnum_v8f16: 2245; GFX12: ; %bb.0: 2246; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2247; GFX12-NEXT: s_wait_expcnt 0x0 2248; GFX12-NEXT: s_wait_samplecnt 0x0 2249; GFX12-NEXT: s_wait_bvhcnt 0x0 2250; GFX12-NEXT: s_wait_kmcnt 0x0 2251; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 2252; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 2253; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 2254; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 2255; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6 2256; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 2257; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7 2258; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 2259; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v4 2260; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v5 2261; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v6 2262; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) 2263; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v7 2264; GFX12-NEXT: s_setpc_b64 s[30:31] 2265 %result = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %x, <8 x half> %y) 2266 ret <8 x half> %result 
}

; <2 x float> maximumnum with no fast-math flags on the call. The expected code
; first runs every operand element through an extra instruction (v_mul_f32 by
; 1.0 for gfx803, a self-max for the other targets) and only then takes the max.
; NOTE(review): presumably the extra step quiets signaling NaNs - confirm.
define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX8-LABEL: v_maximumnum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_max_f32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
; GFX9-NEXT: v_max_f32_e32 v2, v3, v3
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_max_f32_e32 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %result
}

; <2 x float> maximumnum with the nnan flag on the call. The expected code is
; only the per-element max; no per-operand instruction precedes it.
define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX8-LABEL: v_maximumnum_v2f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v2f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v2f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v2f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <2 x float> @llvm.maximumnum.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %result
}

; <3 x float> maximumnum with no fast-math flags. Each operand element gets an
; extra instruction (v_mul_f32 by 1.0 for gfx803, a self-max elsewhere) before
; the max. In the gfx1100/gfx1200 checks the final maxes are one dual
; instruction plus one single instruction for the third element.
define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX8-LABEL: v_maximumnum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v5
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_max_f32_e32 v2, v2, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
; GFX9-NEXT: v_max_f32_e32 v3, v4, v4
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: v_max_f32_e32 v3, v5, v5
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_max_f32_e32 v2, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> %x, <3 x float> %y)
  ret <3 x float> %result
}

; <3 x float> maximumnum with the nnan flag on the call. The expected code is
; only the three per-element maxes.
define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX8-LABEL: v_maximumnum_v3f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v3f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v3f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v3f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <3 x float> @llvm.maximumnum.v3f32(<3 x float> %x, <3 x float> %y)
  ret <3 x float> %result
}

; <4 x float> maximumnum with no fast-math flags. Each operand element gets an
; extra instruction (v_mul_f32 by 1.0 for gfx803, a self-max elsewhere) before
; the four per-element maxes.
define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX8-LABEL: v_maximumnum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v5
; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v6
; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX8-NEXT: v_max_f32_e32 v2, v2, v4
; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v7
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX8-NEXT: v_max_f32_e32 v3, v3, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v4, v4, v4
; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
; GFX9-NEXT: v_max_f32_e32 v4, v5, v5
; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
; GFX9-NEXT: v_max_f32_e32 v4, v6, v6
; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
; GFX9-NEXT: v_max_f32_e32 v2, v2, v4
; GFX9-NEXT: v_max_f32_e32 v4, v7, v7
; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
; GFX9-NEXT: v_max_f32_e32 v3, v3, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v4, v4, v4
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_max_f32_e32 v5, v5, v5
; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
; GFX10-NEXT: v_max_f32_e32 v6, v6, v6
; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
; GFX10-NEXT: v_max_f32_e32 v7, v7, v7
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3
; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %result
}

; <4 x float> maximumnum with the nnan flag on the call. The expected code is
; only the four per-element maxes.
define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX8-LABEL: v_maximumnum_v4f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v4f32_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f32_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v4f32_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v4f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %result
}

; <2 x double> maximumnum with no fast-math flags. On every checked target the
; expected code self-maxes each 64-bit operand element first and then takes
; the two per-element maxes.
; NOTE(review): presumably the self-max quiets signaling NaNs - confirm.
define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX8-LABEL: v_maximumnum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %result
}

; <2 x double> maximumnum with the nnan flag on the call. The expected code is
; only the two per-element maxes.
define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX8-LABEL: v_maximumnum_v2f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v2f64_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v2f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v2f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <2 x double> @llvm.maximumnum.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %result
}

; <3 x double> maximumnum with no fast-math flags. On every checked target the
; expected code self-maxes each 64-bit operand element first and then takes
; the three per-element maxes.
define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX8-LABEL: v_maximumnum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v3f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v3f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y)
  ret <3 x double> %result
}

; <3 x double> maximumnum with the nnan flag on the call. The expected code is
; only the three per-element maxes.
define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX8-LABEL: v_maximumnum_v3f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v3f64_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v3f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v3f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y)
  ret <3 x double> %result
}

; <4 x double> maximumnum with no fast-math flags. On every checked target the
; expected code self-maxes each 64-bit operand element first and then takes
; the four per-element maxes.
define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX8-LABEL: v_maximumnum_v4f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v4f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX10-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX10-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX11-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
; GFX12-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11]
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[12:13]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> %x, <4 x double> %y)
  ret <4 x double> %result
}

; <4 x double> maximumnum with the nnan flag on the call. The expected code is
; only the four per-element maxes.
define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX8-LABEL: v_maximumnum_v4f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximumnum_v4f64_nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f64_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximumnum_v4f64_nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximumnum_v4f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9]
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11]
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[12:13]
; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan <4 x double> @llvm.maximumnum.v4f64(<4 x double> %x, <4 x double> %y)
  ret <4 x double> %result
}