; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s

; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s

; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s

; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-NNAN %s

; Every function below is the same pattern at a different vector width: an
; unordered-greater-than compare (fcmp ugt) of %a and %b feeding a select that
; returns %a when the compare is true and %b otherwise. Each target is run
; twice, once with default FP options and once with the no-NaNs and
; no-signed-zeros options enabled.

; Scalar half. Expected code: a compare plus conditional move in the default
; gfx900, fiji and gfx1100 runs, v_max_legacy_f32 after f16/f32 conversion in
; the default SI run, and a plain v_max in the relaxed-FP runs.
define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v1, v0
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v1
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v1
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; GFX11-NNAN:       ; %bb.0:
; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt half %a, %b
  %val = select i1 %cmp, half %a, half %b
  ret half %val
}

; <2 x half>. Expected code: the default gfx900, fiji and gfx1100 runs split
; the two halves, compare/select each and repack; the relaxed-FP runs use
; v_pk_max_f16 on gfx900 and gfx1100 and an SDWA max pair on fiji. Both SI runs
; operate on f32 values, one register per element.
define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT:    v_perm_b32 v0, v2, v0, s4
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v1
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v2
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v3
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v3, v2
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v1
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-SAFE-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX11-NNAN:       ; %bb.0:
; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v1
; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <2 x half> %a, %b
  %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
  ret <2 x half> %val
}

; <3 x half>, an odd element count. In the default gfx900, fiji and gfx1100
; runs the third element is compared and selected directly in v1/v3 without an
; unpack or repack step; the relaxed-FP runs on gfx900 and gfx1100 use two
; v_pk_max_f16 instructions.
define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT:    v_perm_b32 v0, v4, v0, s4
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v4
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v3, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v4, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v5, v2
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v3
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v4
; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v5
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v2
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v5, v4
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v1, v3
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-SAFE-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX11-NNAN:       ; %bb.0:
; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
; GFX11-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <3 x half> %a, %b
  %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
  ret <3 x half> %val
}

; <4 x half>. Same shape as the two-element case, doubled: four compare/select
; pairs and two repacks in the default gfx900, fiji and gfx1100 runs, two
; v_pk_max_f16 in the relaxed-FP gfx900 and gfx1100 runs.
define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT:    v_perm_b32 v0, v4, v0, s4
; GFX9-SAFE-NEXT:    v_perm_b32 v1, v6, v1, s4
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v5
; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v4
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v4
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v5
; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v6
; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v7
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v5, v4
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v7, v6
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v2
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v1, v3
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
; GFX11-SAFE-NEXT:    v_perm_b32 v1, v4, v1, 0x5040100
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX11-NNAN:       ; %bb.0:
; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
; GFX11-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <4 x half> %a, %b
  %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
  ret <4 x half> %val
}

; <8 x half>. Widest case in this file: eight compare/select pairs and four
; repacks in the default gfx900, fiji and gfx1100 runs, four v_pk_max_f16 in
; the relaxed-FP gfx900 and gfx1100 runs.
define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT:    v_perm_b32 v0, v8, v0, s4
; GFX9-SAFE-NEXT:    v_perm_b32 v1, v10, v1, s4
; GFX9-SAFE-NEXT:    v_perm_b32 v2, v12, v2, s4
; GFX9-SAFE-NEXT:    v_perm_b32 v3, v14, v3, s4
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v4
; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v5
; GFX9-NNAN-NEXT:    v_pk_max_f16 v2, v2, v6
; GFX9-NNAN-NEXT:    v_pk_max_f16 v3, v3, v7
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
; VI-SAFE-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v14
; VI-SAFE-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v3, v3, v7
; VI-NNAN-NEXT:    v_max_f16_e32 v2, v2, v6
; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v5
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v4
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v11
; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v10
; VI-NNAN-NEXT:    v_or_b32_e32 v2, v2, v9
; VI-NNAN-NEXT:    v_or_b32_e32 v3, v3, v8
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v15, v15
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v15, v15
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v7
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v5
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v1
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v11, v10
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v13, v12
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v11, v12, v13, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v15, v14
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v12, v14, v15, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v9, v8
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v2, v6
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v4
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT:    v_perm_b32 v2, v11, v2, 0x5040100
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v1, v5
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v3, v7
; GFX11-SAFE-NEXT:    v_perm_b32 v1, v12, v1, 0x5040100
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc_lo
; GFX11-SAFE-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-NEXT:    v_perm_b32 v3, v10, v3, 0x5040100
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX11-NNAN:       ; %bb.0:
; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v4
; GFX11-NNAN-NEXT:    v_pk_max_f16 v1, v1, v5
; GFX11-NNAN-NEXT:    v_pk_max_f16 v2, v2, v6
; GFX11-NNAN-NEXT:    v_pk_max_f16 v3, v3, v7
; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <8 x half> %a, %b
  %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
  ret <8 x half> %val
}

attributes #0 = { nounwind }