; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s

; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s

; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s

; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-NNAN %s

; Codegen test for a "select the smaller operand" pattern on half-precision
; values. Every function below compares %a and %b with `fcmp ule` (true when
; the operands are unordered or %a <= %b) and then selects %a when the compare
; is true and %b otherwise.
;
; The RUN lines above compile this file for the amdgcn triple with -mcpu=gfx900
; (GFX9-* prefixes), -mcpu=fiji (VI-* prefixes), no -mcpu option (SI-*
; prefixes) and -mcpu=gfx1100 (GFX11-* prefixes). Each target is run twice,
; once with default FP options (*-SAFE prefixes) and once with
; -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math (*-NNAN prefixes).
;
; The assertion lines are autogenerated (see the NOTE on the first line), so
; regenerate them with that script instead of editing them by hand.

; Scalar half case. In the expectations below, the SAFE runs for gfx900, fiji
; and gfx1100 use v_cmp_ngt_f16 followed by v_cndmask_b32, the SI SAFE run uses
; v_min_legacy_f32 after v_cvt conversions, and the NNAN runs use v_min_f16
; (v_min_f32 after v_cvt conversions for SI).
define half @test_fmin_legacy_ule_f16(half %a, half %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v1, v0
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v1
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule half %a, %b
  %val = select i1 %cmp, half %a, half %b
  ret half %val
}

; <2 x half> case. The gfx900, fiji and gfx1100 SAFE expectations contain two
; compare/select pairs; the gfx900 and gfx1100 NNAN expectations contain a
; single v_pk_min_f16.
define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v2, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v3, v1
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v2
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v3
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-SAFE-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <2 x half> %a, %b
  %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
  ret <2 x half> %val
}

; <3 x half> case. The gfx900, fiji and gfx1100 SAFE expectations contain three
; compare/select pairs; the gfx900 and gfx1100 NNAN expectations contain two
; v_pk_min_f16 instructions.
define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT: v_perm_b32 v0, v4, v0, s4
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v3, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v4, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v5, v2
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v3
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v4
; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v5
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-SAFE-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <3 x half> %a, %b
  %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
  ret <3 x half> %val
}

; <4 x half> case. The gfx900, fiji and gfx1100 SAFE expectations contain four
; compare/select pairs; the gfx900 and gfx1100 NNAN expectations contain two
; v_pk_min_f16 instructions.
define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT: v_perm_b32 v0, v4, v0, s4
; GFX9-SAFE-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6
; VI-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v6
; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v4, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v5, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v6, v2
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v7, v3
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v4
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v5
; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v6
; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v7
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX11-SAFE-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <4 x half> %a, %b
  %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
  ret <4 x half> %val
}

; <8 x half> case. The gfx900, fiji and gfx1100 SAFE expectations contain eight
; compare/select pairs; the gfx900 and gfx1100 NNAN expectations contain four
; v_pk_min_f16 instructions.
define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-SAFE-NEXT: v_perm_b32 v0, v8, v0, s4
; GFX9-SAFE-NEXT: v_perm_b32 v1, v10, v1, s4
; GFX9-SAFE-NEXT: v_perm_b32 v2, v12, v2, s4
; GFX9-SAFE-NEXT: v_perm_b32 v3, v14, v3, s4
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v4
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v5
; GFX9-NNAN-NEXT: v_pk_min_f16 v2, v2, v6
; GFX9-NNAN-NEXT: v_pk_min_f16 v3, v3, v7
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v15, v14
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
; VI-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v13, v12
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v11, v10
; VI-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v9, v8
; VI-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v7
; VI-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v2, v6
; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v5
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v8
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v10
; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v12
; VI-SAFE-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v14
; VI-SAFE-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v3, v3, v7
; VI-NNAN-NEXT: v_min_f16_e32 v2, v2, v6
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v5
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v4
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v8, v0
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v9, v1
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v10, v2
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v11, v3
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v4, v12, v4
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v5, v13, v5
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v6, v14, v6
; SI-SAFE-NEXT: v_min_legacy_f32_e32 v7, v15, v7
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v8
; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v9
; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v10
; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v11
; SI-NNAN-NEXT: v_min_f32_e32 v4, v4, v12
; SI-NNAN-NEXT: v_min_f32_e32 v5, v5, v13
; SI-NNAN-NEXT: v_min_f32_e32 v6, v6, v14
; SI-NNAN-NEXT: v_min_f32_e32 v7, v7, v15
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v7
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v3
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v5
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v1
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v11, v10
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v13, v12
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v11, v12, v13, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v15, v14
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v12, v14, v15, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v9, v8
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v2, v6
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v4
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_perm_b32 v2, v11, v2, 0x5040100
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v5
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v7
; GFX11-SAFE-NEXT: v_perm_b32 v1, v12, v1, 0x5040100
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
; GFX11-SAFE-NEXT: v_perm_b32 v0, v8, v0, 0x5040100
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_perm_b32 v3, v10, v3, 0x5040100
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v4
; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v5
; GFX11-NNAN-NEXT: v_pk_min_f16 v2, v2, v6
; GFX11-NNAN-NEXT: v_pk_min_f16 v3, v3, v7
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
  %cmp = fcmp ule <8 x half> %a, %b
  %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
  ret <8 x half> %val
}

; Attribute group #0 is attached to every function defined in this file.
attributes #0 = { nounwind }