; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s

; Codegen checks for the llvm.minimum intrinsic on half and half-vector
; operands, with one assertion group per -mcpu value listed above.
; The gfx703 command is spelled "xUN", so lit does not execute it; no GFX7
; assertion group appears in the part of the file reviewed here.

; Scalar half llvm.minimum on two ordinary (non-inreg) arguments, no
; fast-math flags on the call.
define half @v_minimum_f16(half %src0, half %src1) {
; GFX8-LABEL: v_minimum_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f16_e32 v2, v0, v1
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f16_e32 v2, v0, v1
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call half @llvm.minimum.f16(half %src0, half %src1)
  ret half %op
}

; Same as v_minimum_f16, but the call carries the nnan flag.
define half @v_minimum_f16__nnan(half %src0, half %src1) {
; GFX8-LABEL: v_minimum_f16__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_f16__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_f16__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call nnan half @llvm.minimum.f16(half %src0, half %src1)
  ret half %op
}

; Same as v_minimum_f16, but the call carries the nsz flag.
define half @v_minimum_f16__nsz(half %src0, half %src1) {
; GFX8-LABEL: v_minimum_f16__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_f16__nsz:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_f16__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f16_e32 v2, v0, v1
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_f16__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f16_e32 v2, v0, v1
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call nsz half @llvm.minimum.f16(half %src0, half %src1)
  ret half %op
}

; Same as v_minimum_f16, but the call carries both nnan and nsz.
define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) {
; GFX8-LABEL: v_minimum_f16__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_f16__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_f16__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call nnan nsz half @llvm.minimum.f16(half %src0, half %src1)
  ret half %op
}

; The minimum call itself has no fast-math flags; only its first operand is
; produced by an nnan-flagged fadd of %arg0 and 1.0.
define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
; GFX8-LABEL: v_minimum_f16__nnan_src0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_f16__nnan_src0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_f16__nnan_src0:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan_src0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX10-NEXT: v_min_f16_e32 v2, v0, v1
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_f16__nnan_src0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_min_f16_e32 v2, v0, v1
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f16__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %src0 = fadd nnan half %arg0, 1.0
  %op = call half @llvm.minimum.f16(half %src0, half %src1)
  ret half %op
}

; Mirror of v_minimum_f16__nnan_src0: the nnan-flagged fadd feeds the second
; operand instead of the first.
define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
; GFX8-LABEL: v_minimum_f16__nnan_src1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX8-NEXT: v_min_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_f16__nnan_src1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX900-NEXT: v_min_f16_e32 v2, v0, v1
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_f16__nnan_src1:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX950-NEXT: v_min_f16_e32 v2, v0, v1
; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_f16__nnan_src1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX10-NEXT: v_min_f16_e32 v2, v0, v1
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_f16__nnan_src1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_min_f16_e32 v2, v0, v1
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_f16__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %src1 = fadd nnan half %arg1, 1.0
  %op = call half @llvm.minimum.f16(half %src0, half %src1)
  ret half %op
}

; Both half operands are passed inreg. The result is bitcast to i16,
; zero-extended to i32 and handed to an inline asm with an "s" constraint, so
; the value stays live without being returned.
define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
; GFX8-LABEL: s_minimum_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s17
; GFX8-NEXT: v_min_f16_e32 v1, s16, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: s_minimum_f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, s17
; GFX900-NEXT: v_min_f16_e32 v1, s16, v0
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: s_minimum_f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_mov_b32_e32 v0, s1
; GFX950-NEXT: v_min_f16_e32 v1, s0, v0
; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX950-NEXT: ;;#ASMSTART
; GFX950-NEXT: ; use v0
; GFX950-NEXT: ;;#ASMEND
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f16_e64 v0, s16, s17
; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s16, s17
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v0
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_minimum_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_min_f16_e64 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_minimum_f16 s0, s0, s1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX12-NEXT: s_and_b32 s0, 0xffff, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call half @llvm.minimum.f16(half %src0, half %src1)
  %cast = bitcast half %op to i16
  %zext = zext i16 %cast to i32
  call void asm sideeffect "; use $0", "s"(i32 %zext)
  ret void
}

; <2 x half> llvm.minimum on ordinary (non-inreg) arguments, no fast-math
; flags on the call.
define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-LABEL: v_minimum_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_min_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
; GFX8-NEXT: v_min_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v2f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_min_f16 v2, v0, v1
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v2f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v2, v0, v1
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v2, v0, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
  ret <2 x half> %op
}

; Same as v_minimum_v2f16, but the call carries the nnan flag.
define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX8-LABEL: v_minimum_v2f16__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v2f16__nnan:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v2f16__nnan:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f16__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call nnan <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
  ret <2 x half> %op
}

; Same as v_minimum_v2f16, but the call carries the nsz flag.
define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-LABEL: v_minimum_v2f16__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_min_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
; GFX8-NEXT: v_min_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v2f16__nsz:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_min_f16 v2, v0, v1
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v2f16__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v2, v0, v1
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f16__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v2, v0, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call nsz <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
  ret <2 x half> %op
}

; Same as v_minimum_v2f16, but the call carries both nnan and nsz.
define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v1, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call nnan nsz <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
  ret <2 x half> %op
}

; Both <2 x half> operands are passed inreg. The result is bitcast to i32 and
; handed to an inline asm with an "s" constraint instead of being returned.
define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-LABEL: s_minimum_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s4, s17, 16
; GFX8-NEXT: s_lshr_b32 s5, s16, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_min_f16_e32 v1, s5, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s5, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX8-NEXT: v_mov_b32_e32 v1, s17
; GFX8-NEXT: v_min_f16_e32 v3, s16, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s16, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: s_minimum_v2f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, s17
; GFX900-NEXT: v_mov_b32_e32 v1, s17
; GFX900-NEXT: s_lshr_b32 s4, s17, 16
; GFX900-NEXT: v_pk_min_f16 v1, s16, v1
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0
; GFX900-NEXT: s_lshr_b32 s5, s16, 16
; GFX900-NEXT: v_mov_b32_e32 v3, s4
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s5, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: s_minimum_v2f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_mov_b32_e32 v0, s0
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, s1, s1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: ;;#ASMSTART
; GFX950-NEXT: ; use v0
; GFX950-NEXT: ;;#ASMEND
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, s16, s17
; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s16, s17
; GFX10-NEXT: s_lshr_b32 s4, s17, 16
; GFX10-NEXT: s_lshr_b32 s5, s16, 16
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s5, s4
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v0
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_minimum_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_lshr_b32 s2, s1, 16
; GFX11-NEXT: s_lshr_b32 s0, s0, 16
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: s_minimum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, s0, s1
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
  %cast = bitcast <2 x half> %op to i32
  call void asm sideeffect "; use $0", "s"(i32 %cast)
  ret void
}

; <3 x half> llvm.minimum on ordinary (non-inreg) arguments, no fast-math
; flags on the call.
define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX8-LABEL: v_minimum_v3f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_min_f16_e32 v6, v5, v4
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
; GFX8-NEXT: v_min_f16_e32 v5, v1, v3
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX8-NEXT: v_min_f16_e32 v3, v0, v2
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v3f16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_min_f16 v4, v1, v3
; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
; GFX900-NEXT: v_pk_min_f16 v3, v0, v2
; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v3f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v3
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_pk_min_f16 v2, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v4, v0, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5
; GFX11-NEXT: v_pk_min_f16 v4, v1, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3
; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
  ret <3 x half> %op
}

; Same as v_minimum_v3f16, but the call carries the nnan flag.
define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX8-LABEL: v_minimum_v3f16__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_min_f16_e32 v0, v0, v2
; GFX8-NEXT: v_min_f16_e32 v1, v1, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v3f16__nnan:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v3f16__nnan:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v3
; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f16__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_f16 v0, v0, v2
; GFX10-NEXT: v_pk_min_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f16__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = call nnan <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
  ret <3 x half> %op
}

define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX8-LABEL: v_minimum_v3f16__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_min_f16_e32 v6, v5, v4
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4
; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
; GFX8-NEXT: v_min_f16_e32 v5, v1, v3
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
; GFX8-NEXT: v_min_f16_e32 v3, v0, v2
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v3f16__nsz:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0)
lgkmcnt(0) 958; GFX900-NEXT: v_pk_min_f16 v4, v1, v3 959; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00 960; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 961; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 962; GFX900-NEXT: v_pk_min_f16 v3, v0, v2 963; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 964; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 965; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3 966; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 967; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 968; GFX900-NEXT: s_mov_b32 s4, 0x5040100 969; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 970; GFX900-NEXT: s_setpc_b64 s[30:31] 971; 972; GFX950-LABEL: v_minimum_v3f16__nsz: 973; GFX950: ; %bb.0: 974; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 975; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v3 976; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2 977; GFX950-NEXT: s_setpc_b64 s[30:31] 978; 979; GFX10-LABEL: v_minimum_v3f16__nsz: 980; GFX10: ; %bb.0: 981; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 982; GFX10-NEXT: v_pk_min_f16 v4, v0, v2 983; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 984; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4 985; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo 986; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 987; GFX10-NEXT: v_pk_min_f16 v2, v1, v3 988; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo 989; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 990; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100 991; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo 992; GFX10-NEXT: s_setpc_b64 s[30:31] 993; 994; GFX11-LABEL: v_minimum_v3f16__nsz: 995; GFX11: ; %bb.0: 996; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 997; GFX11-NEXT: v_pk_min_f16 v4, v0, v2 998; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 999; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1000; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1001; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 1002; 
GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4 1003; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo 1004; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5 1005; GFX11-NEXT: v_pk_min_f16 v4, v1, v3 1006; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1007; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo 1008; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1009; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 1010; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1011; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1012; GFX11-NEXT: s_setpc_b64 s[30:31] 1013; 1014; GFX12-LABEL: v_minimum_v3f16__nsz: 1015; GFX12: ; %bb.0: 1016; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1017; GFX12-NEXT: s_wait_expcnt 0x0 1018; GFX12-NEXT: s_wait_samplecnt 0x0 1019; GFX12-NEXT: s_wait_bvhcnt 0x0 1020; GFX12-NEXT: s_wait_kmcnt 0x0 1021; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 1022; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 1023; GFX12-NEXT: s_setpc_b64 s[30:31] 1024 %op = call nsz <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1) 1025 ret <3 x half> %op 1026} 1027 1028define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) { 1029; GFX8-LABEL: v_minimum_v3f16__nnan_nsz: 1030; GFX8: ; %bb.0: 1031; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1032; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1033; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 1034; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 1035; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 1036; GFX8-NEXT: s_setpc_b64 s[30:31] 1037; 1038; GFX900-LABEL: v_minimum_v3f16__nnan_nsz: 1039; GFX900: ; %bb.0: 1040; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1041; GFX900-NEXT: v_pk_min_f16 v0, v0, v2 1042; GFX900-NEXT: v_pk_min_f16 v1, v1, v3 1043; GFX900-NEXT: s_setpc_b64 s[30:31] 1044; 1045; GFX950-LABEL: v_minimum_v3f16__nnan_nsz: 1046; GFX950: ; %bb.0: 1047; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1048; GFX950-NEXT: 
v_pk_minimum3_f16 v1, v1, v3, v3 1049; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2 1050; GFX950-NEXT: s_setpc_b64 s[30:31] 1051; 1052; GFX10-LABEL: v_minimum_v3f16__nnan_nsz: 1053; GFX10: ; %bb.0: 1054; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1055; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 1056; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 1057; GFX10-NEXT: s_setpc_b64 s[30:31] 1058; 1059; GFX11-LABEL: v_minimum_v3f16__nnan_nsz: 1060; GFX11: ; %bb.0: 1061; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1062; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 1063; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 1064; GFX11-NEXT: s_setpc_b64 s[30:31] 1065; 1066; GFX12-LABEL: v_minimum_v3f16__nnan_nsz: 1067; GFX12: ; %bb.0: 1068; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1069; GFX12-NEXT: s_wait_expcnt 0x0 1070; GFX12-NEXT: s_wait_samplecnt 0x0 1071; GFX12-NEXT: s_wait_bvhcnt 0x0 1072; GFX12-NEXT: s_wait_kmcnt 0x0 1073; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 1074; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 1075; GFX12-NEXT: s_setpc_b64 s[30:31] 1076 %op = call nnan nsz <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1) 1077 ret <3 x half> %op 1078} 1079 1080define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) { 1081; GFX8-LABEL: v_minimum_v4f16: 1082; GFX8: ; %bb.0: 1083; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1084; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3 1085; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1 1086; GFX8-NEXT: v_min_f16_e32 v6, v5, v4 1087; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00 1088; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4 1089; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc 1090; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 1091; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1092; GFX8-NEXT: v_min_f16_e32 v8, v6, v5 1093; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 1094; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc 1095; GFX8-NEXT: v_min_f16_e32 v6, v1, v3 1096; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1097; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 1098; 
GFX8-NEXT: v_min_f16_e32 v3, v0, v2 1099; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1100; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 1101; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5 1102; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1103; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 1104; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1105; GFX8-NEXT: s_setpc_b64 s[30:31] 1106; 1107; GFX900-LABEL: v_minimum_v4f16: 1108; GFX900: ; %bb.0: 1109; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1110; GFX900-NEXT: v_pk_min_f16 v4, v1, v3 1111; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00 1112; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1113; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc 1114; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1115; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1116; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 1117; GFX900-NEXT: v_pk_min_f16 v3, v0, v2 1118; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1119; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 1120; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1121; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1122; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 1123; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1124; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 1125; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4 1126; GFX900-NEXT: s_setpc_b64 s[30:31] 1127; 1128; GFX950-LABEL: v_minimum_v4f16: 1129; GFX950: ; %bb.0: 1130; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1131; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2 1132; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v3 1133; GFX950-NEXT: s_setpc_b64 s[30:31] 1134; 1135; GFX10-LABEL: v_minimum_v4f16: 1136; GFX10: ; %bb.0: 1137; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1138; GFX10-NEXT: v_pk_min_f16 v4, v1, v3 1139; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1140; GFX10-NEXT: v_pk_min_f16 v5, v0, v2 1141; 
GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo 1142; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1143; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5 1144; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1145; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo 1146; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1147; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo 1148; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1149; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100 1150; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1151; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100 1152; GFX10-NEXT: s_setpc_b64 s[30:31] 1153; 1154; GFX11-LABEL: v_minimum_v4f16: 1155; GFX11: ; %bb.0: 1156; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1157; GFX11-NEXT: v_pk_min_f16 v4, v1, v3 1158; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1159; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 1160; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 1161; GFX11-NEXT: v_pk_min_f16 v7, v0, v2 1162; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2 1163; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1164; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1165; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1166; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7 1167; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1168; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo 1169; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8 1170; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1171; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo 1172; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5 1173; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 1174; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo 1175; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1176; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 1177; GFX11-NEXT: s_setpc_b64 s[30:31] 1178; 1179; GFX12-LABEL: v_minimum_v4f16: 1180; GFX12: ; %bb.0: 1181; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1182; 
GFX12-NEXT: s_wait_expcnt 0x0 1183; GFX12-NEXT: s_wait_samplecnt 0x0 1184; GFX12-NEXT: s_wait_bvhcnt 0x0 1185; GFX12-NEXT: s_wait_kmcnt 0x0 1186; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 1187; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 1188; GFX12-NEXT: s_setpc_b64 s[30:31] 1189 %op = call <4 x half> @llvm.minimum.v4f16(<4 x half> %src0, <4 x half> %src1) 1190 ret <4 x half> %op 1191} 1192 1193define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) { 1194; GFX8-LABEL: v_minimum_v4f16__nnan: 1195; GFX8: ; %bb.0: 1196; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1197; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1198; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1199; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 1200; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 1201; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 1202; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1203; GFX8-NEXT: s_setpc_b64 s[30:31] 1204; 1205; GFX900-LABEL: v_minimum_v4f16__nnan: 1206; GFX900: ; %bb.0: 1207; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1208; GFX900-NEXT: v_pk_min_f16 v0, v0, v2 1209; GFX900-NEXT: v_pk_min_f16 v1, v1, v3 1210; GFX900-NEXT: s_setpc_b64 s[30:31] 1211; 1212; GFX950-LABEL: v_minimum_v4f16__nnan: 1213; GFX950: ; %bb.0: 1214; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2 1216; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v3 1217; GFX950-NEXT: s_setpc_b64 s[30:31] 1218; 1219; GFX10-LABEL: v_minimum_v4f16__nnan: 1220; GFX10: ; %bb.0: 1221; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1222; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 1223; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 1224; GFX10-NEXT: s_setpc_b64 s[30:31] 1225; 1226; GFX11-LABEL: v_minimum_v4f16__nnan: 1227; GFX11: ; %bb.0: 1228; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1229; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 1230; GFX11-NEXT: 
v_pk_min_f16 v1, v1, v3 1231; GFX11-NEXT: s_setpc_b64 s[30:31] 1232; 1233; GFX12-LABEL: v_minimum_v4f16__nnan: 1234; GFX12: ; %bb.0: 1235; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1236; GFX12-NEXT: s_wait_expcnt 0x0 1237; GFX12-NEXT: s_wait_samplecnt 0x0 1238; GFX12-NEXT: s_wait_bvhcnt 0x0 1239; GFX12-NEXT: s_wait_kmcnt 0x0 1240; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 1241; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 1242; GFX12-NEXT: s_setpc_b64 s[30:31] 1243 %op = call nnan <4 x half> @llvm.minimum.v4f16(<4 x half> %src0, <4 x half> %src1) 1244 ret <4 x half> %op 1245} 1246 1247define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) { 1248; GFX8-LABEL: v_minimum_v4f16__nsz: 1249; GFX8: ; %bb.0: 1250; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1251; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3 1252; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1 1253; GFX8-NEXT: v_min_f16_e32 v6, v5, v4 1254; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00 1255; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4 1256; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc 1257; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 1258; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1259; GFX8-NEXT: v_min_f16_e32 v8, v6, v5 1260; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 1261; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc 1262; GFX8-NEXT: v_min_f16_e32 v6, v1, v3 1263; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1264; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 1265; GFX8-NEXT: v_min_f16_e32 v3, v0, v2 1266; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1267; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 1268; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5 1269; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1270; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 1271; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1272; GFX8-NEXT: s_setpc_b64 s[30:31] 1273; 1274; GFX900-LABEL: v_minimum_v4f16__nsz: 1275; GFX900: ; %bb.0: 1276; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1277; GFX900-NEXT: v_pk_min_f16 v4, v1, v3 1278; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00 1279; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1280; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc 1281; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1282; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1283; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 1284; GFX900-NEXT: v_pk_min_f16 v3, v0, v2 1285; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1286; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 1287; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1288; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1289; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 1290; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1291; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 1292; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4 1293; GFX900-NEXT: s_setpc_b64 s[30:31] 1294; 1295; GFX950-LABEL: v_minimum_v4f16__nsz: 1296; GFX950: ; %bb.0: 1297; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1298; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2 1299; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v3 1300; GFX950-NEXT: s_setpc_b64 s[30:31] 1301; 1302; GFX10-LABEL: v_minimum_v4f16__nsz: 1303; GFX10: ; %bb.0: 1304; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1305; GFX10-NEXT: v_pk_min_f16 v4, v1, v3 1306; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1307; GFX10-NEXT: v_pk_min_f16 v5, v0, v2 1308; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo 1309; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1310; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5 1311; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1312; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo 1313; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1314; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo 1315; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1316; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100 1317; GFX10-NEXT: 
v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1318; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100 1319; GFX10-NEXT: s_setpc_b64 s[30:31] 1320; 1321; GFX11-LABEL: v_minimum_v4f16__nsz: 1322; GFX11: ; %bb.0: 1323; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1324; GFX11-NEXT: v_pk_min_f16 v4, v1, v3 1325; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1326; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 1327; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 1328; GFX11-NEXT: v_pk_min_f16 v7, v0, v2 1329; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2 1330; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1331; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1332; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1333; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7 1334; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1335; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo 1336; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8 1337; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1338; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo 1339; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5 1340; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 1341; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo 1342; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1343; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 1344; GFX11-NEXT: s_setpc_b64 s[30:31] 1345; 1346; GFX12-LABEL: v_minimum_v4f16__nsz: 1347; GFX12: ; %bb.0: 1348; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1349; GFX12-NEXT: s_wait_expcnt 0x0 1350; GFX12-NEXT: s_wait_samplecnt 0x0 1351; GFX12-NEXT: s_wait_bvhcnt 0x0 1352; GFX12-NEXT: s_wait_kmcnt 0x0 1353; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 1354; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 1355; GFX12-NEXT: s_setpc_b64 s[30:31] 1356 %op = call nsz <4 x half> @llvm.minimum.v4f16(<4 x half> %src0, <4 x half> %src1) 1357 ret <4 x half> %op 1358} 1359 1360define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) { 1361; GFX8-LABEL: v_minimum_v4f16__nnan_nsz: 1362; GFX8: ; 
%bb.0: 1363; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1364; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1365; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1366; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 1367; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 1368; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 1369; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1370; GFX8-NEXT: s_setpc_b64 s[30:31] 1371; 1372; GFX900-LABEL: v_minimum_v4f16__nnan_nsz: 1373; GFX900: ; %bb.0: 1374; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1375; GFX900-NEXT: v_pk_min_f16 v0, v0, v2 1376; GFX900-NEXT: v_pk_min_f16 v1, v1, v3 1377; GFX900-NEXT: s_setpc_b64 s[30:31] 1378; 1379; GFX950-LABEL: v_minimum_v4f16__nnan_nsz: 1380; GFX950: ; %bb.0: 1381; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1382; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v2, v2 1383; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v3, v3 1384; GFX950-NEXT: s_setpc_b64 s[30:31] 1385; 1386; GFX10-LABEL: v_minimum_v4f16__nnan_nsz: 1387; GFX10: ; %bb.0: 1388; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1389; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 1390; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 1391; GFX10-NEXT: s_setpc_b64 s[30:31] 1392; 1393; GFX11-LABEL: v_minimum_v4f16__nnan_nsz: 1394; GFX11: ; %bb.0: 1395; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1396; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 1397; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 1398; GFX11-NEXT: s_setpc_b64 s[30:31] 1399; 1400; GFX12-LABEL: v_minimum_v4f16__nnan_nsz: 1401; GFX12: ; %bb.0: 1402; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1403; GFX12-NEXT: s_wait_expcnt 0x0 1404; GFX12-NEXT: s_wait_samplecnt 0x0 1405; GFX12-NEXT: s_wait_bvhcnt 0x0 1406; GFX12-NEXT: s_wait_kmcnt 0x0 1407; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 1408; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 1409; GFX12-NEXT: s_setpc_b64 s[30:31] 1410 %op = call nnan nsz <4 x half> @llvm.minimum.v4f16(<4 x 
half> %src0, <4 x half> %src1) 1411 ret <4 x half> %op 1412} 1413 1414define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) { 1415; GFX8-LABEL: v_minimum_v8f16: 1416; GFX8: ; %bb.0: 1417; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1418; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v7 1419; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3 1420; GFX8-NEXT: v_min_f16_e32 v10, v9, v8 1421; GFX8-NEXT: v_mov_b32_e32 v11, 0x7e00 1422; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v9, v8 1423; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v10, vcc 1424; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v6 1425; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v2 1426; GFX8-NEXT: v_min_f16_e32 v12, v10, v9 1427; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v10, v9 1428; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc 1429; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v5 1430; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v1 1431; GFX8-NEXT: v_min_f16_e32 v13, v12, v10 1432; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v12, v10 1433; GFX8-NEXT: v_cndmask_b32_e32 v10, v11, v13, vcc 1434; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v4 1435; GFX8-NEXT: v_lshrrev_b32_e32 v13, 16, v0 1436; GFX8-NEXT: v_min_f16_e32 v14, v13, v12 1437; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v13, v12 1438; GFX8-NEXT: v_cndmask_b32_e32 v12, v11, v14, vcc 1439; GFX8-NEXT: v_min_f16_e32 v13, v3, v7 1440; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v7 1441; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v13, vcc 1442; GFX8-NEXT: v_min_f16_e32 v7, v2, v6 1443; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v6 1444; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v7, vcc 1445; GFX8-NEXT: v_min_f16_e32 v6, v1, v5 1446; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v5 1447; GFX8-NEXT: v_cndmask_b32_e32 v1, v11, v6, vcc 1448; GFX8-NEXT: v_min_f16_e32 v5, v0, v4 1449; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v4 1450; GFX8-NEXT: v_cndmask_b32_e32 v0, v11, v5, vcc 1451; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v12 1452; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1453; GFX8-NEXT: v_lshlrev_b32_e32 
v4, 16, v10 1454; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1455; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v9 1456; GFX8-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1457; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v8 1458; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1459; GFX8-NEXT: s_setpc_b64 s[30:31] 1460; 1461; GFX900-LABEL: v_minimum_v8f16: 1462; GFX900: ; %bb.0: 1463; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1464; GFX900-NEXT: v_pk_min_f16 v8, v3, v7 1465; GFX900-NEXT: v_mov_b32_e32 v9, 0x7e00 1466; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v7 1467; GFX900-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc 1468; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v8 1469; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1 1470; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc 1471; GFX900-NEXT: v_pk_min_f16 v7, v2, v6 1472; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v6 1473; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc 1474; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v7 1475; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1 1476; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc 1477; GFX900-NEXT: v_pk_min_f16 v6, v1, v5 1478; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v5 1479; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc 1480; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v6 1481; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1 1482; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc 1483; GFX900-NEXT: v_pk_min_f16 v5, v0, v4 1484; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v4 1485; GFX900-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc 1486; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v5 1487; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1 1488; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc 1489; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1490; GFX900-NEXT: 
v_perm_b32 v0, v0, v6, s4 1491; GFX900-NEXT: v_perm_b32 v1, v1, v7, s4 1492; GFX900-NEXT: v_perm_b32 v2, v2, v8, s4 1493; GFX900-NEXT: v_perm_b32 v3, v3, v10, s4 1494; GFX900-NEXT: s_setpc_b64 s[30:31] 1495; 1496; GFX950-LABEL: v_minimum_v8f16: 1497; GFX950: ; %bb.0: 1498; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1499; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v4, v4 1500; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v5, v5 1501; GFX950-NEXT: v_pk_minimum3_f16 v2, v2, v6, v6 1502; GFX950-NEXT: v_pk_minimum3_f16 v3, v3, v7, v7 1503; GFX950-NEXT: s_setpc_b64 s[30:31] 1504; 1505; GFX10-LABEL: v_minimum_v8f16: 1506; GFX10: ; %bb.0: 1507; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1508; GFX10-NEXT: v_pk_min_f16 v8, v3, v7 1509; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7 1510; GFX10-NEXT: v_pk_min_f16 v9, v2, v6 1511; GFX10-NEXT: v_pk_min_f16 v12, v1, v5 1512; GFX10-NEXT: v_pk_min_f16 v13, v0, v4 1513; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v8, vcc_lo 1514; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6 1515; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v9 1516; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v8 1517; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo 1518; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1 1519; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo 1520; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5 1521; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v13 1522; GFX10-NEXT: v_perm_b32 v2, v2, v9, 0x5040100 1523; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v12, vcc_lo 1524; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4 1525; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v12 1526; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v13, vcc_lo 1527; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1 1528; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo 1529; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1 1530; GFX10-NEXT: v_perm_b32 v0, v0, v13, 0x5040100 1531; GFX10-NEXT: 
v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo 1532; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1 1533; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100 1534; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo 1535; GFX10-NEXT: v_perm_b32 v3, v3, v10, 0x5040100 1536; GFX10-NEXT: s_setpc_b64 s[30:31] 1537; 1538; GFX11-LABEL: v_minimum_v8f16: 1539; GFX11: ; %bb.0: 1540; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1541; GFX11-NEXT: v_pk_min_f16 v8, v3, v7 1542; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7 1543; GFX11-NEXT: v_pk_min_f16 v10, v2, v6 1544; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v6 1545; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v2 1546; GFX11-NEXT: v_pk_min_f16 v14, v1, v5 1547; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo 1548; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6 1549; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v10 1550; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v7 1551; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1552; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v8 1553; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo 1554; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v12, v11 1555; GFX11-NEXT: v_pk_min_f16 v11, v0, v4 1556; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v4 1557; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo 1558; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5 1559; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v0 1560; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v5 1561; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1562; GFX11-NEXT: v_lshrrev_b32_e32 v15, 16, v11 1563; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo 1564; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4 1565; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v14 1566; GFX11-NEXT: v_perm_b32 v2, v6, v2, 0x5040100 1567; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo 1568; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v13, v12 1569; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo 1570; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5 1571; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 
| instskip(SKIP_2) | instid1(VALU_DEP_2) 1572; GFX11-NEXT: v_perm_b32 v0, v4, v0, 0x5040100 1573; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo 1574; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7 1575; GFX11-NEXT: v_perm_b32 v1, v1, v10, 0x5040100 1576; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo 1577; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1578; GFX11-NEXT: v_perm_b32 v3, v3, v9, 0x5040100 1579; GFX11-NEXT: s_setpc_b64 s[30:31] 1580; 1581; GFX12-LABEL: v_minimum_v8f16: 1582; GFX12: ; %bb.0: 1583; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1584; GFX12-NEXT: s_wait_expcnt 0x0 1585; GFX12-NEXT: s_wait_samplecnt 0x0 1586; GFX12-NEXT: s_wait_bvhcnt 0x0 1587; GFX12-NEXT: s_wait_kmcnt 0x0 1588; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v4 1589; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 1590; GFX12-NEXT: v_pk_minimum_f16 v2, v2, v6 1591; GFX12-NEXT: v_pk_minimum_f16 v3, v3, v7 1592; GFX12-NEXT: s_setpc_b64 s[30:31] 1593 %op = call <8 x half> @llvm.minimum.v8f16(<8 x half> %src0, <8 x half> %src1) 1594 ret <8 x half> %op 1595} 1596
; v_minimum_v16f16 calls @llvm.minimum.v16f16 on two <16 x half> arguments and returns the <16 x half> result.
; The expected code below is listed per target (GFX8, GFX900, GFX950, GFX10, GFX11 and GFX12 blocks):
; the GFX12 block expects eight v_pk_minimum_f16, the GFX950 block eight v_pk_minimum3_f16 with the second
; operand repeated, and the remaining blocks pair a min instruction with v_cmp_o_f16 and a v_cndmask
; against the constant 0x7e00 for each element.
; These expectations are autogenerated (see the note on the first line of this file); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
1597define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) { 1598; GFX8-LABEL: v_minimum_v16f16: 1599; GFX8: ; %bb.0: 1600; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1601; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v14 1602; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v6 1603; GFX8-NEXT: v_min_f16_e32 v16, v18, v17 1604; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v18, v17 1605; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v13 1606; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v5 1607; GFX8-NEXT: v_min_f16_e32 v20, v18, v17 1608; GFX8-NEXT: v_cmp_o_f16_e64 s[4:5], v18, v17 1609; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v12 1610; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v4 1611; GFX8-NEXT: v_min_f16_e32 v21, v18, v17 1612; GFX8-NEXT: v_cmp_o_f16_e64 s[6:7], v18, v17 1613; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v11 1614; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v3 1615; GFX8-NEXT: v_min_f16_e32 v22, v18, v17 1616; GFX8-NEXT: v_cmp_o_f16_e64 s[8:9], v18, v17 
1617; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v10 1618; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v2 1619; GFX8-NEXT: v_min_f16_e32 v23, v18, v17 1620; GFX8-NEXT: v_cmp_o_f16_e64 s[10:11], v18, v17 1621; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v9 1622; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v1 1623; GFX8-NEXT: v_min_f16_e32 v24, v18, v17 1624; GFX8-NEXT: v_cmp_o_f16_e64 s[12:13], v18, v17 1625; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v8 1626; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v0 1627; GFX8-NEXT: v_min_f16_e32 v25, v18, v17 1628; GFX8-NEXT: v_cmp_o_f16_e64 s[14:15], v18, v17 1629; GFX8-NEXT: v_min_f16_e32 v17, v6, v14 1630; GFX8-NEXT: v_cmp_o_f16_e64 s[16:17], v6, v14 1631; GFX8-NEXT: v_min_f16_e32 v6, v5, v13 1632; GFX8-NEXT: v_cmp_o_f16_e64 s[18:19], v5, v13 1633; GFX8-NEXT: v_min_f16_e32 v5, v4, v12 1634; GFX8-NEXT: v_cmp_o_f16_e64 s[20:21], v4, v12 1635; GFX8-NEXT: v_min_f16_e32 v4, v3, v11 1636; GFX8-NEXT: v_cmp_o_f16_e64 s[22:23], v3, v11 1637; GFX8-NEXT: v_min_f16_e32 v11, v7, v15 1638; GFX8-NEXT: v_cmp_o_f16_e64 s[24:25], v7, v15 1639; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v15 1640; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v7 1641; GFX8-NEXT: v_mov_b32_e32 v19, 0x7e00 1642; GFX8-NEXT: v_min_f16_e32 v13, v7, v12 1643; GFX8-NEXT: v_cmp_o_f16_e64 s[26:27], v7, v12 1644; GFX8-NEXT: v_min_f16_e32 v3, v2, v10 1645; GFX8-NEXT: v_cndmask_b32_e64 v12, v19, v13, s[26:27] 1646; GFX8-NEXT: v_cndmask_b32_e32 v13, v19, v16, vcc 1647; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v10 1648; GFX8-NEXT: v_min_f16_e32 v14, v1, v9 1649; GFX8-NEXT: v_cndmask_b32_e32 v2, v19, v3, vcc 1650; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v9 1651; GFX8-NEXT: v_min_f16_e32 v7, v0, v8 1652; GFX8-NEXT: v_cndmask_b32_e64 v18, v19, v22, s[8:9] 1653; GFX8-NEXT: v_cndmask_b32_e64 v22, v19, v25, s[14:15] 1654; GFX8-NEXT: v_cndmask_b32_e32 v1, v19, v14, vcc 1655; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v8 1656; GFX8-NEXT: v_cndmask_b32_e64 v16, v19, v21, s[6:7] 1657; GFX8-NEXT: v_cndmask_b32_e64 v21, v19, v24, s[12:13] 1658; 
GFX8-NEXT: v_cndmask_b32_e32 v0, v19, v7, vcc 1659; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v22 1660; GFX8-NEXT: v_cndmask_b32_e64 v15, v19, v20, s[4:5] 1661; GFX8-NEXT: v_cndmask_b32_e64 v20, v19, v23, s[10:11] 1662; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1663; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v21 1664; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1665; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v20 1666; GFX8-NEXT: v_cndmask_b32_e64 v4, v19, v4, s[22:23] 1667; GFX8-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1668; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v18 1669; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v5, s[20:21] 1670; GFX8-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1671; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v16 1672; GFX8-NEXT: v_cndmask_b32_e64 v6, v19, v6, s[18:19] 1673; GFX8-NEXT: v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1674; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v15 1675; GFX8-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[24:25] 1676; GFX8-NEXT: v_cndmask_b32_e64 v17, v19, v17, s[16:17] 1677; GFX8-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1678; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v13 1679; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v12 1680; GFX8-NEXT: v_or_b32_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1681; GFX8-NEXT: v_or_b32_sdwa v7, v11, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1682; GFX8-NEXT: s_setpc_b64 s[30:31] 1683; 1684; GFX900-LABEL: v_minimum_v16f16: 1685; GFX900: ; %bb.0: 1686; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1687; GFX900-NEXT: v_pk_min_f16 v16, v7, v15 1688; GFX900-NEXT: v_mov_b32_e32 v17, 0x7e00 1689; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v7, 
v15 1690; GFX900-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc 1691; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v16 1692; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1 1693; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc 1694; GFX900-NEXT: v_pk_min_f16 v15, v6, v14 1695; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v6, v14 1696; GFX900-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc 1697; GFX900-NEXT: v_lshrrev_b32_e32 v15, 16, v15 1698; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1 1699; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc 1700; GFX900-NEXT: v_pk_min_f16 v14, v5, v13 1701; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v5, v13 1702; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc 1703; GFX900-NEXT: v_lshrrev_b32_e32 v14, 16, v14 1704; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1 1705; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc 1706; GFX900-NEXT: v_pk_min_f16 v13, v4, v12 1707; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v4, v12 1708; GFX900-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc 1709; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v13 1710; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1 1711; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc 1712; GFX900-NEXT: v_pk_min_f16 v12, v3, v11 1713; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v11 1714; GFX900-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc 1715; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v12 1716; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1 1717; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc 1718; GFX900-NEXT: v_pk_min_f16 v11, v2, v10 1719; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v10 1720; GFX900-NEXT: v_cndmask_b32_e32 v12, v17, v11, vcc 1721; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v11 1722; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1 1723; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc 1724; GFX900-NEXT: v_pk_min_f16 v10, v1, v9 1725; GFX900-NEXT: v_cmp_o_f16_e32 
vcc, v1, v9 1726; GFX900-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc 1727; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v10 1728; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1 1729; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc 1730; GFX900-NEXT: v_pk_min_f16 v9, v0, v8 1731; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v8 1732; GFX900-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc 1733; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v9 1734; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1 1735; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc 1736; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1737; GFX900-NEXT: v_perm_b32 v0, v0, v10, s4 1738; GFX900-NEXT: v_perm_b32 v1, v1, v11, s4 1739; GFX900-NEXT: v_perm_b32 v2, v2, v12, s4 1740; GFX900-NEXT: v_perm_b32 v3, v3, v13, s4 1741; GFX900-NEXT: v_perm_b32 v4, v4, v14, s4 1742; GFX900-NEXT: v_perm_b32 v5, v5, v15, s4 1743; GFX900-NEXT: v_perm_b32 v6, v6, v16, s4 1744; GFX900-NEXT: v_perm_b32 v7, v7, v18, s4 1745; GFX900-NEXT: s_setpc_b64 s[30:31] 1746; 1747; GFX950-LABEL: v_minimum_v16f16: 1748; GFX950: ; %bb.0: 1749; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1750; GFX950-NEXT: v_pk_minimum3_f16 v0, v0, v8, v8 1751; GFX950-NEXT: v_pk_minimum3_f16 v1, v1, v9, v9 1752; GFX950-NEXT: v_pk_minimum3_f16 v2, v2, v10, v10 1753; GFX950-NEXT: v_pk_minimum3_f16 v3, v3, v11, v11 1754; GFX950-NEXT: v_pk_minimum3_f16 v4, v4, v12, v12 1755; GFX950-NEXT: v_pk_minimum3_f16 v5, v5, v13, v13 1756; GFX950-NEXT: v_pk_minimum3_f16 v6, v6, v14, v14 1757; GFX950-NEXT: v_pk_minimum3_f16 v7, v7, v15, v15 1758; GFX950-NEXT: s_setpc_b64 s[30:31] 1759; 1760; GFX10-LABEL: v_minimum_v16f16: 1761; GFX10: ; %bb.0: 1762; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1763; GFX10-NEXT: v_pk_min_f16 v16, v7, v15 1764; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15 1765; GFX10-NEXT: v_pk_min_f16 v18, v6, v14 1766; GFX10-NEXT: v_pk_min_f16 v19, v3, v11 1767; GFX10-NEXT: v_pk_min_f16 v20, v2, v10 1768; GFX10-NEXT: 
v_lshrrev_b32_e32 v17, 16, v16 1769; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo 1770; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1 1771; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v18 1772; GFX10-NEXT: v_pk_min_f16 v21, v0, v8 1773; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo 1774; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14 1775; GFX10-NEXT: v_pk_min_f16 v17, v5, v13 1776; GFX10-NEXT: v_lshrrev_b32_e32 v23, 16, v21 1777; GFX10-NEXT: v_perm_b32 v7, v7, v16, 0x5040100 1778; GFX10-NEXT: v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo 1779; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1 1780; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17 1781; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo 1782; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13 1783; GFX10-NEXT: v_perm_b32 v6, v6, v18, 0x5040100 1784; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo 1785; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1 1786; GFX10-NEXT: v_pk_min_f16 v17, v4, v12 1787; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo 1788; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12 1789; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17 1790; GFX10-NEXT: v_perm_b32 v5, v5, v15, 0x5040100 1791; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo 1792; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11 1793; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v19 1794; GFX10-NEXT: v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo 1795; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1 1796; GFX10-NEXT: v_pk_min_f16 v11, v1, v9 1797; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo 1798; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10 1799; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v11 1800; GFX10-NEXT: v_perm_b32 v3, v3, v19, 0x5040100 1801; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo 1802; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9 1803; GFX10-NEXT: v_lshrrev_b32_e32 v20, 16, v20 1804; 
GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo 1805; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1 1806; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo 1807; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8 1808; GFX10-NEXT: v_perm_b32 v1, v1, v11, 0x5040100 1809; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo 1810; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1 1811; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo 1812; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1 1813; GFX10-NEXT: v_perm_b32 v0, v0, v9, 0x5040100 1814; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo 1815; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1 1816; GFX10-NEXT: v_perm_b32 v2, v2, v17, 0x5040100 1817; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo 1818; GFX10-NEXT: v_perm_b32 v4, v4, v13, 0x5040100 1819; GFX10-NEXT: s_setpc_b64 s[30:31] 1820; 1821; GFX11-LABEL: v_minimum_v16f16: 1822; GFX11: ; %bb.0: 1823; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1824; GFX11-NEXT: v_pk_min_f16 v16, v7, v15 1825; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v15 1826; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v7 1827; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15 1828; GFX11-NEXT: v_pk_min_f16 v15, v6, v14 1829; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v16 1830; GFX11-NEXT: v_pk_min_f16 v20, v4, v12 1831; GFX11-NEXT: v_pk_min_f16 v22, v2, v10 1832; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo 1833; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17 1834; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v14 1835; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v6 1836; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v8 1837; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v0 1838; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo 1839; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14 1840; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v15 1841; GFX11-NEXT: v_pk_min_f16 v14, v5, v13 1842; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1843; GFX11-NEXT: v_perm_b32 v7, v16, v7, 0x5040100 1844; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo 1845; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17 1846; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v13 1847; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v5 1848; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo 1849; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13 1850; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v14 1851; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) 1852; GFX11-NEXT: v_perm_b32 v6, v15, v6, 0x5040100 1853; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo 1854; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17 1855; GFX11-NEXT: v_pk_min_f16 v17, v3, v11 1856; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v20 1857; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo 1858; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12 1859; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v11 1860; GFX11-NEXT: v_lshrrev_b32_e32 v21, 16, v17 1861; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v12 1862; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1863; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo 1864; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v3 1865; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11 1866; GFX11-NEXT: v_perm_b32 v5, v13, v5, 0x5040100 1867; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo 1868; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1869; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v20, v19 1870; GFX11-NEXT: v_pk_min_f16 v19, v1, v9 1871; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v22 1872; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo 1873; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10 1874; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v10 1875; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1876; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1877; GFX11-NEXT: v_perm_b32 v3, v11, v3, 0x5040100 1878; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo 1879; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9 1880; GFX11-NEXT: v_lshrrev_b32_e32 v9, 
16, v9 1881; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1882; GFX11-NEXT: v_pk_min_f16 v22, v0, v8 1883; GFX11-NEXT: v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo 1884; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v19 1885; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1886; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9 1887; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v22 1888; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1889; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo 1890; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8 1891; GFX11-NEXT: v_perm_b32 v1, v1, v21, 0x5040100 1892; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo 1893; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v24, v23 1894; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo 1895; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10 1896; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) 1897; GFX11-NEXT: v_perm_b32 v0, v8, v0, 0x5040100 1898; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo 1899; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12 1900; GFX11-NEXT: v_perm_b32 v2, v2, v17, 0x5040100 1901; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo 1902; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1903; GFX11-NEXT: v_perm_b32 v4, v4, v14, 0x5040100 1904; GFX11-NEXT: s_setpc_b64 s[30:31] 1905; 1906; GFX12-LABEL: v_minimum_v16f16: 1907; GFX12: ; %bb.0: 1908; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1909; GFX12-NEXT: s_wait_expcnt 0x0 1910; GFX12-NEXT: s_wait_samplecnt 0x0 1911; GFX12-NEXT: s_wait_bvhcnt 0x0 1912; GFX12-NEXT: s_wait_kmcnt 0x0 1913; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v8 1914; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v9 1915; GFX12-NEXT: v_pk_minimum_f16 v2, v2, v10 1916; GFX12-NEXT: v_pk_minimum_f16 v3, v3, v11 1917; GFX12-NEXT: v_pk_minimum_f16 v4, v4, v12 1918; GFX12-NEXT: v_pk_minimum_f16 v5, v5, v13 1919; GFX12-NEXT: v_pk_minimum_f16 v6, v6, v14 1920; GFX12-NEXT: v_pk_minimum_f16 v7, v7, v15 
1921; GFX12-NEXT: s_setpc_b64 s[30:31] 1922 %op = call <16 x half> @llvm.minimum.v16f16(<16 x half> %src0, <16 x half> %src1) 1923 ret <16 x half> %op 1924} 1925;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 1926; GCN: {{.*}} 1927