1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s 5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s 6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s 7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s 8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s 9 10define half @v_maximum_f16(half %src0, half %src1) { 11; GFX7-LABEL: v_maximum_f16: 12; GFX7: ; %bb.0: 13; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 15; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 16; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000 17; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 18; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 19; GFX7-NEXT: v_max_f32_e32 v3, v0, v1 20; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 21; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 22; GFX7-NEXT: s_setpc_b64 s[30:31] 23; 24; GFX8-LABEL: v_maximum_f16: 25; GFX8: ; %bb.0: 26; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 27; GFX8-NEXT: v_max_f16_e32 v2, v0, v1 28; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00 29; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 30; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 31; GFX8-NEXT: s_setpc_b64 s[30:31] 32; 33; GFX900-LABEL: v_maximum_f16: 34; GFX900: ; %bb.0: 35; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 36; GFX900-NEXT: v_max_f16_e32 v2, v0, v1 37; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00 38; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 39; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 40; GFX900-NEXT: s_setpc_b64 s[30:31] 41; 42; GFX950-LABEL: v_maximum_f16: 
43; GFX950: ; %bb.0: 44; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 45; GFX950-NEXT: v_max_f16_e32 v2, v0, v1 46; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00 47; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 48; GFX950-NEXT: s_nop 1 49; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 50; GFX950-NEXT: s_setpc_b64 s[30:31] 51; 52; GFX10-LABEL: v_maximum_f16: 53; GFX10: ; %bb.0: 54; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 55; GFX10-NEXT: v_max_f16_e32 v2, v0, v1 56; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 57; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 58; GFX10-NEXT: s_setpc_b64 s[30:31] 59; 60; GFX11-LABEL: v_maximum_f16: 61; GFX11: ; %bb.0: 62; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 63; GFX11-NEXT: v_max_f16_e32 v2, v0, v1 64; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 65; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 66; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 67; GFX11-NEXT: s_setpc_b64 s[30:31] 68; 69; GFX12-LABEL: v_maximum_f16: 70; GFX12: ; %bb.0: 71; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 72; GFX12-NEXT: s_wait_expcnt 0x0 73; GFX12-NEXT: s_wait_samplecnt 0x0 74; GFX12-NEXT: s_wait_bvhcnt 0x0 75; GFX12-NEXT: s_wait_kmcnt 0x0 76; GFX12-NEXT: v_maximum_f16 v0, v0, v1 77; GFX12-NEXT: s_setpc_b64 s[30:31] 78 %op = call half @llvm.maximum.f16(half %src0, half %src1) 79 ret half %op 80} 81 82define half @v_maximum_f16__nnan(half %src0, half %src1) { 83; GFX7-LABEL: v_maximum_f16__nnan: 84; GFX7: ; %bb.0: 85; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 86; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 87; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 88; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 89; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 90; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 91; GFX7-NEXT: s_setpc_b64 s[30:31] 92; 93; GFX8-LABEL: v_maximum_f16__nnan: 94; GFX8: ; %bb.0: 95; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 96; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 97; GFX8-NEXT: s_setpc_b64 s[30:31] 98; 99; GFX9-LABEL: v_maximum_f16__nnan: 100; 
GFX9: ; %bb.0: 101; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 102; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 103; GFX9-NEXT: s_setpc_b64 s[30:31] 104; 105; GFX10-LABEL: v_maximum_f16__nnan: 106; GFX10: ; %bb.0: 107; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 108; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 109; GFX10-NEXT: s_setpc_b64 s[30:31] 110; 111; GFX11-LABEL: v_maximum_f16__nnan: 112; GFX11: ; %bb.0: 113; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 114; GFX11-NEXT: v_max_f16_e32 v0, v0, v1 115; GFX11-NEXT: s_setpc_b64 s[30:31] 116; 117; GFX12-LABEL: v_maximum_f16__nnan: 118; GFX12: ; %bb.0: 119; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 120; GFX12-NEXT: s_wait_expcnt 0x0 121; GFX12-NEXT: s_wait_samplecnt 0x0 122; GFX12-NEXT: s_wait_bvhcnt 0x0 123; GFX12-NEXT: s_wait_kmcnt 0x0 124; GFX12-NEXT: v_maximum_f16 v0, v0, v1 125; GFX12-NEXT: s_setpc_b64 s[30:31] 126 %op = call nnan half @llvm.maximum.f16(half %src0, half %src1) 127 ret half %op 128} 129 130define half @v_maximum_f16__nsz(half %src0, half %src1) { 131; GFX7-LABEL: v_maximum_f16__nsz: 132; GFX7: ; %bb.0: 133; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 134; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 135; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 136; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000 137; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 138; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 139; GFX7-NEXT: v_max_f32_e32 v3, v0, v1 140; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 141; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 142; GFX7-NEXT: s_setpc_b64 s[30:31] 143; 144; GFX8-LABEL: v_maximum_f16__nsz: 145; GFX8: ; %bb.0: 146; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 147; GFX8-NEXT: v_max_f16_e32 v2, v0, v1 148; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00 149; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 150; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 151; GFX8-NEXT: s_setpc_b64 s[30:31] 152; 153; GFX900-LABEL: v_maximum_f16__nsz: 154; GFX900: ; %bb.0: 155; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 156; 
GFX900-NEXT: v_max_f16_e32 v2, v0, v1 157; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00 158; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 159; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 160; GFX900-NEXT: s_setpc_b64 s[30:31] 161; 162; GFX950-LABEL: v_maximum_f16__nsz: 163; GFX950: ; %bb.0: 164; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 165; GFX950-NEXT: v_max_f16_e32 v2, v0, v1 166; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00 167; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 168; GFX950-NEXT: s_nop 1 169; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 170; GFX950-NEXT: s_setpc_b64 s[30:31] 171; 172; GFX10-LABEL: v_maximum_f16__nsz: 173; GFX10: ; %bb.0: 174; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 175; GFX10-NEXT: v_max_f16_e32 v2, v0, v1 176; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 177; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 178; GFX10-NEXT: s_setpc_b64 s[30:31] 179; 180; GFX11-LABEL: v_maximum_f16__nsz: 181; GFX11: ; %bb.0: 182; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 183; GFX11-NEXT: v_max_f16_e32 v2, v0, v1 184; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 185; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 186; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 187; GFX11-NEXT: s_setpc_b64 s[30:31] 188; 189; GFX12-LABEL: v_maximum_f16__nsz: 190; GFX12: ; %bb.0: 191; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 192; GFX12-NEXT: s_wait_expcnt 0x0 193; GFX12-NEXT: s_wait_samplecnt 0x0 194; GFX12-NEXT: s_wait_bvhcnt 0x0 195; GFX12-NEXT: s_wait_kmcnt 0x0 196; GFX12-NEXT: v_maximum_f16 v0, v0, v1 197; GFX12-NEXT: s_setpc_b64 s[30:31] 198 %op = call nsz half @llvm.maximum.f16(half %src0, half %src1) 199 ret half %op 200} 201 202define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) { 203; GFX7-LABEL: v_maximum_f16__nnan_nsz: 204; GFX7: ; %bb.0: 205; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 206; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 207; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 208; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 209; GFX7-NEXT: 
v_cvt_f32_f16_e32 v0, v0 210; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 211; GFX7-NEXT: s_setpc_b64 s[30:31] 212; 213; GFX8-LABEL: v_maximum_f16__nnan_nsz: 214; GFX8: ; %bb.0: 215; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 216; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 217; GFX8-NEXT: s_setpc_b64 s[30:31] 218; 219; GFX9-LABEL: v_maximum_f16__nnan_nsz: 220; GFX9: ; %bb.0: 221; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 222; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 223; GFX9-NEXT: s_setpc_b64 s[30:31] 224; 225; GFX10-LABEL: v_maximum_f16__nnan_nsz: 226; GFX10: ; %bb.0: 227; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 228; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 229; GFX10-NEXT: s_setpc_b64 s[30:31] 230; 231; GFX11-LABEL: v_maximum_f16__nnan_nsz: 232; GFX11: ; %bb.0: 233; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 234; GFX11-NEXT: v_max_f16_e32 v0, v0, v1 235; GFX11-NEXT: s_setpc_b64 s[30:31] 236; 237; GFX12-LABEL: v_maximum_f16__nnan_nsz: 238; GFX12: ; %bb.0: 239; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 240; GFX12-NEXT: s_wait_expcnt 0x0 241; GFX12-NEXT: s_wait_samplecnt 0x0 242; GFX12-NEXT: s_wait_bvhcnt 0x0 243; GFX12-NEXT: s_wait_kmcnt 0x0 244; GFX12-NEXT: v_maximum_f16 v0, v0, v1 245; GFX12-NEXT: s_setpc_b64 s[30:31] 246 %op = call nnan nsz half @llvm.maximum.f16(half %src0, half %src1) 247 ret half %op 248} 249 250define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) { 251; GFX7-LABEL: v_maximum_f16__nnan_src0: 252; GFX7: ; %bb.0: 253; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 254; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 255; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 256; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000 257; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 258; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 259; GFX7-NEXT: v_add_f32_e32 v0, 1.0, v0 260; GFX7-NEXT: v_max_f32_e32 v3, v0, v1 261; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 262; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 263; GFX7-NEXT: s_setpc_b64 s[30:31] 264; 265; GFX8-LABEL: 
v_maximum_f16__nnan_src0: 266; GFX8: ; %bb.0: 267; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 268; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0 269; GFX8-NEXT: v_max_f16_e32 v2, v0, v1 270; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00 271; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 272; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 273; GFX8-NEXT: s_setpc_b64 s[30:31] 274; 275; GFX900-LABEL: v_maximum_f16__nnan_src0: 276; GFX900: ; %bb.0: 277; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 278; GFX900-NEXT: v_add_f16_e32 v0, 1.0, v0 279; GFX900-NEXT: v_max_f16_e32 v2, v0, v1 280; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00 281; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 282; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 283; GFX900-NEXT: s_setpc_b64 s[30:31] 284; 285; GFX950-LABEL: v_maximum_f16__nnan_src0: 286; GFX950: ; %bb.0: 287; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 288; GFX950-NEXT: v_add_f16_e32 v0, 1.0, v0 289; GFX950-NEXT: v_max_f16_e32 v2, v0, v1 290; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00 291; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 292; GFX950-NEXT: s_nop 1 293; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 294; GFX950-NEXT: s_setpc_b64 s[30:31] 295; 296; GFX10-LABEL: v_maximum_f16__nnan_src0: 297; GFX10: ; %bb.0: 298; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 299; GFX10-NEXT: v_add_f16_e32 v0, 1.0, v0 300; GFX10-NEXT: v_max_f16_e32 v2, v0, v1 301; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 302; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 303; GFX10-NEXT: s_setpc_b64 s[30:31] 304; 305; GFX11-LABEL: v_maximum_f16__nnan_src0: 306; GFX11: ; %bb.0: 307; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 308; GFX11-NEXT: v_add_f16_e32 v0, 1.0, v0 309; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 310; GFX11-NEXT: v_max_f16_e32 v2, v0, v1 311; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 312; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 313; GFX11-NEXT: s_setpc_b64 s[30:31] 314; 315; 
GFX12-LABEL: v_maximum_f16__nnan_src0: 316; GFX12: ; %bb.0: 317; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 318; GFX12-NEXT: s_wait_expcnt 0x0 319; GFX12-NEXT: s_wait_samplecnt 0x0 320; GFX12-NEXT: s_wait_bvhcnt 0x0 321; GFX12-NEXT: s_wait_kmcnt 0x0 322; GFX12-NEXT: v_add_f16_e32 v0, 1.0, v0 323; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 324; GFX12-NEXT: v_maximum_f16 v0, v0, v1 325; GFX12-NEXT: s_setpc_b64 s[30:31] 326 %src0 = fadd nnan half %arg0, 1.0 327 %op = call half @llvm.maximum.f16(half %src0, half %src1) 328 ret half %op 329} 330 331define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) { 332; GFX7-LABEL: v_maximum_f16__nnan_src1: 333; GFX7: ; %bb.0: 334; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 335; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 336; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 337; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000 338; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 339; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 340; GFX7-NEXT: v_add_f32_e32 v1, 1.0, v1 341; GFX7-NEXT: v_max_f32_e32 v3, v0, v1 342; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 343; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 344; GFX7-NEXT: s_setpc_b64 s[30:31] 345; 346; GFX8-LABEL: v_maximum_f16__nnan_src1: 347; GFX8: ; %bb.0: 348; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 349; GFX8-NEXT: v_add_f16_e32 v1, 1.0, v1 350; GFX8-NEXT: v_max_f16_e32 v2, v0, v1 351; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00 352; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 353; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 354; GFX8-NEXT: s_setpc_b64 s[30:31] 355; 356; GFX900-LABEL: v_maximum_f16__nnan_src1: 357; GFX900: ; %bb.0: 358; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 359; GFX900-NEXT: v_add_f16_e32 v1, 1.0, v1 360; GFX900-NEXT: v_max_f16_e32 v2, v0, v1 361; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00 362; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 363; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 364; GFX900-NEXT: s_setpc_b64 s[30:31] 365; 366; GFX950-LABEL: v_maximum_f16__nnan_src1: 367; GFX950: ; %bb.0: 
368; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 369; GFX950-NEXT: v_add_f16_e32 v1, 1.0, v1 370; GFX950-NEXT: v_max_f16_e32 v2, v0, v1 371; GFX950-NEXT: v_mov_b32_e32 v3, 0x7e00 372; GFX950-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 373; GFX950-NEXT: s_nop 1 374; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 375; GFX950-NEXT: s_setpc_b64 s[30:31] 376; 377; GFX10-LABEL: v_maximum_f16__nnan_src1: 378; GFX10: ; %bb.0: 379; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 380; GFX10-NEXT: v_add_f16_e32 v1, 1.0, v1 381; GFX10-NEXT: v_max_f16_e32 v2, v0, v1 382; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 383; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 384; GFX10-NEXT: s_setpc_b64 s[30:31] 385; 386; GFX11-LABEL: v_maximum_f16__nnan_src1: 387; GFX11: ; %bb.0: 388; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 389; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 390; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 391; GFX11-NEXT: v_max_f16_e32 v2, v0, v1 392; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 393; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 394; GFX11-NEXT: s_setpc_b64 s[30:31] 395; 396; GFX12-LABEL: v_maximum_f16__nnan_src1: 397; GFX12: ; %bb.0: 398; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 399; GFX12-NEXT: s_wait_expcnt 0x0 400; GFX12-NEXT: s_wait_samplecnt 0x0 401; GFX12-NEXT: s_wait_bvhcnt 0x0 402; GFX12-NEXT: s_wait_kmcnt 0x0 403; GFX12-NEXT: v_add_f16_e32 v1, 1.0, v1 404; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 405; GFX12-NEXT: v_maximum_f16 v0, v0, v1 406; GFX12-NEXT: s_setpc_b64 s[30:31] 407 %src1 = fadd nnan half %arg1, 1.0 408 %op = call half @llvm.maximum.f16(half %src0, half %src1) 409 ret half %op 410} 411 412define void @s_maximum_f16(half inreg %src0, half inreg %src1) { 413; GFX7-LABEL: s_maximum_f16: 414; GFX7: ; %bb.0: 415; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 416; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s17 417; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16 418; GFX7-NEXT: v_mov_b32_e32 v2, 
0x7fc00000 419; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 420; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 421; GFX7-NEXT: v_max_f32_e32 v3, v1, v0 422; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v0 423; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 424; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 425; GFX7-NEXT: ;;#ASMSTART 426; GFX7-NEXT: ; use v0 427; GFX7-NEXT: ;;#ASMEND 428; GFX7-NEXT: s_setpc_b64 s[30:31] 429; 430; GFX8-LABEL: s_maximum_f16: 431; GFX8: ; %bb.0: 432; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 433; GFX8-NEXT: v_mov_b32_e32 v0, s17 434; GFX8-NEXT: v_max_f16_e32 v1, s16, v0 435; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00 436; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s16, v0 437; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 438; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 439; GFX8-NEXT: ;;#ASMSTART 440; GFX8-NEXT: ; use v0 441; GFX8-NEXT: ;;#ASMEND 442; GFX8-NEXT: s_setpc_b64 s[30:31] 443; 444; GFX900-LABEL: s_maximum_f16: 445; GFX900: ; %bb.0: 446; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 447; GFX900-NEXT: v_mov_b32_e32 v0, s17 448; GFX900-NEXT: v_max_f16_e32 v1, s16, v0 449; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00 450; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0 451; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 452; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 453; GFX900-NEXT: ;;#ASMSTART 454; GFX900-NEXT: ; use v0 455; GFX900-NEXT: ;;#ASMEND 456; GFX900-NEXT: s_setpc_b64 s[30:31] 457; 458; GFX950-LABEL: s_maximum_f16: 459; GFX950: ; %bb.0: 460; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 461; GFX950-NEXT: v_mov_b32_e32 v0, s1 462; GFX950-NEXT: v_max_f16_e32 v1, s0, v0 463; GFX950-NEXT: v_mov_b32_e32 v2, 0x7e00 464; GFX950-NEXT: v_cmp_o_f16_e32 vcc, s0, v0 465; GFX950-NEXT: s_nop 1 466; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 467; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v0 468; GFX950-NEXT: ;;#ASMSTART 469; GFX950-NEXT: ; use v0 470; GFX950-NEXT: ;;#ASMEND 471; GFX950-NEXT: s_setpc_b64 s[30:31] 472; 473; GFX10-LABEL: s_maximum_f16: 474; GFX10: ; %bb.0: 475; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 476; GFX10-NEXT: v_max_f16_e64 v0, s16, s17 477; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s16, s17 478; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo 479; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 480; GFX10-NEXT: ;;#ASMSTART 481; GFX10-NEXT: ; use v0 482; GFX10-NEXT: ;;#ASMEND 483; GFX10-NEXT: s_setpc_b64 s[30:31] 484; 485; GFX11-LABEL: s_maximum_f16: 486; GFX11: ; %bb.0: 487; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 488; GFX11-NEXT: v_max_f16_e64 v0, s0, s1 489; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1 490; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 491; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo 492; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 493; GFX11-NEXT: ;;#ASMSTART 494; GFX11-NEXT: ; use v0 495; GFX11-NEXT: ;;#ASMEND 496; GFX11-NEXT: s_setpc_b64 s[30:31] 497; 498; GFX12-LABEL: s_maximum_f16: 499; GFX12: ; %bb.0: 500; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 501; GFX12-NEXT: s_wait_expcnt 0x0 502; GFX12-NEXT: s_wait_samplecnt 0x0 503; GFX12-NEXT: s_wait_bvhcnt 0x0 504; GFX12-NEXT: s_wait_kmcnt 0x0 505; GFX12-NEXT: s_maximum_f16 s0, s0, s1 506; GFX12-NEXT: s_wait_alu 0xfffe 507; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) 508; GFX12-NEXT: s_and_b32 s0, 0xffff, s0 509; GFX12-NEXT: ;;#ASMSTART 510; GFX12-NEXT: ; use s0 511; GFX12-NEXT: ;;#ASMEND 512; GFX12-NEXT: s_wait_alu 0xfffe 513; GFX12-NEXT: s_setpc_b64 s[30:31] 514 %op = call half @llvm.maximum.f16(half %src0, half %src1) 515 %cast = bitcast half %op to i16 516 %zext = zext i16 %cast to i32 517 call void asm sideeffect "; use $0", "s"(i32 %zext) 518 ret void 519} 520 521define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) { 522; GFX7-LABEL: v_maximum_v2f16: 523; GFX7: ; %bb.0: 524; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 525; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 526; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 527; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 528; GFX7-NEXT: 
v_cvt_f16_f32_e32 v1, v1 529; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 530; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 531; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 532; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 533; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000 534; GFX7-NEXT: v_max_f32_e32 v4, v0, v2 535; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 536; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 537; GFX7-NEXT: v_max_f32_e32 v2, v1, v3 538; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 539; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 540; GFX7-NEXT: s_setpc_b64 s[30:31] 541; 542; GFX8-LABEL: v_maximum_v2f16: 543; GFX8: ; %bb.0: 544; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 545; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1 546; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 547; GFX8-NEXT: v_max_f16_e32 v4, v3, v2 548; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00 549; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2 550; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc 551; GFX8-NEXT: v_max_f16_e32 v3, v0, v1 552; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 553; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 554; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 555; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 556; GFX8-NEXT: s_setpc_b64 s[30:31] 557; 558; GFX900-LABEL: v_maximum_v2f16: 559; GFX900: ; %bb.0: 560; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 561; GFX900-NEXT: v_pk_max_f16 v2, v0, v1 562; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00 563; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 564; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc 565; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2 566; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 567; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 568; GFX900-NEXT: s_mov_b32 s4, 0x5040100 569; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 570; GFX900-NEXT: s_setpc_b64 s[30:31] 571; 572; GFX950-LABEL: v_maximum_v2f16: 573; GFX950: ; %bb.0: 574; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 575; GFX950-NEXT: 
v_pk_maximum3_f16 v0, v0, v1, v1 576; GFX950-NEXT: s_setpc_b64 s[30:31] 577; 578; GFX10-LABEL: v_maximum_v2f16: 579; GFX10: ; %bb.0: 580; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 581; GFX10-NEXT: v_pk_max_f16 v2, v0, v1 582; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 583; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2 584; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo 585; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 586; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo 587; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100 588; GFX10-NEXT: s_setpc_b64 s[30:31] 589; 590; GFX11-LABEL: v_maximum_v2f16: 591; GFX11: ; %bb.0: 592; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 593; GFX11-NEXT: v_pk_max_f16 v2, v0, v1 594; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 595; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0 596; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 597; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 598; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 599; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 600; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3 601; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 602; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo 603; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 604; GFX11-NEXT: s_setpc_b64 s[30:31] 605; 606; GFX12-LABEL: v_maximum_v2f16: 607; GFX12: ; %bb.0: 608; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 609; GFX12-NEXT: s_wait_expcnt 0x0 610; GFX12-NEXT: s_wait_samplecnt 0x0 611; GFX12-NEXT: s_wait_bvhcnt 0x0 612; GFX12-NEXT: s_wait_kmcnt 0x0 613; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 614; GFX12-NEXT: s_setpc_b64 s[30:31] 615 %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1) 616 ret <2 x half> %op 617} 618 619define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) { 620; GFX7-LABEL: v_maximum_v2f16__nnan: 621; GFX7: ; %bb.0: 622; GFX7-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 623; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 624; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 625; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 626; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 627; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 628; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 629; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 630; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 631; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 632; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 633; GFX7-NEXT: s_setpc_b64 s[30:31] 634; 635; GFX8-LABEL: v_maximum_v2f16__nnan: 636; GFX8: ; %bb.0: 637; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 638; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 639; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 640; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 641; GFX8-NEXT: s_setpc_b64 s[30:31] 642; 643; GFX900-LABEL: v_maximum_v2f16__nnan: 644; GFX900: ; %bb.0: 645; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 646; GFX900-NEXT: v_pk_max_f16 v0, v0, v1 647; GFX900-NEXT: s_setpc_b64 s[30:31] 648; 649; GFX950-LABEL: v_maximum_v2f16__nnan: 650; GFX950: ; %bb.0: 651; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 652; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 653; GFX950-NEXT: s_setpc_b64 s[30:31] 654; 655; GFX10-LABEL: v_maximum_v2f16__nnan: 656; GFX10: ; %bb.0: 657; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 658; GFX10-NEXT: v_pk_max_f16 v0, v0, v1 659; GFX10-NEXT: s_setpc_b64 s[30:31] 660; 661; GFX11-LABEL: v_maximum_v2f16__nnan: 662; GFX11: ; %bb.0: 663; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 664; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 665; GFX11-NEXT: s_setpc_b64 s[30:31] 666; 667; GFX12-LABEL: v_maximum_v2f16__nnan: 668; GFX12: ; %bb.0: 669; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 670; GFX12-NEXT: s_wait_expcnt 0x0 671; GFX12-NEXT: s_wait_samplecnt 0x0 672; GFX12-NEXT: s_wait_bvhcnt 0x0 673; GFX12-NEXT: s_wait_kmcnt 0x0 674; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 675; GFX12-NEXT: s_setpc_b64 s[30:31] 676 %op 
= call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1) 677 ret <2 x half> %op 678} 679 680define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) { 681; GFX7-LABEL: v_maximum_v2f16__nsz: 682; GFX7: ; %bb.0: 683; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 684; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 685; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 686; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 687; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 688; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 689; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 690; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 691; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 692; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000 693; GFX7-NEXT: v_max_f32_e32 v4, v0, v2 694; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 695; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 696; GFX7-NEXT: v_max_f32_e32 v2, v1, v3 697; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 698; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 699; GFX7-NEXT: s_setpc_b64 s[30:31] 700; 701; GFX8-LABEL: v_maximum_v2f16__nsz: 702; GFX8: ; %bb.0: 703; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 704; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1 705; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 706; GFX8-NEXT: v_max_f16_e32 v4, v3, v2 707; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00 708; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2 709; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc 710; GFX8-NEXT: v_max_f16_e32 v3, v0, v1 711; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 712; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 713; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 714; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 715; GFX8-NEXT: s_setpc_b64 s[30:31] 716; 717; GFX900-LABEL: v_maximum_v2f16__nsz: 718; GFX900: ; %bb.0: 719; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 720; GFX900-NEXT: v_pk_max_f16 v2, v0, v1 721; GFX900-NEXT: v_mov_b32_e32 v3, 0x7e00 722; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 723; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, 
v2, vcc 724; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2 725; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 726; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 727; GFX900-NEXT: s_mov_b32 s4, 0x5040100 728; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 729; GFX900-NEXT: s_setpc_b64 s[30:31] 730; 731; GFX950-LABEL: v_maximum_v2f16__nsz: 732; GFX950: ; %bb.0: 733; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 734; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 735; GFX950-NEXT: s_setpc_b64 s[30:31] 736; 737; GFX10-LABEL: v_maximum_v2f16__nsz: 738; GFX10: ; %bb.0: 739; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 740; GFX10-NEXT: v_pk_max_f16 v2, v0, v1 741; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 742; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v2 743; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo 744; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 745; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo 746; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100 747; GFX10-NEXT: s_setpc_b64 s[30:31] 748; 749; GFX11-LABEL: v_maximum_v2f16__nsz: 750; GFX11: ; %bb.0: 751; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 752; GFX11-NEXT: v_pk_max_f16 v2, v0, v1 753; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 754; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0 755; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1 756; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 757; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 758; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo 759; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v3 760; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 761; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo 762; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 763; GFX11-NEXT: s_setpc_b64 s[30:31] 764; 765; GFX12-LABEL: v_maximum_v2f16__nsz: 766; GFX12: ; %bb.0: 767; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 768; GFX12-NEXT: s_wait_expcnt 0x0 769; 
GFX12-NEXT: s_wait_samplecnt 0x0 770; GFX12-NEXT: s_wait_bvhcnt 0x0 771; GFX12-NEXT: s_wait_kmcnt 0x0 772; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 773; GFX12-NEXT: s_setpc_b64 s[30:31] 774 %op = call nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1) 775 ret <2 x half> %op 776} 777 778define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) { 779; GFX7-LABEL: v_maximum_v2f16__nnan_nsz: 780; GFX7: ; %bb.0: 781; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 782; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 783; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 784; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 785; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 786; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 787; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 788; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 789; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 790; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 791; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 792; GFX7-NEXT: s_setpc_b64 s[30:31] 793; 794; GFX8-LABEL: v_maximum_v2f16__nnan_nsz: 795; GFX8: ; %bb.0: 796; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 797; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 798; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 799; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 800; GFX8-NEXT: s_setpc_b64 s[30:31] 801; 802; GFX900-LABEL: v_maximum_v2f16__nnan_nsz: 803; GFX900: ; %bb.0: 804; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 805; GFX900-NEXT: v_pk_max_f16 v0, v0, v1 806; GFX900-NEXT: s_setpc_b64 s[30:31] 807; 808; GFX950-LABEL: v_maximum_v2f16__nnan_nsz: 809; GFX950: ; %bb.0: 810; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 811; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 812; GFX950-NEXT: s_setpc_b64 s[30:31] 813; 814; GFX10-LABEL: v_maximum_v2f16__nnan_nsz: 815; GFX10: ; %bb.0: 816; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 817; GFX10-NEXT: v_pk_max_f16 v0, v0, v1 818; GFX10-NEXT: s_setpc_b64 s[30:31] 819; 820; GFX11-LABEL: v_maximum_v2f16__nnan_nsz: 
821; GFX11: ; %bb.0: 822; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 823; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 824; GFX11-NEXT: s_setpc_b64 s[30:31] 825; 826; GFX12-LABEL: v_maximum_v2f16__nnan_nsz: 827; GFX12: ; %bb.0: 828; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 829; GFX12-NEXT: s_wait_expcnt 0x0 830; GFX12-NEXT: s_wait_samplecnt 0x0 831; GFX12-NEXT: s_wait_bvhcnt 0x0 832; GFX12-NEXT: s_wait_kmcnt 0x0 833; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 834; GFX12-NEXT: s_setpc_b64 s[30:31] 835 %op = call nnan nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1) 836 ret <2 x half> %op 837} 838 839define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) { 840; GFX7-LABEL: s_maximum_v2f16: 841; GFX7: ; %bb.0: 842; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 843; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s19 844; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17 845; GFX7-NEXT: v_cvt_f16_f32_e32 v2, s18 846; GFX7-NEXT: v_cvt_f16_f32_e32 v3, s16 847; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 848; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 849; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 850; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 851; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000 852; GFX7-NEXT: v_max_f32_e32 v4, v1, v0 853; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v0 854; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 855; GFX7-NEXT: v_max_f32_e32 v1, v3, v2 856; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v2 857; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 858; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 859; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 860; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 861; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 862; GFX7-NEXT: ;;#ASMSTART 863; GFX7-NEXT: ; use v0 864; GFX7-NEXT: ;;#ASMEND 865; GFX7-NEXT: s_setpc_b64 s[30:31] 866; 867; GFX8-LABEL: s_maximum_v2f16: 868; GFX8: ; %bb.0: 869; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 870; GFX8-NEXT: s_lshr_b32 s4, s17, 16 871; GFX8-NEXT: s_lshr_b32 s5, s16, 16 872; GFX8-NEXT: v_mov_b32_e32 v0, s4 873; GFX8-NEXT: v_max_f16_e32 
v1, s5, v0 874; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00 875; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s5, v0 876; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 877; GFX8-NEXT: v_mov_b32_e32 v1, s17 878; GFX8-NEXT: v_max_f16_e32 v3, s16, v1 879; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s16, v1 880; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 881; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 882; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 883; GFX8-NEXT: ;;#ASMSTART 884; GFX8-NEXT: ; use v0 885; GFX8-NEXT: ;;#ASMEND 886; GFX8-NEXT: s_setpc_b64 s[30:31] 887; 888; GFX900-LABEL: s_maximum_v2f16: 889; GFX900: ; %bb.0: 890; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 891; GFX900-NEXT: v_mov_b32_e32 v0, s17 892; GFX900-NEXT: v_mov_b32_e32 v1, s17 893; GFX900-NEXT: s_lshr_b32 s4, s17, 16 894; GFX900-NEXT: v_pk_max_f16 v1, s16, v1 895; GFX900-NEXT: v_mov_b32_e32 v2, 0x7e00 896; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s16, v0 897; GFX900-NEXT: s_lshr_b32 s5, s16, 16 898; GFX900-NEXT: v_mov_b32_e32 v3, s4 899; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 900; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 901; GFX900-NEXT: v_cmp_o_f16_e32 vcc, s5, v3 902; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 903; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 904; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0 905; GFX900-NEXT: ;;#ASMSTART 906; GFX900-NEXT: ; use v0 907; GFX900-NEXT: ;;#ASMEND 908; GFX900-NEXT: s_setpc_b64 s[30:31] 909; 910; GFX950-LABEL: s_maximum_v2f16: 911; GFX950: ; %bb.0: 912; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; GFX950-NEXT: v_mov_b32_e32 v0, s0 914; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, s1, s1 915; GFX950-NEXT: s_nop 0 916; GFX950-NEXT: ;;#ASMSTART 917; GFX950-NEXT: ; use v0 918; GFX950-NEXT: ;;#ASMEND 919; GFX950-NEXT: s_setpc_b64 s[30:31] 920; 921; GFX10-LABEL: s_maximum_v2f16: 922; GFX10: ; %bb.0: 923; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 924; GFX10-NEXT: v_pk_max_f16 v0, s16, s17 925; GFX10-NEXT: 
v_cmp_o_f16_e64 vcc_lo, s16, s17 926; GFX10-NEXT: s_lshr_b32 s4, s17, 16 927; GFX10-NEXT: s_lshr_b32 s5, s16, 16 928; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0 929; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo 930; GFX10-NEXT: v_cmp_o_f16_e64 vcc_lo, s5, s4 931; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 932; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo 933; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 934; GFX10-NEXT: ;;#ASMSTART 935; GFX10-NEXT: ; use v0 936; GFX10-NEXT: ;;#ASMEND 937; GFX10-NEXT: s_setpc_b64 s[30:31] 938; 939; GFX11-LABEL: s_maximum_v2f16: 940; GFX11: ; %bb.0: 941; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 942; GFX11-NEXT: v_pk_max_f16 v0, s0, s1 943; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1 944; GFX11-NEXT: s_lshr_b32 s2, s1, 16 945; GFX11-NEXT: s_lshr_b32 s0, s0, 16 946; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) 947; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 948; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo 949; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2 950; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 951; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 952; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo 953; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0 954; GFX11-NEXT: ;;#ASMSTART 955; GFX11-NEXT: ; use v0 956; GFX11-NEXT: ;;#ASMEND 957; GFX11-NEXT: s_setpc_b64 s[30:31] 958; 959; GFX12-LABEL: s_maximum_v2f16: 960; GFX12: ; %bb.0: 961; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 962; GFX12-NEXT: s_wait_expcnt 0x0 963; GFX12-NEXT: s_wait_samplecnt 0x0 964; GFX12-NEXT: s_wait_bvhcnt 0x0 965; GFX12-NEXT: s_wait_kmcnt 0x0 966; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s1 967; GFX12-NEXT: ;;#ASMSTART 968; GFX12-NEXT: ; use v0 969; GFX12-NEXT: ;;#ASMEND 970; GFX12-NEXT: s_setpc_b64 s[30:31] 971 %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1) 972 %cast = bitcast <2 x half> %op to i32 973 call void asm sideeffect "; use $0", 
"s"(i32 %cast) 974 ret void 975} 976 977define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) { 978; GFX7-LABEL: v_maximum_v3f16: 979; GFX7: ; %bb.0: 980; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 981; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 982; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 983; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 984; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 985; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 986; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 987; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 988; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 989; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 990; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 991; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 992; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 993; GFX7-NEXT: v_max_f32_e32 v6, v0, v3 994; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000 995; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 996; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 997; GFX7-NEXT: v_max_f32_e32 v3, v1, v4 998; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 999; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 1000; GFX7-NEXT: v_max_f32_e32 v3, v2, v5 1001; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 1002; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 1003; GFX7-NEXT: s_setpc_b64 s[30:31] 1004; 1005; GFX8-LABEL: v_maximum_v3f16: 1006; GFX8: ; %bb.0: 1007; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1008; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 1009; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1010; GFX8-NEXT: v_max_f16_e32 v6, v5, v4 1011; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00 1012; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4 1013; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc 1014; GFX8-NEXT: v_max_f16_e32 v5, v1, v3 1015; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1016; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc 1017; GFX8-NEXT: v_max_f16_e32 v3, v0, v2 1018; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1019; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 1020; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 1021; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_0 src1_sel:DWORD 1022; GFX8-NEXT: s_setpc_b64 s[30:31] 1023; 1024; GFX900-LABEL: v_maximum_v3f16: 1025; GFX900: ; %bb.0: 1026; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1027; GFX900-NEXT: v_pk_max_f16 v4, v1, v3 1028; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00 1029; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1030; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 1031; GFX900-NEXT: v_pk_max_f16 v3, v0, v2 1032; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1033; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 1034; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1035; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1036; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 1037; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1038; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 1039; GFX900-NEXT: s_setpc_b64 s[30:31] 1040; 1041; GFX950-LABEL: v_maximum_v3f16: 1042; GFX950: ; %bb.0: 1043; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1044; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1045; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1046; GFX950-NEXT: s_setpc_b64 s[30:31] 1047; 1048; GFX10-LABEL: v_maximum_v3f16: 1049; GFX10: ; %bb.0: 1050; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1051; GFX10-NEXT: v_pk_max_f16 v4, v0, v2 1052; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1053; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4 1054; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo 1055; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1056; GFX10-NEXT: v_pk_max_f16 v2, v1, v3 1057; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo 1058; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1059; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100 1060; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo 1061; GFX10-NEXT: s_setpc_b64 s[30:31] 1062; 1063; GFX11-LABEL: v_maximum_v3f16: 1064; GFX11: ; %bb.0: 1065; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1066; GFX11-NEXT: v_pk_max_f16 v4, v0, v2 1067; GFX11-NEXT: 
v_lshrrev_b32_e32 v5, 16, v2 1068; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1069; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1070; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 1071; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4 1072; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo 1073; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5 1074; GFX11-NEXT: v_pk_max_f16 v4, v1, v3 1075; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1076; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo 1077; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1078; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 1079; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1080; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1081; GFX11-NEXT: s_setpc_b64 s[30:31] 1082; 1083; GFX12-LABEL: v_maximum_v3f16: 1084; GFX12: ; %bb.0: 1085; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1086; GFX12-NEXT: s_wait_expcnt 0x0 1087; GFX12-NEXT: s_wait_samplecnt 0x0 1088; GFX12-NEXT: s_wait_bvhcnt 0x0 1089; GFX12-NEXT: s_wait_kmcnt 0x0 1090; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1091; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1092; GFX12-NEXT: s_setpc_b64 s[30:31] 1093 %op = call <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1) 1094 ret <3 x half> %op 1095} 1096 1097define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) { 1098; GFX7-LABEL: v_maximum_v3f16__nnan: 1099; GFX7: ; %bb.0: 1100; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1101; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1102; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1103; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1104; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1105; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1106; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1107; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1108; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1109; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1110; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1111; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1112; GFX7-NEXT: 
v_cvt_f32_f16_e32 v2, v2 1113; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 1114; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 1115; GFX7-NEXT: v_max_f32_e32 v2, v2, v5 1116; GFX7-NEXT: s_setpc_b64 s[30:31] 1117; 1118; GFX8-LABEL: v_maximum_v3f16__nnan: 1119; GFX8: ; %bb.0: 1120; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1121; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1122; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 1123; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 1124; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 1125; GFX8-NEXT: s_setpc_b64 s[30:31] 1126; 1127; GFX900-LABEL: v_maximum_v3f16__nnan: 1128; GFX900: ; %bb.0: 1129; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1130; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 1131; GFX900-NEXT: v_pk_max_f16 v1, v1, v3 1132; GFX900-NEXT: s_setpc_b64 s[30:31] 1133; 1134; GFX950-LABEL: v_maximum_v3f16__nnan: 1135; GFX950: ; %bb.0: 1136; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1137; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1138; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1139; GFX950-NEXT: s_setpc_b64 s[30:31] 1140; 1141; GFX10-LABEL: v_maximum_v3f16__nnan: 1142; GFX10: ; %bb.0: 1143; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1144; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 1145; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 1146; GFX10-NEXT: s_setpc_b64 s[30:31] 1147; 1148; GFX11-LABEL: v_maximum_v3f16__nnan: 1149; GFX11: ; %bb.0: 1150; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1151; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 1152; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 1153; GFX11-NEXT: s_setpc_b64 s[30:31] 1154; 1155; GFX12-LABEL: v_maximum_v3f16__nnan: 1156; GFX12: ; %bb.0: 1157; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1158; GFX12-NEXT: s_wait_expcnt 0x0 1159; GFX12-NEXT: s_wait_samplecnt 0x0 1160; GFX12-NEXT: s_wait_bvhcnt 0x0 1161; GFX12-NEXT: s_wait_kmcnt 0x0 1162; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1163; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1164; GFX12-NEXT: 
s_setpc_b64 s[30:31] 1165 %op = call nnan <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1) 1166 ret <3 x half> %op 1167} 1168 1169define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) { 1170; GFX7-LABEL: v_maximum_v3f16__nsz: 1171; GFX7: ; %bb.0: 1172; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1173; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1174; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1175; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1176; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1177; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1178; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1179; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1180; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1181; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1182; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1183; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1184; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 1185; GFX7-NEXT: v_max_f32_e32 v6, v0, v3 1186; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000 1187; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 1188; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 1189; GFX7-NEXT: v_max_f32_e32 v3, v1, v4 1190; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 1191; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 1192; GFX7-NEXT: v_max_f32_e32 v3, v2, v5 1193; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 1194; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 1195; GFX7-NEXT: s_setpc_b64 s[30:31] 1196; 1197; GFX8-LABEL: v_maximum_v3f16__nsz: 1198; GFX8: ; %bb.0: 1199; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1200; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 1201; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1202; GFX8-NEXT: v_max_f16_e32 v6, v5, v4 1203; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00 1204; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4 1205; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc 1206; GFX8-NEXT: v_max_f16_e32 v5, v1, v3 1207; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1208; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc 1209; GFX8-NEXT: v_max_f16_e32 v3, v0, v2 1210; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1211; GFX8-NEXT: 
v_cndmask_b32_e32 v0, v7, v3, vcc 1212; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 1213; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1214; GFX8-NEXT: s_setpc_b64 s[30:31] 1215; 1216; GFX900-LABEL: v_maximum_v3f16__nsz: 1217; GFX900: ; %bb.0: 1218; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1219; GFX900-NEXT: v_pk_max_f16 v4, v1, v3 1220; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00 1221; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1222; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 1223; GFX900-NEXT: v_pk_max_f16 v3, v0, v2 1224; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1225; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 1226; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1227; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1228; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 1229; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1230; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 1231; GFX900-NEXT: s_setpc_b64 s[30:31] 1232; 1233; GFX950-LABEL: v_maximum_v3f16__nsz: 1234; GFX950: ; %bb.0: 1235; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1236; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1237; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1238; GFX950-NEXT: s_setpc_b64 s[30:31] 1239; 1240; GFX10-LABEL: v_maximum_v3f16__nsz: 1241; GFX10: ; %bb.0: 1242; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1243; GFX10-NEXT: v_pk_max_f16 v4, v0, v2 1244; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1245; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v4 1246; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo 1247; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1248; GFX10-NEXT: v_pk_max_f16 v2, v1, v3 1249; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo 1250; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1251; GFX10-NEXT: v_perm_b32 v0, v0, v4, 0x5040100 1252; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo 1253; GFX10-NEXT: s_setpc_b64 s[30:31] 1254; 1255; GFX11-LABEL: 
v_maximum_v3f16__nsz: 1256; GFX11: ; %bb.0: 1257; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1258; GFX11-NEXT: v_pk_max_f16 v4, v0, v2 1259; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 1260; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1261; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1262; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 1263; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4 1264; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo 1265; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5 1266; GFX11-NEXT: v_pk_max_f16 v4, v1, v3 1267; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1268; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo 1269; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1270; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 1271; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1272; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1273; GFX11-NEXT: s_setpc_b64 s[30:31] 1274; 1275; GFX12-LABEL: v_maximum_v3f16__nsz: 1276; GFX12: ; %bb.0: 1277; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1278; GFX12-NEXT: s_wait_expcnt 0x0 1279; GFX12-NEXT: s_wait_samplecnt 0x0 1280; GFX12-NEXT: s_wait_bvhcnt 0x0 1281; GFX12-NEXT: s_wait_kmcnt 0x0 1282; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1283; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1284; GFX12-NEXT: s_setpc_b64 s[30:31] 1285 %op = call nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1) 1286 ret <3 x half> %op 1287} 1288 1289define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) { 1290; GFX7-LABEL: v_maximum_v3f16__nnan_nsz: 1291; GFX7: ; %bb.0: 1292; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1293; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1294; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1295; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1296; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1297; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1298; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1299; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1300; 
GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1301; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1302; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1303; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1304; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 1305; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 1306; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 1307; GFX7-NEXT: v_max_f32_e32 v2, v2, v5 1308; GFX7-NEXT: s_setpc_b64 s[30:31] 1309; 1310; GFX8-LABEL: v_maximum_v3f16__nnan_nsz: 1311; GFX8: ; %bb.0: 1312; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1313; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1314; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 1315; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 1316; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 1317; GFX8-NEXT: s_setpc_b64 s[30:31] 1318; 1319; GFX900-LABEL: v_maximum_v3f16__nnan_nsz: 1320; GFX900: ; %bb.0: 1321; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1322; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 1323; GFX900-NEXT: v_pk_max_f16 v1, v1, v3 1324; GFX900-NEXT: s_setpc_b64 s[30:31] 1325; 1326; GFX950-LABEL: v_maximum_v3f16__nnan_nsz: 1327; GFX950: ; %bb.0: 1328; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1329; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1330; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1331; GFX950-NEXT: s_setpc_b64 s[30:31] 1332; 1333; GFX10-LABEL: v_maximum_v3f16__nnan_nsz: 1334; GFX10: ; %bb.0: 1335; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1336; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 1337; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 1338; GFX10-NEXT: s_setpc_b64 s[30:31] 1339; 1340; GFX11-LABEL: v_maximum_v3f16__nnan_nsz: 1341; GFX11: ; %bb.0: 1342; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 1344; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 1345; GFX11-NEXT: s_setpc_b64 s[30:31] 1346; 1347; GFX12-LABEL: v_maximum_v3f16__nnan_nsz: 1348; GFX12: ; %bb.0: 1349; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1350; GFX12-NEXT: s_wait_expcnt 0x0 1351; GFX12-NEXT: 
s_wait_samplecnt 0x0 1352; GFX12-NEXT: s_wait_bvhcnt 0x0 1353; GFX12-NEXT: s_wait_kmcnt 0x0 1354; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1355; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1356; GFX12-NEXT: s_setpc_b64 s[30:31] 1357 %op = call nnan nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1) 1358 ret <3 x half> %op 1359} 1360 1361define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) { 1362; GFX7-LABEL: v_maximum_v4f16: 1363; GFX7: ; %bb.0: 1364; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1365; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1366; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1367; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1368; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1369; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 1370; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1371; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 1372; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1373; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1374; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1375; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1376; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1377; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 1378; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 1379; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 1380; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1381; GFX7-NEXT: v_max_f32_e32 v8, v0, v4 1382; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000 1383; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1384; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1385; GFX7-NEXT: v_max_f32_e32 v4, v1, v5 1386; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 1387; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1388; GFX7-NEXT: v_max_f32_e32 v4, v2, v6 1389; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1390; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1391; GFX7-NEXT: v_max_f32_e32 v4, v3, v7 1392; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1393; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1394; GFX7-NEXT: s_setpc_b64 s[30:31] 1395; 1396; GFX8-LABEL: v_maximum_v4f16: 1397; GFX8: ; %bb.0: 1398; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1399; GFX8-NEXT: 
v_lshrrev_b32_e32 v4, 16, v3 1400; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1 1401; GFX8-NEXT: v_max_f16_e32 v6, v5, v4 1402; GFX8-NEXT: v_mov_b32_e32 v7, 0x7e00 1403; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4 1404; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc 1405; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 1406; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1407; GFX8-NEXT: v_max_f16_e32 v8, v6, v5 1408; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 1409; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc 1410; GFX8-NEXT: v_max_f16_e32 v6, v1, v3 1411; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1412; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 1413; GFX8-NEXT: v_max_f16_e32 v3, v0, v2 1414; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1415; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 1416; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5 1417; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1418; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 1419; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1420; GFX8-NEXT: s_setpc_b64 s[30:31] 1421; 1422; GFX900-LABEL: v_maximum_v4f16: 1423; GFX900: ; %bb.0: 1424; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1425; GFX900-NEXT: v_pk_max_f16 v4, v1, v3 1426; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00 1427; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1428; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc 1429; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1430; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1431; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 1432; GFX900-NEXT: v_pk_max_f16 v3, v0, v2 1433; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1434; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 1435; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1436; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1437; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 1438; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1439; GFX900-NEXT: v_perm_b32 
v0, v0, v4, s4 1440; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4 1441; GFX900-NEXT: s_setpc_b64 s[30:31] 1442; 1443; GFX950-LABEL: v_maximum_v4f16: 1444; GFX950: ; %bb.0: 1445; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1446; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1447; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1448; GFX950-NEXT: s_setpc_b64 s[30:31] 1449; 1450; GFX10-LABEL: v_maximum_v4f16: 1451; GFX10: ; %bb.0: 1452; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1453; GFX10-NEXT: v_pk_max_f16 v4, v1, v3 1454; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1455; GFX10-NEXT: v_pk_max_f16 v5, v0, v2 1456; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo 1457; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1458; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5 1459; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1460; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo 1461; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1462; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo 1463; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1464; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100 1465; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1466; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100 1467; GFX10-NEXT: s_setpc_b64 s[30:31] 1468; 1469; GFX11-LABEL: v_maximum_v4f16: 1470; GFX11: ; %bb.0: 1471; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1472; GFX11-NEXT: v_pk_max_f16 v4, v1, v3 1473; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1474; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 1475; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 1476; GFX11-NEXT: v_pk_max_f16 v7, v0, v2 1477; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2 1478; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1479; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1480; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1481; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7 1482; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1483; GFX11-NEXT: v_cndmask_b32_e32 v0, 
0x7e00, v7, vcc_lo 1484; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8 1485; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1486; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo 1487; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5 1488; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 1489; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo 1490; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1491; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 1492; GFX11-NEXT: s_setpc_b64 s[30:31] 1493; 1494; GFX12-LABEL: v_maximum_v4f16: 1495; GFX12: ; %bb.0: 1496; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1497; GFX12-NEXT: s_wait_expcnt 0x0 1498; GFX12-NEXT: s_wait_samplecnt 0x0 1499; GFX12-NEXT: s_wait_bvhcnt 0x0 1500; GFX12-NEXT: s_wait_kmcnt 0x0 1501; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1502; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1503; GFX12-NEXT: s_setpc_b64 s[30:31] 1504 %op = call <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1) 1505 ret <4 x half> %op 1506} 1507 1508define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) { 1509; GFX7-LABEL: v_maximum_v4f16__nnan: 1510; GFX7: ; %bb.0: 1511; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1512; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 1513; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 1514; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1515; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1516; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1517; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1518; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1519; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1520; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 1521; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 1522; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1523; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1524; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1525; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1526; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 1527; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1528; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 1529; GFX7-NEXT: v_max_f32_e32 v1, v1, v5 1530; GFX7-NEXT: 
v_max_f32_e32 v2, v2, v6 1531; GFX7-NEXT: v_max_f32_e32 v3, v3, v7 1532; GFX7-NEXT: s_setpc_b64 s[30:31] 1533; 1534; GFX8-LABEL: v_maximum_v4f16__nnan: 1535; GFX8: ; %bb.0: 1536; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1537; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1538; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1539; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 1540; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 1541; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 1542; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1543; GFX8-NEXT: s_setpc_b64 s[30:31] 1544; 1545; GFX900-LABEL: v_maximum_v4f16__nnan: 1546; GFX900: ; %bb.0: 1547; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1548; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 1549; GFX900-NEXT: v_pk_max_f16 v1, v1, v3 1550; GFX900-NEXT: s_setpc_b64 s[30:31] 1551; 1552; GFX950-LABEL: v_maximum_v4f16__nnan: 1553; GFX950: ; %bb.0: 1554; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1555; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1556; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1557; GFX950-NEXT: s_setpc_b64 s[30:31] 1558; 1559; GFX10-LABEL: v_maximum_v4f16__nnan: 1560; GFX10: ; %bb.0: 1561; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1562; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 1563; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 1564; GFX10-NEXT: s_setpc_b64 s[30:31] 1565; 1566; GFX11-LABEL: v_maximum_v4f16__nnan: 1567; GFX11: ; %bb.0: 1568; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1569; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 1570; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 1571; GFX11-NEXT: s_setpc_b64 s[30:31] 1572; 1573; GFX12-LABEL: v_maximum_v4f16__nnan: 1574; GFX12: ; %bb.0: 1575; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1576; GFX12-NEXT: s_wait_expcnt 0x0 1577; GFX12-NEXT: s_wait_samplecnt 0x0 1578; GFX12-NEXT: s_wait_bvhcnt 0x0 1579; GFX12-NEXT: s_wait_kmcnt 0x0 1580; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 
; @v_maximum_v4f16__nsz (its define is inside the next line) calls
; @llvm.maximum.v4f16 with the nsz flag on two <4 x half> arguments and returns
; the result. In the checks for this function, GFX950 uses v_pk_maximum3_f16 and
; GFX12 uses v_pk_maximum_f16, while GFX7, GFX8, GFX900, GFX10 and GFX11 use a
; max instruction followed by v_cmp_o and v_cndmask.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE at the top of the file); regenerate them rather than editing by hand.
1581; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1582; GFX12-NEXT: s_setpc_b64 s[30:31] 1583 %op = call nnan <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1) 1584 ret <4 x half> %op 1585} 1586 1587define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) { 1588; GFX7-LABEL: v_maximum_v4f16__nsz: 1589; GFX7: ; %bb.0: 1590; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1591; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1592; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1593; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1594; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1595; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 1596; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1597; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 1598; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1599; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1600; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1601; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1602; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1603; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 1604; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 1605; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 1606; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1607; GFX7-NEXT: v_max_f32_e32 v8, v0, v4 1608; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000 1609; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1610; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1611; GFX7-NEXT: v_max_f32_e32 v4, v1, v5 1612; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 1613; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1614; GFX7-NEXT: v_max_f32_e32 v4, v2, v6 1615; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1616; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1617; GFX7-NEXT: v_max_f32_e32 v4, v3, v7 1618; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1619; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1620; GFX7-NEXT: s_setpc_b64 s[30:31] 1621; 1622; GFX8-LABEL: v_maximum_v4f16__nsz: 1623; GFX8: ; %bb.0: 1624; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1625; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3 1626; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1 1627; GFX8-NEXT: v_max_f16_e32 v6, v5, v4 1628; GFX8-NEXT: 
v_mov_b32_e32 v7, 0x7e00 1629; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v5, v4 1630; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc 1631; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2 1632; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1633; GFX8-NEXT: v_max_f16_e32 v8, v6, v5 1634; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 1635; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc 1636; GFX8-NEXT: v_max_f16_e32 v6, v1, v3 1637; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1638; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 1639; GFX8-NEXT: v_max_f16_e32 v3, v0, v2 1640; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1641; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 1642; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5 1643; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1644; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 1645; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1646; GFX8-NEXT: s_setpc_b64 s[30:31] 1647; 1648; GFX900-LABEL: v_maximum_v4f16__nsz: 1649; GFX900: ; %bb.0: 1650; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1651; GFX900-NEXT: v_pk_max_f16 v4, v1, v3 1652; GFX900-NEXT: v_mov_b32_e32 v5, 0x7e00 1653; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 1654; GFX900-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc 1655; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1656; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1657; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 1658; GFX900-NEXT: v_pk_max_f16 v3, v0, v2 1659; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1660; GFX900-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 1661; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1662; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1663; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 1664; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1665; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 1666; GFX900-NEXT: v_perm_b32 v1, v1, v6, s4 1667; GFX900-NEXT: s_setpc_b64 s[30:31] 1668; 1669; GFX950-LABEL: 
v_maximum_v4f16__nsz: 1670; GFX950: ; %bb.0: 1671; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1672; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1673; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1674; GFX950-NEXT: s_setpc_b64 s[30:31] 1675; 1676; GFX10-LABEL: v_maximum_v4f16__nsz: 1677; GFX10: ; %bb.0: 1678; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1679; GFX10-NEXT: v_pk_max_f16 v4, v1, v3 1680; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1681; GFX10-NEXT: v_pk_max_f16 v5, v0, v2 1682; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo 1683; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1684; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5 1685; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1686; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo 1687; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 1688; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo 1689; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 1690; GFX10-NEXT: v_perm_b32 v0, v0, v5, 0x5040100 1691; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1692; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100 1693; GFX10-NEXT: s_setpc_b64 s[30:31] 1694; 1695; GFX11-LABEL: v_maximum_v4f16__nsz: 1696; GFX11: ; %bb.0: 1697; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1698; GFX11-NEXT: v_pk_max_f16 v4, v1, v3 1699; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v3 1700; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 1701; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 1702; GFX11-NEXT: v_pk_max_f16 v7, v0, v2 1703; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2 1704; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo 1705; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1706; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v2 1707; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v7 1708; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 1709; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo 1710; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v8 1711; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 
; @v_maximum_v4f16__nnan_nsz (its define is inside the next line) calls
; @llvm.maximum.v4f16 with both the nnan and nsz flags on two <4 x half>
; arguments and returns the result. In the checks for this function, GFX7, GFX8,
; GFX900, GFX10 and GFX11 use max instructions with no v_cmp_o/v_cndmask, GFX950
; uses v_pk_maximum3_f16 and GFX12 uses v_pk_maximum_f16.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE at the top of the file); regenerate them rather than editing by hand.
| instskip(SKIP_1) | instid1(VALU_DEP_2) 1712; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo 1713; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v5 1714; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 1715; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo 1716; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1717; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 1718; GFX11-NEXT: s_setpc_b64 s[30:31] 1719; 1720; GFX12-LABEL: v_maximum_v4f16__nsz: 1721; GFX12: ; %bb.0: 1722; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1723; GFX12-NEXT: s_wait_expcnt 0x0 1724; GFX12-NEXT: s_wait_samplecnt 0x0 1725; GFX12-NEXT: s_wait_bvhcnt 0x0 1726; GFX12-NEXT: s_wait_kmcnt 0x0 1727; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1728; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1729; GFX12-NEXT: s_setpc_b64 s[30:31] 1730 %op = call nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1) 1731 ret <4 x half> %op 1732} 1733 1734define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) { 1735; GFX7-LABEL: v_maximum_v4f16__nnan_nsz: 1736; GFX7: ; %bb.0: 1737; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1738; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 1739; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 1740; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1741; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1742; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1743; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1744; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1745; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1746; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 1747; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 1748; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1749; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1750; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1751; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1752; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 1753; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1754; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 1755; GFX7-NEXT: v_max_f32_e32 v1, v1, v5 1756; GFX7-NEXT: v_max_f32_e32 v2, v2, v6 1757; GFX7-NEXT: v_max_f32_e32 v3, v3, v7 1758; GFX7-NEXT: s_setpc_b64 s[30:31] 
1759; 1760; GFX8-LABEL: v_maximum_v4f16__nnan_nsz: 1761; GFX8: ; %bb.0: 1762; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1763; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1764; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1765; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 1766; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 1767; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 1768; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1769; GFX8-NEXT: s_setpc_b64 s[30:31] 1770; 1771; GFX900-LABEL: v_maximum_v4f16__nnan_nsz: 1772; GFX900: ; %bb.0: 1773; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1774; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 1775; GFX900-NEXT: v_pk_max_f16 v1, v1, v3 1776; GFX900-NEXT: s_setpc_b64 s[30:31] 1777; 1778; GFX950-LABEL: v_maximum_v4f16__nnan_nsz: 1779; GFX950: ; %bb.0: 1780; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1781; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1782; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 1783; GFX950-NEXT: s_setpc_b64 s[30:31] 1784; 1785; GFX10-LABEL: v_maximum_v4f16__nnan_nsz: 1786; GFX10: ; %bb.0: 1787; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1788; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 1789; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 1790; GFX10-NEXT: s_setpc_b64 s[30:31] 1791; 1792; GFX11-LABEL: v_maximum_v4f16__nnan_nsz: 1793; GFX11: ; %bb.0: 1794; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1795; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 1796; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 1797; GFX11-NEXT: s_setpc_b64 s[30:31] 1798; 1799; GFX12-LABEL: v_maximum_v4f16__nnan_nsz: 1800; GFX12: ; %bb.0: 1801; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1802; GFX12-NEXT: s_wait_expcnt 0x0 1803; GFX12-NEXT: s_wait_samplecnt 0x0 1804; GFX12-NEXT: s_wait_bvhcnt 0x0 1805; GFX12-NEXT: s_wait_kmcnt 0x0 1806; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1807; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 1808; GFX12-NEXT: s_setpc_b64 
; @v_maximum_v8f16 (its define is inside the next line) calls
; @llvm.maximum.v8f16 with no fast-math flags on two <8 x half> arguments and
; returns the result. In the checks for this function, GFX950 uses four
; v_pk_maximum3_f16 and GFX12 uses four v_pk_maximum_f16, while GFX7, GFX8,
; GFX900, GFX10 and GFX11 use a max instruction followed by v_cmp_o and
; v_cndmask.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE at the top of the file); regenerate them rather than editing by hand.
s[30:31] 1809 %op = call nnan nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1) 1810 ret <4 x half> %op 1811} 1812 1813define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) { 1814; GFX7-LABEL: v_maximum_v8f16: 1815; GFX7: ; %bb.0: 1816; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1817; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 1818; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1819; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 1820; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1821; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 1822; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 1823; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 1824; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1825; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 1826; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 1827; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 1828; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1829; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 1830; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 1831; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 1832; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1833; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 1834; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 1835; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 1836; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 1837; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15 1838; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 1839; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 1840; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 1841; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 1842; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 1843; GFX7-NEXT: v_max_f32_e32 v16, v0, v8 1844; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000 1845; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 1846; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13 1847; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 1848; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc 1849; GFX7-NEXT: v_max_f32_e32 v8, v1, v9 1850; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 1851; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 1852; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 1853; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc 1854; GFX7-NEXT: v_max_f32_e32 v8, v2, v10 1855; GFX7-NEXT: 
v_cmp_o_f32_e32 vcc, v2, v10 1856; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 1857; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 1858; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc 1859; GFX7-NEXT: v_max_f32_e32 v8, v3, v11 1860; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 1861; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc 1862; GFX7-NEXT: v_max_f32_e32 v8, v4, v12 1863; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 1864; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc 1865; GFX7-NEXT: v_max_f32_e32 v8, v5, v13 1866; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 1867; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc 1868; GFX7-NEXT: v_max_f32_e32 v8, v6, v14 1869; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14 1870; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc 1871; GFX7-NEXT: v_max_f32_e32 v8, v7, v15 1872; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15 1873; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc 1874; GFX7-NEXT: s_setpc_b64 s[30:31] 1875; 1876; GFX8-LABEL: v_maximum_v8f16: 1877; GFX8: ; %bb.0: 1878; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1879; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v7 1880; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3 1881; GFX8-NEXT: v_max_f16_e32 v10, v9, v8 1882; GFX8-NEXT: v_mov_b32_e32 v11, 0x7e00 1883; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v9, v8 1884; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v10, vcc 1885; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v6 1886; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v2 1887; GFX8-NEXT: v_max_f16_e32 v12, v10, v9 1888; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v10, v9 1889; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc 1890; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v5 1891; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v1 1892; GFX8-NEXT: v_max_f16_e32 v13, v12, v10 1893; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v12, v10 1894; GFX8-NEXT: v_cndmask_b32_e32 v10, v11, v13, vcc 1895; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v4 1896; GFX8-NEXT: v_lshrrev_b32_e32 v13, 16, v0 1897; GFX8-NEXT: v_max_f16_e32 v14, v13, v12 1898; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v13, v12 1899; GFX8-NEXT: 
v_cndmask_b32_e32 v12, v11, v14, vcc 1900; GFX8-NEXT: v_max_f16_e32 v13, v3, v7 1901; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v7 1902; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v13, vcc 1903; GFX8-NEXT: v_max_f16_e32 v7, v2, v6 1904; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v6 1905; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v7, vcc 1906; GFX8-NEXT: v_max_f16_e32 v6, v1, v5 1907; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v5 1908; GFX8-NEXT: v_cndmask_b32_e32 v1, v11, v6, vcc 1909; GFX8-NEXT: v_max_f16_e32 v5, v0, v4 1910; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v4 1911; GFX8-NEXT: v_cndmask_b32_e32 v0, v11, v5, vcc 1912; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v12 1913; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1914; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v10 1915; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1916; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v9 1917; GFX8-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1918; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v8 1919; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1920; GFX8-NEXT: s_setpc_b64 s[30:31] 1921; 1922; GFX900-LABEL: v_maximum_v8f16: 1923; GFX900: ; %bb.0: 1924; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1925; GFX900-NEXT: v_pk_max_f16 v8, v3, v7 1926; GFX900-NEXT: v_mov_b32_e32 v9, 0x7e00 1927; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v7 1928; GFX900-NEXT: v_cndmask_b32_e32 v10, v9, v8, vcc 1929; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v8 1930; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1 1931; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc 1932; GFX900-NEXT: v_pk_max_f16 v7, v2, v6 1933; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v6 1934; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v7, vcc 1935; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v7 1936; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v6 
src0_sel:WORD_1 src1_sel:WORD_1 1937; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc 1938; GFX900-NEXT: v_pk_max_f16 v6, v1, v5 1939; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v5 1940; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v6, vcc 1941; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v6 1942; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1 1943; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v6, vcc 1944; GFX900-NEXT: v_pk_max_f16 v5, v0, v4 1945; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v4 1946; GFX900-NEXT: v_cndmask_b32_e32 v6, v9, v5, vcc 1947; GFX900-NEXT: v_lshrrev_b32_e32 v5, 16, v5 1948; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1 1949; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc 1950; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1951; GFX900-NEXT: v_perm_b32 v0, v0, v6, s4 1952; GFX900-NEXT: v_perm_b32 v1, v1, v7, s4 1953; GFX900-NEXT: v_perm_b32 v2, v2, v8, s4 1954; GFX900-NEXT: v_perm_b32 v3, v3, v10, s4 1955; GFX900-NEXT: s_setpc_b64 s[30:31] 1956; 1957; GFX950-LABEL: v_maximum_v8f16: 1958; GFX950: ; %bb.0: 1959; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1960; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4 1961; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5 1962; GFX950-NEXT: v_pk_maximum3_f16 v2, v2, v6, v6 1963; GFX950-NEXT: v_pk_maximum3_f16 v3, v3, v7, v7 1964; GFX950-NEXT: s_setpc_b64 s[30:31] 1965; 1966; GFX10-LABEL: v_maximum_v8f16: 1967; GFX10: ; %bb.0: 1968; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1969; GFX10-NEXT: v_pk_max_f16 v8, v3, v7 1970; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7 1971; GFX10-NEXT: v_pk_max_f16 v9, v2, v6 1972; GFX10-NEXT: v_pk_max_f16 v12, v1, v5 1973; GFX10-NEXT: v_pk_max_f16 v13, v0, v4 1974; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v8, vcc_lo 1975; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6 1976; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v9 1977; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v8 1978; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo 1979; GFX10-NEXT: 
v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1 1980; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo 1981; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5 1982; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v13 1983; GFX10-NEXT: v_perm_b32 v2, v2, v9, 0x5040100 1984; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v12, vcc_lo 1985; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4 1986; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v12 1987; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v13, vcc_lo 1988; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1 1989; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo 1990; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1 1991; GFX10-NEXT: v_perm_b32 v0, v0, v13, 0x5040100 1992; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo 1993; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1 1994; GFX10-NEXT: v_perm_b32 v1, v1, v6, 0x5040100 1995; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo 1996; GFX10-NEXT: v_perm_b32 v3, v3, v10, 0x5040100 1997; GFX10-NEXT: s_setpc_b64 s[30:31] 1998; 1999; GFX11-LABEL: v_maximum_v8f16: 2000; GFX11: ; %bb.0: 2001; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2002; GFX11-NEXT: v_pk_max_f16 v8, v3, v7 2003; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7 2004; GFX11-NEXT: v_pk_max_f16 v10, v2, v6 2005; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v6 2006; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v2 2007; GFX11-NEXT: v_pk_max_f16 v14, v1, v5 2008; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo 2009; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v6 2010; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v10 2011; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v7 2012; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v3 2013; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v8 2014; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo 2015; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v12, v11 2016; GFX11-NEXT: v_pk_max_f16 v11, v0, v4 2017; GFX11-NEXT: v_lshrrev_b32_e32 v12, 
16, v4 2018; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo 2019; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5 2020; GFX11-NEXT: v_lshrrev_b32_e32 v13, 16, v0 2021; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v5 2022; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1 2023; GFX11-NEXT: v_lshrrev_b32_e32 v15, 16, v11 2024; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo 2025; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v4 2026; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v14 2027; GFX11-NEXT: v_perm_b32 v2, v6, v2, 0x5040100 2028; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo 2029; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v13, v12 2030; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo 2031; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v5 2032; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) 2033; GFX11-NEXT: v_perm_b32 v0, v4, v0, 0x5040100 2034; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo 2035; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v7 2036; GFX11-NEXT: v_perm_b32 v1, v1, v10, 0x5040100 2037; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo 2038; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2039; GFX11-NEXT: v_perm_b32 v3, v3, v9, 0x5040100 2040; GFX11-NEXT: s_setpc_b64 s[30:31] 2041; 2042; GFX12-LABEL: v_maximum_v8f16: 2043; GFX12: ; %bb.0: 2044; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2045; GFX12-NEXT: s_wait_expcnt 0x0 2046; GFX12-NEXT: s_wait_samplecnt 0x0 2047; GFX12-NEXT: s_wait_bvhcnt 0x0 2048; GFX12-NEXT: s_wait_kmcnt 0x0 2049; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4 2050; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 2051; GFX12-NEXT: v_pk_maximum_f16 v2, v2, v6 2052; GFX12-NEXT: v_pk_maximum_f16 v3, v3, v7 2053; GFX12-NEXT: s_setpc_b64 s[30:31] 2054 %op = call <8 x half> @llvm.maximum.v8f16(<8 x half> %src0, <8 x half> %src1) 2055 ret <8 x half> %op 2056} 2057 2058define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) { 2059; GFX7-LABEL: v_maximum_v16f16: 2060; GFX7: ; %bb.0: 2061; GFX7-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 2062; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17 2063; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 2064; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 2065; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 2066; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2067; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 2068; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 2069; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 2070; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 2071; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 2072; GFX7-NEXT: v_max_f32_e32 v1, v1, v17 2073; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v18 2074; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 2075; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 2076; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 2077; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2078; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 2079; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 2080; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 2081; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v17 2082; GFX7-NEXT: v_max_f32_e32 v2, v2, v17 2083; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v19 2084; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 2085; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 2086; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 2087; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2088; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 2089; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 2090; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 2091; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v17 2092; GFX7-NEXT: v_max_f32_e32 v3, v3, v17 2093; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20 2094; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 2095; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 2096; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v28 2097; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2098; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 2099; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 2100; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 2101; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v17 2102; GFX7-NEXT: v_max_f32_e32 v4, v4, v17 2103; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v21 2104; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 2105; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 2106; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v16 2107; 
GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2108; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v12, v18 2109; GFX7-NEXT: v_max_f32_e32 v12, v12, v18 2110; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v29 2111; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v17 2112; GFX7-NEXT: v_max_f32_e32 v5, v5, v17 2113; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v22 2114; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v0 2115; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v18 2116; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v13 2117; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2118; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v19 2119; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v20 2120; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v18, v16 2121; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v17 2122; GFX7-NEXT: v_max_f32_e32 v6, v6, v17 2123; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v23 2124; GFX7-NEXT: v_max_f32_e32 v16, v18, v16 2125; GFX7-NEXT: v_max_f32_e32 v18, v13, v0 2126; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v0 2127; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2128; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v15 2129; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v30 2130; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 2131; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v17 2132; GFX7-NEXT: v_max_f32_e32 v7, v7, v17 2133; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v24 2134; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 2135; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 2136; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v13 2137; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2138; GFX7-NEXT: v_mov_b32_e32 v19, 0x7fc00000 2139; GFX7-NEXT: v_cndmask_b32_e32 v1, v19, v1, vcc 2140; GFX7-NEXT: v_cndmask_b32_e64 v13, v19, v16, s[26:27] 2141; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v8, v17 2142; GFX7-NEXT: v_max_f32_e32 v8, v8, v17 2143; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v25 2144; GFX7-NEXT: v_max_f32_e32 v16, v14, v15 2145; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v14, v15 2146; GFX7-NEXT: v_cndmask_b32_e32 v14, v19, v16, vcc 2147; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2148; GFX7-NEXT: v_cndmask_b32_e64 v2, v19, v2, s[4:5] 2149; GFX7-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[6:7] 
2150; GFX7-NEXT: v_cndmask_b32_e64 v4, v19, v4, s[8:9] 2151; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v9, v17 2152; GFX7-NEXT: v_max_f32_e32 v9, v9, v17 2153; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v26 2154; GFX7-NEXT: v_cndmask_b32_e64 v5, v19, v5, s[10:11] 2155; GFX7-NEXT: v_cndmask_b32_e64 v6, v19, v6, s[12:13] 2156; GFX7-NEXT: v_cndmask_b32_e64 v7, v19, v7, s[14:15] 2157; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2158; GFX7-NEXT: v_cndmask_b32_e64 v8, v19, v8, s[16:17] 2159; GFX7-NEXT: v_cndmask_b32_e64 v9, v19, v9, s[18:19] 2160; GFX7-NEXT: v_cndmask_b32_e64 v12, v19, v12, s[24:25] 2161; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v10, v17 2162; GFX7-NEXT: v_max_f32_e32 v10, v10, v17 2163; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v27 2164; GFX7-NEXT: v_cndmask_b32_e64 v10, v19, v10, s[20:21] 2165; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 2166; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v11, v17 2167; GFX7-NEXT: v_max_f32_e32 v11, v11, v17 2168; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32 2169; GFX7-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[22:23] 2170; GFX7-NEXT: s_waitcnt vmcnt(0) 2171; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v17 2172; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v0 2173; GFX7-NEXT: v_cndmask_b32_e64 v0, v19, v18, s[28:29] 2174; GFX7-NEXT: v_max_f32_e32 v15, v20, v17 2175; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v20, v17 2176; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc 2177; GFX7-NEXT: s_setpc_b64 s[30:31] 2178; 2179; GFX8-LABEL: v_maximum_v16f16: 2180; GFX8: ; %bb.0: 2181; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2182; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v14 2183; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v6 2184; GFX8-NEXT: v_max_f16_e32 v16, v18, v17 2185; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v18, v17 2186; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v13 2187; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v5 2188; GFX8-NEXT: v_max_f16_e32 v20, v18, v17 2189; GFX8-NEXT: v_cmp_o_f16_e64 s[4:5], v18, v17 2190; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v12 2191; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v4 2192; 
GFX8-NEXT: v_max_f16_e32 v21, v18, v17 2193; GFX8-NEXT: v_cmp_o_f16_e64 s[6:7], v18, v17 2194; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v11 2195; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v3 2196; GFX8-NEXT: v_max_f16_e32 v22, v18, v17 2197; GFX8-NEXT: v_cmp_o_f16_e64 s[8:9], v18, v17 2198; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v10 2199; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v2 2200; GFX8-NEXT: v_max_f16_e32 v23, v18, v17 2201; GFX8-NEXT: v_cmp_o_f16_e64 s[10:11], v18, v17 2202; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v9 2203; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v1 2204; GFX8-NEXT: v_max_f16_e32 v24, v18, v17 2205; GFX8-NEXT: v_cmp_o_f16_e64 s[12:13], v18, v17 2206; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v8 2207; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v0 2208; GFX8-NEXT: v_max_f16_e32 v25, v18, v17 2209; GFX8-NEXT: v_cmp_o_f16_e64 s[14:15], v18, v17 2210; GFX8-NEXT: v_max_f16_e32 v17, v6, v14 2211; GFX8-NEXT: v_cmp_o_f16_e64 s[16:17], v6, v14 2212; GFX8-NEXT: v_max_f16_e32 v6, v5, v13 2213; GFX8-NEXT: v_cmp_o_f16_e64 s[18:19], v5, v13 2214; GFX8-NEXT: v_max_f16_e32 v5, v4, v12 2215; GFX8-NEXT: v_cmp_o_f16_e64 s[20:21], v4, v12 2216; GFX8-NEXT: v_max_f16_e32 v4, v3, v11 2217; GFX8-NEXT: v_cmp_o_f16_e64 s[22:23], v3, v11 2218; GFX8-NEXT: v_max_f16_e32 v11, v7, v15 2219; GFX8-NEXT: v_cmp_o_f16_e64 s[24:25], v7, v15 2220; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v15 2221; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v7 2222; GFX8-NEXT: v_mov_b32_e32 v19, 0x7e00 2223; GFX8-NEXT: v_max_f16_e32 v13, v7, v12 2224; GFX8-NEXT: v_cmp_o_f16_e64 s[26:27], v7, v12 2225; GFX8-NEXT: v_max_f16_e32 v3, v2, v10 2226; GFX8-NEXT: v_cndmask_b32_e64 v12, v19, v13, s[26:27] 2227; GFX8-NEXT: v_cndmask_b32_e32 v13, v19, v16, vcc 2228; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v2, v10 2229; GFX8-NEXT: v_max_f16_e32 v14, v1, v9 2230; GFX8-NEXT: v_cndmask_b32_e32 v2, v19, v3, vcc 2231; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v1, v9 2232; GFX8-NEXT: v_max_f16_e32 v7, v0, v8 2233; GFX8-NEXT: v_cndmask_b32_e64 v18, v19, v22, s[8:9] 
2234; GFX8-NEXT: v_cndmask_b32_e64 v22, v19, v25, s[14:15] 2235; GFX8-NEXT: v_cndmask_b32_e32 v1, v19, v14, vcc 2236; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v8 2237; GFX8-NEXT: v_cndmask_b32_e64 v16, v19, v21, s[6:7] 2238; GFX8-NEXT: v_cndmask_b32_e64 v21, v19, v24, s[12:13] 2239; GFX8-NEXT: v_cndmask_b32_e32 v0, v19, v7, vcc 2240; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v22 2241; GFX8-NEXT: v_cndmask_b32_e64 v15, v19, v20, s[4:5] 2242; GFX8-NEXT: v_cndmask_b32_e64 v20, v19, v23, s[10:11] 2243; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2244; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v21 2245; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2246; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v20 2247; GFX8-NEXT: v_cndmask_b32_e64 v4, v19, v4, s[22:23] 2248; GFX8-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2249; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v18 2250; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v5, s[20:21] 2251; GFX8-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2252; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v16 2253; GFX8-NEXT: v_cndmask_b32_e64 v6, v19, v6, s[18:19] 2254; GFX8-NEXT: v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2255; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v15 2256; GFX8-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[24:25] 2257; GFX8-NEXT: v_cndmask_b32_e64 v17, v19, v17, s[16:17] 2258; GFX8-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2259; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v13 2260; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v12 2261; GFX8-NEXT: v_or_b32_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2262; GFX8-NEXT: v_or_b32_sdwa v7, v11, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2263; GFX8-NEXT: 
s_setpc_b64 s[30:31] 2264; 2265; GFX900-LABEL: v_maximum_v16f16: 2266; GFX900: ; %bb.0: 2267; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2268; GFX900-NEXT: v_pk_max_f16 v16, v7, v15 2269; GFX900-NEXT: v_mov_b32_e32 v17, 0x7e00 2270; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v7, v15 2271; GFX900-NEXT: v_cndmask_b32_e32 v18, v17, v16, vcc 2272; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v16 2273; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1 2274; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v16, vcc 2275; GFX900-NEXT: v_pk_max_f16 v15, v6, v14 2276; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v6, v14 2277; GFX900-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc 2278; GFX900-NEXT: v_lshrrev_b32_e32 v15, 16, v15 2279; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1 2280; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v15, vcc 2281; GFX900-NEXT: v_pk_max_f16 v14, v5, v13 2282; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v5, v13 2283; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v14, vcc 2284; GFX900-NEXT: v_lshrrev_b32_e32 v14, 16, v14 2285; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1 2286; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v14, vcc 2287; GFX900-NEXT: v_pk_max_f16 v13, v4, v12 2288; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v4, v12 2289; GFX900-NEXT: v_cndmask_b32_e32 v14, v17, v13, vcc 2290; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v13 2291; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1 2292; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v13, vcc 2293; GFX900-NEXT: v_pk_max_f16 v12, v3, v11 2294; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v3, v11 2295; GFX900-NEXT: v_cndmask_b32_e32 v13, v17, v12, vcc 2296; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v12 2297; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1 2298; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v12, vcc 2299; GFX900-NEXT: v_pk_max_f16 v11, v2, v10 2300; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v2, v10 2301; GFX900-NEXT: v_cndmask_b32_e32 
v12, v17, v11, vcc 2302; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v11 2303; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1 2304; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v11, vcc 2305; GFX900-NEXT: v_pk_max_f16 v10, v1, v9 2306; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v1, v9 2307; GFX900-NEXT: v_cndmask_b32_e32 v11, v17, v10, vcc 2308; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v10 2309; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1 2310; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v10, vcc 2311; GFX900-NEXT: v_pk_max_f16 v9, v0, v8 2312; GFX900-NEXT: v_cmp_o_f16_e32 vcc, v0, v8 2313; GFX900-NEXT: v_cndmask_b32_e32 v10, v17, v9, vcc 2314; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v9 2315; GFX900-NEXT: v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1 2316; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v9, vcc 2317; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2318; GFX900-NEXT: v_perm_b32 v0, v0, v10, s4 2319; GFX900-NEXT: v_perm_b32 v1, v1, v11, s4 2320; GFX900-NEXT: v_perm_b32 v2, v2, v12, s4 2321; GFX900-NEXT: v_perm_b32 v3, v3, v13, s4 2322; GFX900-NEXT: v_perm_b32 v4, v4, v14, s4 2323; GFX900-NEXT: v_perm_b32 v5, v5, v15, s4 2324; GFX900-NEXT: v_perm_b32 v6, v6, v16, s4 2325; GFX900-NEXT: v_perm_b32 v7, v7, v18, s4 2326; GFX900-NEXT: s_setpc_b64 s[30:31] 2327; 2328; GFX950-LABEL: v_maximum_v16f16: 2329; GFX950: ; %bb.0: 2330; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2331; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v8, v8 2332; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v9, v9 2333; GFX950-NEXT: v_pk_maximum3_f16 v2, v2, v10, v10 2334; GFX950-NEXT: v_pk_maximum3_f16 v3, v3, v11, v11 2335; GFX950-NEXT: v_pk_maximum3_f16 v4, v4, v12, v12 2336; GFX950-NEXT: v_pk_maximum3_f16 v5, v5, v13, v13 2337; GFX950-NEXT: v_pk_maximum3_f16 v6, v6, v14, v14 2338; GFX950-NEXT: v_pk_maximum3_f16 v7, v7, v15, v15 2339; GFX950-NEXT: s_setpc_b64 s[30:31] 2340; 2341; GFX10-LABEL: v_maximum_v16f16: 2342; GFX10: ; %bb.0: 2343; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2344; GFX10-NEXT: v_pk_max_f16 v16, v7, v15 2345; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15 2346; GFX10-NEXT: v_pk_max_f16 v18, v6, v14 2347; GFX10-NEXT: v_pk_max_f16 v19, v3, v11 2348; GFX10-NEXT: v_pk_max_f16 v20, v2, v10 2349; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v16 2350; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo 2351; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1 2352; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v18 2353; GFX10-NEXT: v_pk_max_f16 v21, v0, v8 2354; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo 2355; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14 2356; GFX10-NEXT: v_pk_max_f16 v17, v5, v13 2357; GFX10-NEXT: v_lshrrev_b32_e32 v23, 16, v21 2358; GFX10-NEXT: v_perm_b32 v7, v7, v16, 0x5040100 2359; GFX10-NEXT: v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo 2360; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1 2361; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17 2362; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo 2363; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13 2364; GFX10-NEXT: v_perm_b32 v6, v6, v18, 0x5040100 2365; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo 2366; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1 2367; GFX10-NEXT: v_pk_max_f16 v17, v4, v12 2368; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo 2369; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12 2370; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v17 2371; GFX10-NEXT: v_perm_b32 v5, v5, v15, 0x5040100 2372; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo 2373; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11 2374; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v19 2375; GFX10-NEXT: v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo 2376; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1 2377; GFX10-NEXT: v_pk_max_f16 v11, v1, v9 2378; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo 2379; GFX10-NEXT: 
v_cmp_o_f16_e32 vcc_lo, v2, v10 2380; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v11 2381; GFX10-NEXT: v_perm_b32 v3, v3, v19, 0x5040100 2382; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo 2383; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9 2384; GFX10-NEXT: v_lshrrev_b32_e32 v20, 16, v20 2385; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo 2386; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1 2387; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo 2388; GFX10-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8 2389; GFX10-NEXT: v_perm_b32 v1, v1, v11, 0x5040100 2390; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo 2391; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1 2392; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo 2393; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1 2394; GFX10-NEXT: v_perm_b32 v0, v0, v9, 0x5040100 2395; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo 2396; GFX10-NEXT: v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1 2397; GFX10-NEXT: v_perm_b32 v2, v2, v17, 0x5040100 2398; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo 2399; GFX10-NEXT: v_perm_b32 v4, v4, v13, 0x5040100 2400; GFX10-NEXT: s_setpc_b64 s[30:31] 2401; 2402; GFX11-LABEL: v_maximum_v16f16: 2403; GFX11: ; %bb.0: 2404; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2405; GFX11-NEXT: v_pk_max_f16 v16, v7, v15 2406; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v15 2407; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v7 2408; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v7, v15 2409; GFX11-NEXT: v_pk_max_f16 v15, v6, v14 2410; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v16 2411; GFX11-NEXT: v_pk_max_f16 v20, v4, v12 2412; GFX11-NEXT: v_pk_max_f16 v22, v2, v10 2413; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo 2414; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17 2415; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v14 2416; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v6 2417; GFX11-NEXT: 
v_lshrrev_b32_e32 v23, 16, v8 2418; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v0 2419; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo 2420; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v6, v14 2421; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v15 2422; GFX11-NEXT: v_pk_max_f16 v14, v5, v13 2423; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 2424; GFX11-NEXT: v_perm_b32 v7, v16, v7, 0x5040100 2425; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo 2426; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17 2427; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v13 2428; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v5 2429; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo 2430; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v5, v13 2431; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v14 2432; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) 2433; GFX11-NEXT: v_perm_b32 v6, v15, v6, 0x5040100 2434; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo 2435; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v18, v17 2436; GFX11-NEXT: v_pk_max_f16 v17, v3, v11 2437; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v20 2438; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo 2439; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12 2440; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v11 2441; GFX11-NEXT: v_lshrrev_b32_e32 v21, 16, v17 2442; GFX11-NEXT: v_lshrrev_b32_e32 v12, 16, v12 2443; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 2444; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo 2445; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v3 2446; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v11 2447; GFX11-NEXT: v_perm_b32 v5, v13, v5, 0x5040100 2448; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo 2449; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 2450; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v20, v19 2451; GFX11-NEXT: v_pk_max_f16 v19, v1, v9 2452; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v22 2453; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo 2454; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10 2455; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v10 2456; 
GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2457; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 2458; GFX11-NEXT: v_perm_b32 v3, v11, v3, 0x5040100 2459; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo 2460; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9 2461; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v9 2462; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1 2463; GFX11-NEXT: v_pk_max_f16 v22, v0, v8 2464; GFX11-NEXT: v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo 2465; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v19 2466; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2467; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v9 2468; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v22 2469; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2470; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo 2471; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v8 2472; GFX11-NEXT: v_perm_b32 v1, v1, v21, 0x5040100 2473; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo 2474; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v24, v23 2475; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo 2476; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v2, v10 2477; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) 2478; GFX11-NEXT: v_perm_b32 v0, v8, v0, 0x5040100 2479; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo 2480; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v4, v12 2481; GFX11-NEXT: v_perm_b32 v2, v2, v17, 0x5040100 2482; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo 2483; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2484; GFX11-NEXT: v_perm_b32 v4, v4, v14, 0x5040100 2485; GFX11-NEXT: s_setpc_b64 s[30:31] 2486; 2487; GFX12-LABEL: v_maximum_v16f16: 2488; GFX12: ; %bb.0: 2489; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2490; GFX12-NEXT: s_wait_expcnt 0x0 2491; GFX12-NEXT: s_wait_samplecnt 0x0 2492; GFX12-NEXT: s_wait_bvhcnt 0x0 2493; GFX12-NEXT: s_wait_kmcnt 0x0 2494; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v8 2495; GFX12-NEXT: v_pk_maximum_f16 
v1, v1, v9 2496; GFX12-NEXT: v_pk_maximum_f16 v2, v2, v10 2497; GFX12-NEXT: v_pk_maximum_f16 v3, v3, v11 2498; GFX12-NEXT: v_pk_maximum_f16 v4, v4, v12 2499; GFX12-NEXT: v_pk_maximum_f16 v5, v5, v13 2500; GFX12-NEXT: v_pk_maximum_f16 v6, v6, v14 2501; GFX12-NEXT: v_pk_maximum_f16 v7, v7, v15 2502; GFX12-NEXT: s_setpc_b64 s[30:31] 2503 %op = call <16 x half> @llvm.maximum.v16f16(<16 x half> %src0, <16 x half> %src1) 2504 ret <16 x half> %op 2505} 2506;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 2507; GCN: {{.*}} 2508