1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s 3; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s 5 6define float @v_fmaximum3_f32(float %a, float %b, float %c) { 7; GFX12-LABEL: v_fmaximum3_f32: 8; GFX12: ; %bb.0: 9; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 10; GFX12-NEXT: s_wait_expcnt 0x0 11; GFX12-NEXT: s_wait_samplecnt 0x0 12; GFX12-NEXT: s_wait_bvhcnt 0x0 13; GFX12-NEXT: s_wait_kmcnt 0x0 14; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2 15; GFX12-NEXT: s_setpc_b64 s[30:31] 16; 17; GFX940-LABEL: v_fmaximum3_f32: 18; GFX940: ; %bb.0: 19; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 21; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 22; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 23; GFX940-NEXT: s_nop 1 24; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 25; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 26; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 27; GFX940-NEXT: s_nop 1 28; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 29; GFX940-NEXT: s_setpc_b64 s[30:31] 30; 31; GFX950-LABEL: v_fmaximum3_f32: 32; GFX950: ; %bb.0: 33; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 34; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v2 35; GFX950-NEXT: s_setpc_b64 s[30:31] 36 %max0 = call float @llvm.maximum.f32(float %a, float %b) 37 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 38 ret float %max1 39} 40 41define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) { 42; GFX12-LABEL: v_fmaximum3_f32_commute: 43; GFX12: ; %bb.0: 44; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 45; GFX12-NEXT: s_wait_expcnt 0x0 46; GFX12-NEXT: s_wait_samplecnt 0x0 47; GFX12-NEXT: s_wait_bvhcnt 0x0 48; GFX12-NEXT: s_wait_kmcnt 0x0 49; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1 50; GFX12-NEXT: s_setpc_b64 s[30:31] 51; 52; GFX940-LABEL: v_fmaximum3_f32_commute: 53; GFX940: ; %bb.0: 54; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 55; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 56; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 57; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 58; GFX940-NEXT: s_nop 1 59; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 60; GFX940-NEXT: v_max_f32_e32 v1, v2, v0 61; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 62; GFX940-NEXT: s_nop 1 63; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 64; GFX940-NEXT: s_setpc_b64 s[30:31] 65; 66; GFX950-LABEL: v_fmaximum3_f32_commute: 67; GFX950: ; %bb.0: 68; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 69; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v1 70; GFX950-NEXT: s_setpc_b64 s[30:31] 71 %max0 = call float @llvm.maximum.f32(float %a, float %b) 72 %max1 = call float @llvm.maximum.f32(float %c, float %max0) 73 ret float %max1 74} 75 76define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inreg %c) { 77; GFX12-LABEL: s_fmaximum3_f32: 78; GFX12: ; %bb.0: 79; GFX12-NEXT: v_mov_b32_e32 v0, s2 80; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 81; GFX12-NEXT: v_maximum3_f32 v0, s0, s1, v0 82; GFX12-NEXT: v_readfirstlane_b32 s0, v0 83; GFX12-NEXT: ; return to shader part epilog 84; 85; GFX940-LABEL: s_fmaximum3_f32: 86; GFX940: ; %bb.0: 87; GFX940-NEXT: v_mov_b32_e32 v0, s1 88; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 89; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 90; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 91; GFX940-NEXT: s_nop 1 92; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 93; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 94; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 95; GFX940-NEXT: s_nop 1 96; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 97; GFX940-NEXT: s_nop 0 98; GFX940-NEXT: v_readfirstlane_b32 s0, v0 99; GFX940-NEXT: ; return to shader part epilog 100; 101; GFX950-LABEL: s_fmaximum3_f32: 102; GFX950: ; %bb.0: 103; GFX950-NEXT: v_mov_b32_e32 v0, s1 104; GFX950-NEXT: v_mov_b32_e32 v1, s2 105; GFX950-NEXT: v_maximum3_f32 v0, s0, v0, v1 106; GFX950-NEXT: s_nop 0 107; GFX950-NEXT: v_readfirstlane_b32 s0, v0 108; GFX950-NEXT: ; return to shader part epilog 109 %max0 = call float @llvm.maximum.f32(float %a, float %b) 110 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 111 %cast = bitcast float %max1 to i32 112 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 113 ret i32 %readfirstlane 114} 115 116define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) { 117; GFX12-LABEL: v_fmaximum3_f32_fabs0: 118; GFX12: ; %bb.0: 119; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 120; GFX12-NEXT: s_wait_expcnt 0x0 121; GFX12-NEXT: s_wait_samplecnt 0x0 122; GFX12-NEXT: s_wait_bvhcnt 0x0 123; GFX12-NEXT: s_wait_kmcnt 0x0 124; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 125; GFX12-NEXT: s_setpc_b64 s[30:31] 126; 127; GFX940-LABEL: v_fmaximum3_f32_fabs0: 128; GFX940: ; %bb.0: 129; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 130; GFX940-NEXT: v_max_f32_e64 v3, |v0|, v1 131; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 132; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 133; GFX940-NEXT: s_nop 1 134; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 135; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 136; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 137; GFX940-NEXT: s_nop 1 138; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 139; GFX940-NEXT: s_setpc_b64 s[30:31] 140; 141; GFX950-LABEL: v_fmaximum3_f32_fabs0: 142; GFX950: ; %bb.0: 143; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 144; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 145; GFX950-NEXT: s_setpc_b64 s[30:31] 146 %a.fabs = call float @llvm.fabs.f32(float %a) 147 %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b) 148 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 149 ret float %max1 150} 151 152define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) { 153; GFX12-LABEL: v_fmaximum3_f32_fabs1: 154; GFX12: ; %bb.0: 155; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 156; GFX12-NEXT: s_wait_expcnt 0x0 157; GFX12-NEXT: s_wait_samplecnt 0x0 158; GFX12-NEXT: s_wait_bvhcnt 0x0 159; GFX12-NEXT: s_wait_kmcnt 0x0 160; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 161; GFX12-NEXT: s_setpc_b64 s[30:31] 162; 163; GFX940-LABEL: v_fmaximum3_f32_fabs1: 164; GFX940: ; %bb.0: 165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 166; GFX940-NEXT: v_max_f32_e64 v3, v0, |v1| 167; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 168; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| 169; GFX940-NEXT: s_nop 1 170; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 171; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 172; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 173; GFX940-NEXT: s_nop 1 174; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 175; GFX940-NEXT: s_setpc_b64 s[30:31] 176; 177; GFX950-LABEL: v_fmaximum3_f32_fabs1: 178; GFX950: ; %bb.0: 179; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 180; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 181; GFX950-NEXT: s_setpc_b64 s[30:31] 182 %b.fabs = call float @llvm.fabs.f32(float %b) 183 %max0 = call float @llvm.maximum.f32(float %a, float %b.fabs) 184 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 185 ret float %max1 186} 187 188define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) { 189; GFX12-LABEL: v_fmaximum3_f32_fabs2: 190; GFX12: ; %bb.0: 191; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 192; GFX12-NEXT: s_wait_expcnt 0x0 193; GFX12-NEXT: s_wait_samplecnt 0x0 194; GFX12-NEXT: s_wait_bvhcnt 0x0 195; GFX12-NEXT: s_wait_kmcnt 0x0 196; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2| 197; GFX12-NEXT: s_setpc_b64 s[30:31] 198; 199; GFX940-LABEL: v_fmaximum3_f32_fabs2: 200; GFX940: ; %bb.0: 201; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 202; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 203; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 204; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 205; GFX940-NEXT: s_nop 1 206; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 207; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| 208; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| 209; GFX940-NEXT: s_nop 1 210; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 211; GFX940-NEXT: s_setpc_b64 s[30:31] 212; 213; GFX950-LABEL: v_fmaximum3_f32_fabs2: 214; GFX950: ; %bb.0: 215; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 216; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, |v2| 217; GFX950-NEXT: s_setpc_b64 s[30:31] 218 %c.fabs = call float @llvm.fabs.f32(float %c) 219 %max0 = call float @llvm.maximum.f32(float %a, float %b) 220 %max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs) 221 ret float %max1 222} 223 224define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) { 225; GFX12-LABEL: v_fmaximum3_f32_fabs_all: 226; GFX12: ; %bb.0: 227; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 228; GFX12-NEXT: s_wait_expcnt 0x0 229; GFX12-NEXT: s_wait_samplecnt 0x0 230; GFX12-NEXT: s_wait_bvhcnt 0x0 231; GFX12-NEXT: s_wait_kmcnt 0x0 232; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| 233; GFX12-NEXT: s_setpc_b64 s[30:31] 234; 235; GFX940-LABEL: v_fmaximum3_f32_fabs_all: 236; GFX940: ; %bb.0: 237; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 238; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v1| 239; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 240; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| 241; GFX940-NEXT: s_nop 1 242; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 243; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| 244; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| 245; GFX940-NEXT: s_nop 1 246; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 247; GFX940-NEXT: s_setpc_b64 s[30:31] 248; 249; GFX950-LABEL: v_fmaximum3_f32_fabs_all: 250; GFX950: ; %bb.0: 251; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 252; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| 253; GFX950-NEXT: s_setpc_b64 s[30:31] 254 %a.fabs = call float @llvm.fabs.f32(float %a) 255 %b.fabs = call float @llvm.fabs.f32(float %b) 256 %c.fabs = call float @llvm.fabs.f32(float %c) 257 %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b.fabs) 258 %max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs) 259 ret float %max1 260} 261 262define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) { 263; GFX12-LABEL: v_fmaximum3_f32_fneg_all: 264; GFX12: ; %bb.0: 265; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 266; GFX12-NEXT: s_wait_expcnt 0x0 267; GFX12-NEXT: s_wait_samplecnt 0x0 268; GFX12-NEXT: s_wait_bvhcnt 0x0 269; GFX12-NEXT: s_wait_kmcnt 0x0 270; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 271; GFX12-NEXT: s_setpc_b64 s[30:31] 272; 273; GFX940-LABEL: v_fmaximum3_f32_fneg_all: 274; GFX940: ; %bb.0: 275; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 276; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v1 277; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 278; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 279; GFX940-NEXT: s_nop 1 280; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 281; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 282; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 283; GFX940-NEXT: s_nop 1 284; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 285; GFX940-NEXT: s_setpc_b64 s[30:31] 286; 287; GFX950-LABEL: v_fmaximum3_f32_fneg_all: 288; GFX950: ; %bb.0: 289; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 290; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 291; GFX950-NEXT: s_setpc_b64 s[30:31] 292 %a.fneg = fneg float %a 293 %b.fneg = fneg float %b 294 %c.fneg = fneg float %c 295 %max0 = call float @llvm.maximum.f32(float %a.fneg, float %b.fneg) 296 %max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg) 297 ret float %max1 298} 299 300define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) { 301; GFX12-LABEL: v_fmaximum3_f32_fneg_fabs_all: 302; GFX12: ; %bb.0: 303; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 304; GFX12-NEXT: s_wait_expcnt 0x0 305; GFX12-NEXT: s_wait_samplecnt 0x0 306; GFX12-NEXT: s_wait_bvhcnt 0x0 307; GFX12-NEXT: s_wait_kmcnt 0x0 308; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| 309; GFX12-NEXT: s_setpc_b64 s[30:31] 310; 311; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all: 312; GFX940: ; %bb.0: 313; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 314; GFX940-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| 315; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 316; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| 317; GFX940-NEXT: s_nop 1 318; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 319; GFX940-NEXT: v_max_f32_e64 v1, v0, -|v2| 320; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| 321; GFX940-NEXT: s_nop 1 322; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 323; GFX940-NEXT: s_setpc_b64 s[30:31] 324; 325; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all: 326; GFX950: ; %bb.0: 327; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 328; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| 329; GFX950-NEXT: s_setpc_b64 s[30:31] 330 %a.fabs = call float @llvm.fabs.f32(float %a) 331 %b.fabs = call float @llvm.fabs.f32(float %b) 332 %c.fabs = call float @llvm.fabs.f32(float %c) 333 %a.fneg.fabs = fneg float %a.fabs 334 %b.fneg.fabs = fneg float %b.fabs 335 %c.fneg.fabs = fneg float %c.fabs 336 %max0 = call float @llvm.maximum.f32(float %a.fneg.fabs, float %b.fneg.fabs) 337 %max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg.fabs) 338 ret float %max1 339} 340 341define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) { 342; GFX12-LABEL: v_fmaximum3_f32_fneg0: 343; GFX12: ; %bb.0: 344; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 345; GFX12-NEXT: s_wait_expcnt 0x0 346; GFX12-NEXT: s_wait_samplecnt 0x0 347; GFX12-NEXT: s_wait_bvhcnt 0x0 348; GFX12-NEXT: s_wait_kmcnt 0x0 349; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2 350; GFX12-NEXT: s_setpc_b64 s[30:31] 351; 352; GFX940-LABEL: v_fmaximum3_f32_fneg0: 353; GFX940: ; %bb.0: 354; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 355; GFX940-NEXT: v_max_f32_e64 v3, -v0, v1 356; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 357; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 358; GFX940-NEXT: s_nop 1 359; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 360; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 361; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 362; GFX940-NEXT: s_nop 1 363; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 364; GFX940-NEXT: s_setpc_b64 s[30:31] 365; 366; GFX950-LABEL: v_fmaximum3_f32_fneg0: 367; GFX950: ; %bb.0: 368; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 369; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v2 370; GFX950-NEXT: s_setpc_b64 s[30:31] 371 %a.fneg = fneg float %a 372 %max0 = call float @llvm.maximum.f32(float %a.fneg, float %b) 373 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 374 ret float %max1 375} 376 377define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) { 378; GFX12-LABEL: v_fmaximum3_f32_fneg1: 379; GFX12: ; %bb.0: 380; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 381; GFX12-NEXT: s_wait_expcnt 0x0 382; GFX12-NEXT: s_wait_samplecnt 0x0 383; GFX12-NEXT: s_wait_bvhcnt 0x0 384; GFX12-NEXT: s_wait_kmcnt 0x0 385; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2 386; GFX12-NEXT: s_setpc_b64 s[30:31] 387; 388; GFX940-LABEL: v_fmaximum3_f32_fneg1: 389; GFX940: ; %bb.0: 390; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 391; GFX940-NEXT: v_max_f32_e64 v3, v0, -v1 392; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 393; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 394; GFX940-NEXT: s_nop 1 395; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 396; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 397; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 398; GFX940-NEXT: s_nop 1 399; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 400; GFX940-NEXT: s_setpc_b64 s[30:31] 401; 402; GFX950-LABEL: v_fmaximum3_f32_fneg1: 403; GFX950: ; %bb.0: 404; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 405; GFX950-NEXT: v_maximum3_f32 v0, v0, -v1, v2 406; GFX950-NEXT: s_setpc_b64 s[30:31] 407 %b.fneg = fneg float %b 408 %max0 = call float @llvm.maximum.f32(float %a, float %b.fneg) 409 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 410 ret float %max1 411} 412 413define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) { 414; GFX12-LABEL: v_fmaximum3_f32_fneg2: 415; GFX12: ; %bb.0: 416; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 417; GFX12-NEXT: s_wait_expcnt 0x0 418; GFX12-NEXT: s_wait_samplecnt 0x0 419; GFX12-NEXT: s_wait_bvhcnt 0x0 420; GFX12-NEXT: s_wait_kmcnt 0x0 421; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, -v2 422; GFX12-NEXT: s_setpc_b64 s[30:31] 423; 424; GFX940-LABEL: v_fmaximum3_f32_fneg2: 425; GFX940: ; %bb.0: 426; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 427; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 428; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 429; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 430; GFX940-NEXT: s_nop 1 431; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 432; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 433; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 434; GFX940-NEXT: s_nop 1 435; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 436; GFX940-NEXT: s_setpc_b64 s[30:31] 437; 438; GFX950-LABEL: v_fmaximum3_f32_fneg2: 439; GFX950: ; %bb.0: 440; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 441; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, -v2 442; GFX950-NEXT: s_setpc_b64 s[30:31] 443 %c.fneg = fneg float %c 444 %max0 = call float @llvm.maximum.f32(float %a, float %b) 445 %max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg) 446 ret float %max1 447} 448 449define float @v_fmaximum3_f32_const0(float %b, float %c) { 450; GFX12-LABEL: v_fmaximum3_f32_const0: 451; GFX12: ; %bb.0: 452; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 453; GFX12-NEXT: s_wait_expcnt 0x0 454; GFX12-NEXT: s_wait_samplecnt 0x0 455; GFX12-NEXT: s_wait_bvhcnt 0x0 456; GFX12-NEXT: s_wait_kmcnt 0x0 457; GFX12-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1 458; GFX12-NEXT: s_setpc_b64 s[30:31] 459; 460; GFX940-LABEL: v_fmaximum3_f32_const0: 461; GFX940: ; %bb.0: 462; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 463; GFX940-NEXT: v_max_f32_e32 v2, 0x41000000, v0 464; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 465; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 466; GFX940-NEXT: s_nop 1 467; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 468; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 469; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 470; GFX940-NEXT: s_nop 1 471; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 472; GFX940-NEXT: s_setpc_b64 s[30:31] 473; 474; GFX950-LABEL: v_fmaximum3_f32_const0: 475; GFX950: ; %bb.0: 476; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 477; GFX950-NEXT: s_mov_b32 s0, 0x41000000 478; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, v1 479; GFX950-NEXT: s_setpc_b64 s[30:31] 480 %max0 = call float @llvm.maximum.f32(float 8.0, float %b) 481 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 482 ret float %max1 483} 484 485define float @v_fmaximum3_f32__const2(float %a, float %b) { 486; GFX12-LABEL: v_fmaximum3_f32__const2: 487; GFX12: ; %bb.0: 488; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 489; GFX12-NEXT: s_wait_expcnt 0x0 490; GFX12-NEXT: s_wait_samplecnt 0x0 491; GFX12-NEXT: s_wait_bvhcnt 0x0 492; GFX12-NEXT: s_wait_kmcnt 0x0 493; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000 494; GFX12-NEXT: s_setpc_b64 s[30:31] 495; 496; GFX940-LABEL: v_fmaximum3_f32__const2: 497; GFX940: ; %bb.0: 498; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 499; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 500; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 501; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 502; GFX940-NEXT: s_nop 1 503; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 504; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 505; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 506; GFX940-NEXT: s_nop 1 507; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 508; GFX940-NEXT: s_setpc_b64 s[30:31] 509; 510; GFX950-LABEL: v_fmaximum3_f32__const2: 511; GFX950: ; %bb.0: 512; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 513; GFX950-NEXT: s_mov_b32 s0, 0x41000000 514; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, s0 515; GFX950-NEXT: s_setpc_b64 s[30:31] 516 %max0 = call float @llvm.maximum.f32(float %a, float %b) 517 %max1 = call float @llvm.maximum.f32(float %max0, float 8.0) 518 ret float %max1 519} 520 521define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) { 522; GFX12-LABEL: v_fmaximum3_f32_inlineimm0: 523; GFX12: ; %bb.0: 524; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 525; GFX12-NEXT: s_wait_expcnt 0x0 526; GFX12-NEXT: s_wait_samplecnt 0x0 527; GFX12-NEXT: s_wait_bvhcnt 0x0 528; GFX12-NEXT: s_wait_kmcnt 0x0 529; GFX12-NEXT: v_maximum3_f32 v0, v0, 4.0, v1 530; GFX12-NEXT: s_setpc_b64 s[30:31] 531; 532; GFX940-LABEL: v_fmaximum3_f32_inlineimm0: 533; GFX940: ; %bb.0: 534; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 535; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 536; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 537; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 538; GFX940-NEXT: s_nop 1 539; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 540; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 541; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 542; GFX940-NEXT: s_nop 1 543; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 544; GFX940-NEXT: s_setpc_b64 s[30:31] 545; 546; GFX950-LABEL: v_fmaximum3_f32_inlineimm0: 547; GFX950: ; %bb.0: 548; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 549; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, v1 550; GFX950-NEXT: s_setpc_b64 s[30:31] 551 %max0 = call float @llvm.maximum.f32(float 4.0, float %b) 552 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 553 ret float %max1 554} 555 556define float @v_fmaximum3_f32__inlineimm(float %a, float %b) { 557; GFX12-LABEL: v_fmaximum3_f32__inlineimm: 558; GFX12: ; %bb.0: 559; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 560; GFX12-NEXT: s_wait_expcnt 0x0 561; GFX12-NEXT: s_wait_samplecnt 0x0 562; GFX12-NEXT: s_wait_bvhcnt 0x0 563; GFX12-NEXT: s_wait_kmcnt 0x0 564; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 4.0 565; GFX12-NEXT: s_setpc_b64 s[30:31] 566; 567; GFX940-LABEL: v_fmaximum3_f32__inlineimm: 568; GFX940: ; %bb.0: 569; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 570; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 571; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 572; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 573; GFX940-NEXT: s_nop 1 574; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 575; GFX940-NEXT: v_max_f32_e32 v1, 4.0, v0 576; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 577; GFX940-NEXT: s_nop 1 578; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 579; GFX940-NEXT: s_setpc_b64 s[30:31] 580; 581; GFX950-LABEL: v_fmaximum3_f32__inlineimm: 582; GFX950: ; %bb.0: 583; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 584; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, 4.0 585; GFX950-NEXT: s_setpc_b64 s[30:31] 586 %max0 = call float @llvm.maximum.f32(float %a, float %b) 587 %max1 = call float @llvm.maximum.f32(float %max0, float 4.0) 588 ret float %max1 589} 590 591define float @v_fmaximum3_f32_const1_const2(float %a) { 592; GFX12-LABEL: v_fmaximum3_f32_const1_const2: 593; GFX12: ; %bb.0: 594; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 595; GFX12-NEXT: s_wait_expcnt 0x0 596; GFX12-NEXT: s_wait_samplecnt 0x0 597; GFX12-NEXT: s_wait_bvhcnt 0x0 598; GFX12-NEXT: s_wait_kmcnt 0x0 599; GFX12-NEXT: s_mov_b32 s0, 0x41000000 600; GFX12-NEXT: s_wait_alu 0xfffe 601; GFX12-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000 602; GFX12-NEXT: s_setpc_b64 s[30:31] 603; 604; GFX940-LABEL: v_fmaximum3_f32_const1_const2: 605; GFX940: ; %bb.0: 606; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 607; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 608; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 609; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 610; GFX940-NEXT: s_nop 1 611; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 612; GFX940-NEXT: v_max_f32_e32 v1, 0x41800000, v0 613; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 614; GFX940-NEXT: s_nop 1 615; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 616; GFX940-NEXT: s_setpc_b64 s[30:31] 617; 618; GFX950-LABEL: v_fmaximum3_f32_const1_const2: 619; GFX950: ; %bb.0: 620; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 621; GFX950-NEXT: s_mov_b32 s0, 0x41000000 622; GFX950-NEXT: v_mov_b32_e32 v1, 0x41800000 623; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, v1 624; GFX950-NEXT: s_setpc_b64 s[30:31] 625 %max0 = call float @llvm.maximum.f32(float %a, float 8.0) 626 %max1 = call float @llvm.maximum.f32(float %max0, float 16.0) 627 ret float %max1 628} 629 630define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) { 631; GFX12-LABEL: v_fmaximum3_v2f32: 632; GFX12: ; %bb.0: 633; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 634; GFX12-NEXT: s_wait_expcnt 0x0 635; GFX12-NEXT: s_wait_samplecnt 0x0 636; GFX12-NEXT: s_wait_bvhcnt 0x0 637; GFX12-NEXT: s_wait_kmcnt 0x0 638; GFX12-NEXT: v_maximum3_f32 v0, v4, v0, v2 639; GFX12-NEXT: v_maximum3_f32 v1, v5, v1, v3 640; GFX12-NEXT: s_setpc_b64 s[30:31] 641; 642; GFX940-LABEL: v_fmaximum3_v2f32: 643; GFX940: ; %bb.0: 644; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 645; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 646; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 647; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 648; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 649; GFX940-NEXT: s_nop 0 650; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 651; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 652; GFX940-NEXT: s_nop 1 653; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 654; GFX940-NEXT: v_max_f32_e32 v2, v4, v0 655; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 656; GFX940-NEXT: s_nop 1 657; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 658; GFX940-NEXT: v_max_f32_e32 v2, v5, v1 659; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 660; GFX940-NEXT: s_nop 1 661; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 662; GFX940-NEXT: s_setpc_b64 s[30:31] 663; 664; GFX950-LABEL: v_fmaximum3_v2f32: 665; GFX950: ; %bb.0: 666; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 667; GFX950-NEXT: v_maximum3_f32 v0, v4, v0, v2 668; GFX950-NEXT: v_maximum3_f32 v1, v5, v1, v3 669; GFX950-NEXT: s_setpc_b64 s[30:31] 670 %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) 671 %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0) 672 ret <2 x float> %max1 673} 674 675define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 x float> %c) { 676; GFX12-LABEL: v_fmaximum3_v2f32_commute: 677; GFX12: ; %bb.0: 678; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 679; GFX12-NEXT: s_wait_expcnt 0x0 680; GFX12-NEXT: s_wait_samplecnt 0x0 681; GFX12-NEXT: s_wait_bvhcnt 0x0 682; GFX12-NEXT: s_wait_kmcnt 0x0 683; GFX12-NEXT: v_maximum3_f32 v0, v0, v2, v4 684; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, v5 685; GFX12-NEXT: s_setpc_b64 s[30:31] 686; 687; GFX940-LABEL: v_fmaximum3_v2f32_commute: 688; GFX940: ; %bb.0: 689; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 690; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 691; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 692; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 693; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 694; GFX940-NEXT: s_nop 0 695; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 696; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 697; GFX940-NEXT: s_nop 1 698; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 699; GFX940-NEXT: v_max_f32_e32 v2, v0, v4 700; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 701; GFX940-NEXT: s_nop 1 702; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 703; GFX940-NEXT: v_max_f32_e32 v2, v1, v5 704; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 705; GFX940-NEXT: s_nop 1 706; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 707; GFX940-NEXT: s_setpc_b64 s[30:31] 708; 709; GFX950-LABEL: v_fmaximum3_v2f32_commute: 710; GFX950: ; %bb.0: 711; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 712; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v4 713; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v5 714; GFX950-NEXT: s_setpc_b64 s[30:31] 715 %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) 716 %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) 717 ret <2 x float> %max1 718} 719 720define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, <2 x float> %c) { 721; GFX12-LABEL: v_fmaximum3_v2f32__fabs_all: 722; GFX12: ; %bb.0: 723; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 724; GFX12-NEXT: s_wait_expcnt 0x0 725; GFX12-NEXT: s_wait_samplecnt 0x0 726; GFX12-NEXT: s_wait_bvhcnt 0x0 727; GFX12-NEXT: s_wait_kmcnt 0x0 728; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v4| 729; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5| 730; GFX12-NEXT: s_setpc_b64 s[30:31] 731; 732; GFX940-LABEL: v_fmaximum3_v2f32__fabs_all: 733; GFX940: ; %bb.0: 734; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 735; GFX940-NEXT: v_max_f32_e64 v6, |v1|, |v3| 736; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 737; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| 738; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v2| 739; GFX940-NEXT: s_nop 0 740; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 741; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| 742; GFX940-NEXT: s_nop 1 743; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 744; GFX940-NEXT: v_max_f32_e64 v2, v0, |v4| 745; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| 746; GFX940-NEXT: s_nop 1 747; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 748; GFX940-NEXT: v_max_f32_e64 v2, v1, |v5| 749; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| 750; GFX940-NEXT: s_nop 1 751; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 752; GFX940-NEXT: s_setpc_b64 s[30:31] 753; 754; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all: 755; GFX950: ; %bb.0: 756; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 757; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v4| 758; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5| 759; GFX950-NEXT: s_setpc_b64 s[30:31] 760 %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) 761 %b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b) 762 %c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c) 763 %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a.fabs, <2 x float> %b.fabs) 764 %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c.fabs) 765 ret <2 x float> %max1 766} 767 768define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, <2 x float> %c) { 769; GFX12-LABEL: v_fmaximum3_v2f32__fneg_all: 770; GFX12: ; %bb.0: 771; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 772; GFX12-NEXT: s_wait_expcnt 0x0 773; GFX12-NEXT: s_wait_samplecnt 0x0 774; GFX12-NEXT: s_wait_bvhcnt 0x0 775; GFX12-NEXT: s_wait_kmcnt 0x0 776; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v2, -v4 777; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5 778; GFX12-NEXT: s_setpc_b64 s[30:31] 779; 780; GFX940-LABEL: v_fmaximum3_v2f32__fneg_all: 781; GFX940: ; %bb.0: 782; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 783; GFX940-NEXT: v_max_f32_e64 v6, -v1, -v3 784; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 785; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 786; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v2 787; GFX940-NEXT: s_nop 0 788; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 789; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 790; GFX940-NEXT: s_nop 1 791; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 792; GFX940-NEXT: v_max_f32_e64 v2, v0, -v4 793; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 794; GFX940-NEXT: s_nop 1 795; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 796; GFX940-NEXT: v_max_f32_e64 v2, v1, -v5 797; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 798; GFX940-NEXT: s_nop 1 799; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 800; GFX940-NEXT: s_setpc_b64 s[30:31] 801; 802; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all: 803; GFX950: ; %bb.0: 804; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 805; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v2, -v4 806; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5 807; GFX950-NEXT: s_setpc_b64 s[30:31] 808 %a.fneg = fneg <2 x float> %a 809 %b.fneg = fneg <2 x float> %b 810 %c.fneg = fneg <2 x float> %c 811 %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a.fneg, <2 x float> %b.fneg) 812 %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c.fneg) 813 ret <2 x float> %max1 814} 815 816define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c) { 817; GFX12-LABEL: v_fmaximum3_v2f32__inlineimm1: 818; GFX12: ; %bb.0: 819; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 820; GFX12-NEXT: s_wait_expcnt 0x0 821; GFX12-NEXT: s_wait_samplecnt 0x0 822; GFX12-NEXT: s_wait_bvhcnt 0x0 823; GFX12-NEXT: s_wait_kmcnt 0x0 824; GFX12-NEXT: v_maximum3_f32 v0, v0, 2.0, v2 825; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v3 826; GFX12-NEXT: s_setpc_b64 s[30:31] 827; 828; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm1: 829; GFX940: ; %bb.0: 830; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 831; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v1 832; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 833; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 834; GFX940-NEXT: s_nop 1 835; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 836; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v0 837; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 838; GFX940-NEXT: s_nop 1 839; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 840; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 841; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 842; GFX940-NEXT: v_max_f32_e32 v2, v1, v3 843; GFX940-NEXT: s_nop 0 844; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 845; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 846; GFX940-NEXT: s_nop 1 847; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 848; GFX940-NEXT: s_setpc_b64 s[30:31] 849; 850; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1: 851; GFX950: ; %bb.0: 852; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 853; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, v2 854; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, v3 855; GFX950-NEXT: s_setpc_b64 s[30:31] 856 %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>) 857 %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) 858 ret <2 x float> %max1 859} 860 861define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b) { 862; GFX12-LABEL: v_fmaximum3_v2f32__inlineimm2: 863; GFX12: ; %bb.0: 864; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 865; GFX12-NEXT: s_wait_expcnt 0x0 866; GFX12-NEXT: s_wait_samplecnt 0x0 867; GFX12-NEXT: s_wait_bvhcnt 0x0 868; GFX12-NEXT: s_wait_kmcnt 0x0 869; GFX12-NEXT: v_maximum3_f32 v0, v0, v2, 4.0 870; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, 4.0 871; GFX12-NEXT: s_setpc_b64 s[30:31] 872; 873; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm2: 874; GFX940: ; %bb.0: 875; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 876; GFX940-NEXT: v_max_f32_e32 v4, v1, v3 877; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 878; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 879; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 880; GFX940-NEXT: s_nop 0 881; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 882; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 883; GFX940-NEXT: s_nop 1 884; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 885; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 886; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 887; GFX940-NEXT: s_nop 1 888; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc 889; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v1 890; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 891; GFX940-NEXT: s_nop 1 892; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 893; GFX940-NEXT: s_setpc_b64 s[30:31] 894; 895; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2: 896; GFX950: ; %bb.0: 897; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 898; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, 4.0 899; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, 4.0 900; GFX950-NEXT: s_setpc_b64 s[30:31] 901 %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) 902 %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> <float 4.0, float 4.0>) 903 ret <2 x float> %max1 904} 905 906define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) { 907; GFX12-LABEL: v_fmaximum3_v3f32: 908; GFX12: ; %bb.0: 909; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 910; GFX12-NEXT: s_wait_expcnt 0x0 911; GFX12-NEXT: s_wait_samplecnt 0x0 912; GFX12-NEXT: s_wait_bvhcnt 0x0 913; GFX12-NEXT: s_wait_kmcnt 0x0 914; GFX12-NEXT: v_maximum3_f32 v0, v6, v0, v3 915; GFX12-NEXT: v_maximum3_f32 v1, v7, v1, v4 916; GFX12-NEXT: v_maximum3_f32 v2, v8, v2, v5 917; GFX12-NEXT: s_setpc_b64 s[30:31] 918; 919; GFX940-LABEL: v_fmaximum3_v3f32: 920; GFX940: ; %bb.0: 921; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 922; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 923; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 924; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 925; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 926; GFX940-NEXT: s_nop 0 927; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc 928; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 929; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 930; GFX940-NEXT: s_nop 0 931; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc 932; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 933; GFX940-NEXT: s_nop 1 934; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc 935; GFX940-NEXT: v_max_f32_e32 v3, v6, v0 936; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 937; GFX940-NEXT: s_nop 1 938; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc 939; GFX940-NEXT: v_max_f32_e32 v3, v7, v1 940; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 941; GFX940-NEXT: s_nop 1 942; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc 943; GFX940-NEXT: v_max_f32_e32 v3, v8, v2 944; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 945; GFX940-NEXT: s_nop 1 946; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc 947; GFX940-NEXT: s_setpc_b64 s[30:31] 948; 949; GFX950-LABEL: v_fmaximum3_v3f32: 950; GFX950: ; %bb.0: 951; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 952; GFX950-NEXT: v_maximum3_f32 v0, v6, v0, v3 953; GFX950-NEXT: v_maximum3_f32 v1, v7, v1, v4 954; GFX950-NEXT: v_maximum3_f32 v2, v8, v2, v5 955; GFX950-NEXT: s_setpc_b64 s[30:31] 956 %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) 957 %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0) 958 ret <3 x float> %max1 959} 960 961define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 x float> %c) { 962; GFX12-LABEL: v_fmaximum3_v3f32_commute: 963; GFX12: ; %bb.0: 964; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 965; GFX12-NEXT: s_wait_expcnt 0x0 966; GFX12-NEXT: s_wait_samplecnt 0x0 967; GFX12-NEXT: s_wait_bvhcnt 0x0 968; GFX12-NEXT: s_wait_kmcnt 0x0 969; GFX12-NEXT: v_maximum3_f32 v0, v0, v3, v6 970; GFX12-NEXT: v_maximum3_f32 v1, v1, v4, v7 971; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, v8 972; GFX12-NEXT: s_setpc_b64 s[30:31] 973; 974; GFX940-LABEL: v_fmaximum3_v3f32_commute: 975; GFX940: ; %bb.0: 976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 977; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 978; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 979; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 980; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 981; GFX940-NEXT: s_nop 0 982; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc 983; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 984; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 985; GFX940-NEXT: s_nop 0 986; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc 987; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 988; GFX940-NEXT: s_nop 1 989; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc 990; GFX940-NEXT: v_max_f32_e32 v3, v0, v6 991; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 992; GFX940-NEXT: s_nop 1 993; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc 994; GFX940-NEXT: v_max_f32_e32 v3, v1, v7 995; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 996; GFX940-NEXT: s_nop 1 997; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc 998; GFX940-NEXT: v_max_f32_e32 v3, v2, v8 999; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 1000; GFX940-NEXT: s_nop 1 1001; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc 1002; GFX940-NEXT: s_setpc_b64 s[30:31] 1003; 1004; GFX950-LABEL: v_fmaximum3_v3f32_commute: 1005; GFX950: ; %bb.0: 1006; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1007; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v6 1008; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v7 1009; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v8 1010; GFX950-NEXT: s_setpc_b64 s[30:31] 1011 %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) 1012 %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) 1013 ret <3 x float> %max1 1014} 1015 1016define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, <3 x float> %c) { 1017; GFX12-LABEL: v_fmaximum3_v3f32__fabs_all: 1018; GFX12: ; %bb.0: 1019; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1020; GFX12-NEXT: s_wait_expcnt 0x0 1021; GFX12-NEXT: s_wait_samplecnt 0x0 1022; GFX12-NEXT: s_wait_bvhcnt 0x0 1023; GFX12-NEXT: s_wait_kmcnt 0x0 1024; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v6| 1025; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v7| 1026; GFX12-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8| 1027; GFX12-NEXT: s_setpc_b64 s[30:31] 1028; 1029; GFX940-LABEL: v_fmaximum3_v3f32__fabs_all: 1030; GFX940: ; %bb.0: 1031; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1032; GFX940-NEXT: v_max_f32_e64 v9, |v2|, |v5| 1033; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 1034; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| 1035; GFX940-NEXT: v_max_f32_e64 v5, |v1|, |v4| 1036; GFX940-NEXT: s_nop 0 1037; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc 1038; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| 1039; GFX940-NEXT: v_max_f32_e64 v4, |v0|, |v3| 1040; GFX940-NEXT: s_nop 0 1041; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc 1042; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| 1043; GFX940-NEXT: s_nop 1 1044; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc 1045; GFX940-NEXT: v_max_f32_e64 v3, v0, |v6| 1046; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| 1047; GFX940-NEXT: s_nop 1 1048; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc 1049; GFX940-NEXT: v_max_f32_e64 v3, v1, |v7| 1050; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| 1051; GFX940-NEXT: s_nop 1 1052; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc 1053; GFX940-NEXT: v_max_f32_e64 v3, v2, |v8| 1054; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| 1055; GFX940-NEXT: s_nop 1 1056; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc 1057; GFX940-NEXT: s_setpc_b64 s[30:31] 1058; 1059; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all: 1060; GFX950: ; %bb.0: 1061; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1062; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v6| 1063; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v7| 1064; GFX950-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8| 1065; GFX950-NEXT: s_setpc_b64 s[30:31] 1066 %a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a) 1067 %b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b) 1068 %c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c) 1069 %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a.fabs, <3 x float> %b.fabs) 1070 %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c.fabs) 1071 ret <3 x float> %max1 1072} 1073 1074define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, <3 x float> %c) { 1075; GFX12-LABEL: v_fmaximum3_v3f32__fneg_all: 1076; GFX12: ; %bb.0: 1077; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1078; GFX12-NEXT: s_wait_expcnt 0x0 1079; GFX12-NEXT: s_wait_samplecnt 0x0 1080; GFX12-NEXT: s_wait_bvhcnt 0x0 1081; GFX12-NEXT: s_wait_kmcnt 0x0 1082; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v3, -v6 1083; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v4, -v7 1084; GFX12-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8 1085; GFX12-NEXT: s_setpc_b64 s[30:31] 1086; 1087; GFX940-LABEL: v_fmaximum3_v3f32__fneg_all: 1088; GFX940: ; %bb.0: 1089; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1090; GFX940-NEXT: v_max_f32_e64 v9, -v2, -v5 1091; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 1092; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 1093; GFX940-NEXT: v_max_f32_e64 v5, -v1, -v4 1094; GFX940-NEXT: s_nop 0 1095; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc 1096; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 1097; GFX940-NEXT: v_max_f32_e64 v4, -v0, -v3 1098; GFX940-NEXT: s_nop 0 1099; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc 1100; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 1101; GFX940-NEXT: s_nop 1 1102; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc 1103; GFX940-NEXT: v_max_f32_e64 v3, v0, -v6 1104; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 1105; GFX940-NEXT: s_nop 1 1106; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc 1107; GFX940-NEXT: v_max_f32_e64 v3, v1, -v7 1108; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 1109; GFX940-NEXT: s_nop 1 1110; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc 1111; GFX940-NEXT: v_max_f32_e64 v3, v2, -v8 1112; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 1113; GFX940-NEXT: s_nop 1 1114; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc 1115; GFX940-NEXT: s_setpc_b64 s[30:31] 1116; 1117; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all: 1118; GFX950: ; %bb.0: 1119; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1120; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v3, -v6 1121; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v4, -v7 1122; GFX950-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8 1123; GFX950-NEXT: s_setpc_b64 s[30:31] 1124 %a.fneg = fneg <3 x float> %a 1125 %b.fneg = fneg <3 x float> %b 1126 %c.fneg = fneg <3 x float> %c 1127 %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a.fneg, <3 x float> %b.fneg) 1128 %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c.fneg) 1129 ret <3 x float> %max1 1130} 1131 1132define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c) { 1133; GFX12-LABEL: v_fmaximum3_v3f32__inlineimm1: 1134; GFX12: ; %bb.0: 1135; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1136; GFX12-NEXT: s_wait_expcnt 0x0 1137; GFX12-NEXT: s_wait_samplecnt 0x0 1138; GFX12-NEXT: s_wait_bvhcnt 0x0 1139; GFX12-NEXT: s_wait_kmcnt 0x0 1140; GFX12-NEXT: v_maximum3_f32 v0, v0, 2.0, v3 1141; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v4 1142; GFX12-NEXT: v_maximum3_f32 v2, v2, 2.0, v5 1143; GFX12-NEXT: s_setpc_b64 s[30:31] 1144; 1145; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm1: 1146; GFX940: ; %bb.0: 1147; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1148; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v2 1149; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 1150; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 1151; GFX940-NEXT: s_nop 1 1152; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc 1153; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v1 1154; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 1155; GFX940-NEXT: s_nop 1 1156; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 1157; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v0 1158; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 1159; GFX940-NEXT: s_nop 1 1160; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 1161; GFX940-NEXT: v_max_f32_e32 v6, v0, v3 1162; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 1163; GFX940-NEXT: v_max_f32_e32 v3, v1, v4 1164; GFX940-NEXT: s_nop 0 1165; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 1166; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 1167; GFX940-NEXT: s_nop 1 1168; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 1169; GFX940-NEXT: v_max_f32_e32 v3, v2, v5 1170; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 1171; GFX940-NEXT: s_nop 1 1172; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 1173; GFX940-NEXT: s_setpc_b64 s[30:31] 1174; 1175; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1: 1176; GFX950: ; %bb.0: 1177; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1178; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, v3 1179; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, v4 1180; GFX950-NEXT: v_maximum3_f32 v2, v2, 2.0, v5 1181; GFX950-NEXT: s_setpc_b64 s[30:31] 1182 %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>) 1183 %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) 1184 ret <3 x float> %max1 1185} 1186 1187define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b) { 1188; GFX12-LABEL: v_fmaximum3_v3f32__inlineimm2: 1189; GFX12: ; %bb.0: 1190; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1191; GFX12-NEXT: s_wait_expcnt 0x0 1192; GFX12-NEXT: s_wait_samplecnt 0x0 1193; GFX12-NEXT: s_wait_bvhcnt 0x0 1194; GFX12-NEXT: s_wait_kmcnt 0x0 1195; GFX12-NEXT: v_maximum3_f32 v0, v0, v3, 4.0 1196; GFX12-NEXT: v_maximum3_f32 v1, v1, v4, 4.0 1197; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, 4.0 1198; GFX12-NEXT: s_setpc_b64 s[30:31] 1199; 1200; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm2: 1201; GFX940: ; %bb.0: 1202; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1203; GFX940-NEXT: v_max_f32_e32 v6, v2, v5 1204; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 1205; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 1206; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 1207; GFX940-NEXT: s_nop 0 1208; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc 1209; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 1210; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 1211; GFX940-NEXT: s_nop 0 1212; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc 1213; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 1214; GFX940-NEXT: s_nop 1 1215; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc 1216; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v0 1217; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 1218; GFX940-NEXT: s_nop 1 1219; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc 1220; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v1 1221; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 1222; GFX940-NEXT: s_nop 1 1223; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 1224; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v2 1225; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 1226; GFX940-NEXT: s_nop 1 1227; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 1228; GFX940-NEXT: s_setpc_b64 s[30:31] 1229; 1230; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2: 1231; GFX950: ; %bb.0: 1232; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1233; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, 4.0 1234; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, 4.0 1235; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, 4.0 1236; GFX950-NEXT: s_setpc_b64 s[30:31] 1237 %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) 1238 %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> <float 4.0, float 4.0, float 4.0>) 1239 ret <3 x float> %max1 1240} 1241 1242 1243define half @v_fmaximum3_f16(half %a, half %b, half %c) { 1244; GFX12-LABEL: v_fmaximum3_f16: 1245; GFX12: ; %bb.0: 1246; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1247; GFX12-NEXT: s_wait_expcnt 0x0 1248; GFX12-NEXT: s_wait_samplecnt 0x0 1249; GFX12-NEXT: s_wait_bvhcnt 0x0 1250; GFX12-NEXT: s_wait_kmcnt 0x0 1251; GFX12-NEXT: v_maximum3_f16 v0, v0, v1, v2 1252; GFX12-NEXT: s_setpc_b64 s[30:31] 1253; 1254; GFX9-LABEL: v_fmaximum3_f16: 1255; GFX9: ; %bb.0: 1256; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1257; GFX9-NEXT: v_max_f16_e32 v3, v0, v1 1258; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1259; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1260; GFX9-NEXT: s_nop 1 1261; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1262; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 1263; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1264; GFX9-NEXT: s_nop 1 1265; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1266; GFX9-NEXT: s_setpc_b64 s[30:31] 1267 %max0 = call half @llvm.maximum.f16(half %a, half %b) 1268 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1269 ret half %max1 1270} 1271 1272define half @v_fmaximum3_f16_commute(half %a, half %b, half %c) { 1273; GFX12-LABEL: v_fmaximum3_f16_commute: 1274; GFX12: ; %bb.0: 1275; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1276; GFX12-NEXT: s_wait_expcnt 0x0 1277; GFX12-NEXT: s_wait_samplecnt 0x0 1278; GFX12-NEXT: s_wait_bvhcnt 0x0 1279; GFX12-NEXT: s_wait_kmcnt 0x0 1280; GFX12-NEXT: v_maximum3_f16 v0, v2, v0, v1 1281; GFX12-NEXT: s_setpc_b64 s[30:31] 1282; 1283; GFX9-LABEL: v_fmaximum3_f16_commute: 1284; GFX9: ; %bb.0: 1285; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1286; GFX9-NEXT: v_max_f16_e32 v3, v0, v1 1287; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1288; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1289; GFX9-NEXT: s_nop 1 1290; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1291; GFX9-NEXT: v_max_f16_e32 v1, v2, v0 1292; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v2, v0 1293; GFX9-NEXT: s_nop 1 1294; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1295; GFX9-NEXT: s_setpc_b64 s[30:31] 1296 %max0 = call half @llvm.maximum.f16(half %a, half %b) 1297 %max1 = call half @llvm.maximum.f16(half %c, half %max0) 1298 ret half %max1 1299} 1300 1301define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg %c) { 1302; GFX12-LABEL: s_fmaximum3_f16: 1303; GFX12: ; %bb.0: 1304; GFX12-NEXT: v_mov_b32_e32 v0, s2 1305; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1306; GFX12-NEXT: v_maximum3_f16 v0, s0, s1, v0 1307; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 1308; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1309; GFX12-NEXT: v_readfirstlane_b32 s0, v0 1310; GFX12-NEXT: ; return to shader part epilog 1311; 1312; GFX9-LABEL: s_fmaximum3_f16: 1313; GFX9: ; %bb.0: 1314; GFX9-NEXT: v_mov_b32_e32 v0, s1 1315; GFX9-NEXT: v_max_f16_e32 v1, s0, v0 1316; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00 1317; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0 1318; GFX9-NEXT: s_nop 1 1319; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1320; GFX9-NEXT: v_max_f16_e32 v1, s2, v0 1321; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0 1322; GFX9-NEXT: s_nop 1 1323; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1324; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 1325; GFX9-NEXT: s_nop 0 1326; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1327; GFX9-NEXT: ; return to shader part epilog 1328 %max0 = call half @llvm.maximum.f16(half %a, half %b) 1329 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1330 %cast = bitcast half %max1 to i16 1331 %zext = zext i16 %cast to i32 1332 %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %zext) 1333 ret i32 %readfirstlane 1334} 1335 1336define half @v_fmaximum3_f16_fabs0(half %a, half %b, half %c) { 1337; GFX12-LABEL: v_fmaximum3_f16_fabs0: 1338; GFX12: ; %bb.0: 1339; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1340; GFX12-NEXT: s_wait_expcnt 0x0 1341; GFX12-NEXT: s_wait_samplecnt 0x0 1342; GFX12-NEXT: s_wait_bvhcnt 0x0 1343; GFX12-NEXT: s_wait_kmcnt 0x0 1344; GFX12-NEXT: v_maximum3_f16 v0, |v0|, v1, v2 1345; GFX12-NEXT: s_setpc_b64 s[30:31] 1346; 1347; GFX9-LABEL: v_fmaximum3_f16_fabs0: 1348; GFX9: ; %bb.0: 1349; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1350; GFX9-NEXT: v_max_f16_e64 v3, |v0|, v1 1351; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1352; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, v1 1353; GFX9-NEXT: s_nop 1 1354; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1355; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 1356; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1357; GFX9-NEXT: s_nop 1 1358; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1359; GFX9-NEXT: s_setpc_b64 s[30:31] 1360 %a.fabs = call half @llvm.fabs.f16(half %a) 1361 %max0 = call half @llvm.maximum.f16(half %a.fabs, half %b) 1362 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1363 ret half %max1 1364} 1365 1366define half @v_fmaximum3_f16_fabs1(half %a, half %b, half %c) { 1367; GFX12-LABEL: v_fmaximum3_f16_fabs1: 1368; GFX12: ; %bb.0: 1369; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1370; GFX12-NEXT: s_wait_expcnt 0x0 1371; GFX12-NEXT: s_wait_samplecnt 0x0 1372; GFX12-NEXT: s_wait_bvhcnt 0x0 1373; GFX12-NEXT: s_wait_kmcnt 0x0 1374; GFX12-NEXT: v_maximum3_f16 v0, v0, |v1|, v2 1375; GFX12-NEXT: s_setpc_b64 s[30:31] 1376; 1377; GFX9-LABEL: v_fmaximum3_f16_fabs1: 1378; GFX9: ; %bb.0: 1379; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX9-NEXT: v_max_f16_e64 v3, v0, |v1| 1381; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1382; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v1| 1383; GFX9-NEXT: s_nop 1 1384; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1385; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 1386; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1387; GFX9-NEXT: s_nop 1 1388; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1389; GFX9-NEXT: s_setpc_b64 s[30:31] 1390 %b.fabs = call half @llvm.fabs.f16(half %b) 1391 %max0 = call half @llvm.maximum.f16(half %a, half %b.fabs) 1392 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1393 ret half %max1 1394} 1395 1396define half @v_fmaximum3_f16_fabs2(half %a, half %b, half %c) { 1397; GFX12-LABEL: v_fmaximum3_f16_fabs2: 1398; GFX12: ; %bb.0: 1399; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1400; GFX12-NEXT: s_wait_expcnt 0x0 1401; GFX12-NEXT: s_wait_samplecnt 0x0 1402; GFX12-NEXT: s_wait_bvhcnt 0x0 1403; GFX12-NEXT: s_wait_kmcnt 0x0 1404; GFX12-NEXT: v_maximum3_f16 v0, v0, v1, |v2| 1405; GFX12-NEXT: s_setpc_b64 s[30:31] 1406; 1407; GFX9-LABEL: v_fmaximum3_f16_fabs2: 1408; GFX9: ; %bb.0: 1409; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1410; GFX9-NEXT: v_max_f16_e32 v3, v0, v1 1411; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1412; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1413; GFX9-NEXT: s_nop 1 1414; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1415; GFX9-NEXT: v_max_f16_e64 v1, v0, |v2| 1416; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2| 1417; GFX9-NEXT: s_nop 1 1418; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1419; GFX9-NEXT: s_setpc_b64 s[30:31] 1420 %c.fabs = call half @llvm.fabs.f16(half %c) 1421 %max0 = call half @llvm.maximum.f16(half %a, half %b) 1422 %max1 = call half @llvm.maximum.f16(half %max0, half %c.fabs) 1423 ret half %max1 1424} 1425 1426define half @v_fmaximum3_f16_fabs_all(half %a, half %b, half %c) { 1427; GFX12-LABEL: v_fmaximum3_f16_fabs_all: 1428; GFX12: ; %bb.0: 1429; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1430; GFX12-NEXT: s_wait_expcnt 0x0 1431; GFX12-NEXT: s_wait_samplecnt 0x0 1432; GFX12-NEXT: s_wait_bvhcnt 0x0 1433; GFX12-NEXT: s_wait_kmcnt 0x0 1434; GFX12-NEXT: v_maximum3_f16 v0, |v0|, |v1|, |v2| 1435; GFX12-NEXT: s_setpc_b64 s[30:31] 1436; 1437; GFX9-LABEL: v_fmaximum3_f16_fabs_all: 1438; GFX9: ; %bb.0: 1439; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1440; GFX9-NEXT: v_max_f16_e64 v3, |v0|, |v1| 1441; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1442; GFX9-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1| 1443; GFX9-NEXT: s_nop 1 1444; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1445; GFX9-NEXT: v_max_f16_e64 v1, v0, |v2| 1446; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2| 1447; GFX9-NEXT: s_nop 1 1448; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1449; GFX9-NEXT: s_setpc_b64 s[30:31] 1450 %a.fabs = call half @llvm.fabs.f16(half %a) 1451 %b.fabs = call half @llvm.fabs.f16(half %b) 1452 %c.fabs = call half @llvm.fabs.f16(half %c) 1453 %max0 = call half @llvm.maximum.f16(half %a.fabs, half %b.fabs) 1454 %max1 = call half @llvm.maximum.f16(half %max0, half %c.fabs) 1455 ret half %max1 1456} 1457 1458define half @v_fmaximum3_f16_fneg_all(half %a, half %b, half %c) { 1459; GFX12-LABEL: v_fmaximum3_f16_fneg_all: 1460; GFX12: ; %bb.0: 1461; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1462; GFX12-NEXT: s_wait_expcnt 0x0 1463; GFX12-NEXT: s_wait_samplecnt 0x0 1464; GFX12-NEXT: s_wait_bvhcnt 0x0 1465; GFX12-NEXT: s_wait_kmcnt 0x0 1466; GFX12-NEXT: v_maximum3_f16 v0, -v0, -v1, -v2 1467; GFX12-NEXT: s_setpc_b64 s[30:31] 1468; 1469; GFX9-LABEL: v_fmaximum3_f16_fneg_all: 1470; GFX9: ; %bb.0: 1471; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1472; GFX9-NEXT: v_max_f16_e64 v3, -v0, -v1 1473; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1474; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1 1475; GFX9-NEXT: s_nop 1 1476; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1477; GFX9-NEXT: v_max_f16_e64 v1, v0, -v2 1478; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2 1479; GFX9-NEXT: s_nop 1 1480; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1481; GFX9-NEXT: s_setpc_b64 s[30:31] 1482 %a.fneg = fneg half %a 1483 %b.fneg = fneg half %b 1484 %c.fneg = fneg half %c 1485 %max0 = call half @llvm.maximum.f16(half %a.fneg, half %b.fneg) 1486 %max1 = call half @llvm.maximum.f16(half %max0, half %c.fneg) 1487 ret half %max1 1488} 1489 1490define half @v_fmaximum3_f16_fneg_fabs_all(half %a, half %b, half %c) { 1491; GFX12-LABEL: v_fmaximum3_f16_fneg_fabs_all: 1492; GFX12: ; %bb.0: 1493; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1494; GFX12-NEXT: s_wait_expcnt 0x0 1495; GFX12-NEXT: s_wait_samplecnt 0x0 1496; GFX12-NEXT: s_wait_bvhcnt 0x0 1497; GFX12-NEXT: s_wait_kmcnt 0x0 1498; GFX12-NEXT: v_maximum3_f16 v0, -|v0|, -|v1|, -|v2| 1499; GFX12-NEXT: s_setpc_b64 s[30:31] 1500; 1501; GFX9-LABEL: v_fmaximum3_f16_fneg_fabs_all: 1502; GFX9: ; %bb.0: 1503; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1504; GFX9-NEXT: v_max_f16_e64 v3, -|v0|, -|v1| 1505; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1506; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -|v0|, -|v1| 1507; GFX9-NEXT: s_nop 1 1508; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1509; GFX9-NEXT: v_max_f16_e64 v1, v0, -|v2| 1510; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -|v2| 1511; GFX9-NEXT: s_nop 1 1512; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1513; GFX9-NEXT: s_setpc_b64 s[30:31] 1514 %a.fabs = call half @llvm.fabs.f16(half %a) 1515 %b.fabs = call half @llvm.fabs.f16(half %b) 1516 %c.fabs = call half @llvm.fabs.f16(half %c) 1517 %a.fneg.fabs = fneg half %a.fabs 1518 %b.fneg.fabs = fneg half %b.fabs 1519 %c.fneg.fabs = fneg half %c.fabs 1520 %max0 = call half @llvm.maximum.f16(half %a.fneg.fabs, half %b.fneg.fabs) 1521 %max1 = call half @llvm.maximum.f16(half %max0, half %c.fneg.fabs) 1522 ret half %max1 1523} 1524 1525define half @v_fmaximum3_f16_fneg0(half %a, half %b, half %c) { 1526; GFX12-LABEL: v_fmaximum3_f16_fneg0: 1527; GFX12: ; %bb.0: 1528; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1529; GFX12-NEXT: s_wait_expcnt 0x0 1530; GFX12-NEXT: s_wait_samplecnt 0x0 1531; GFX12-NEXT: s_wait_bvhcnt 0x0 1532; GFX12-NEXT: s_wait_kmcnt 0x0 1533; GFX12-NEXT: v_maximum3_f16 v0, -v0, v1, v2 1534; GFX12-NEXT: s_setpc_b64 s[30:31] 1535; 1536; GFX9-LABEL: v_fmaximum3_f16_fneg0: 1537; GFX9: ; %bb.0: 1538; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1539; GFX9-NEXT: v_max_f16_e64 v3, -v0, v1 1540; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1541; GFX9-NEXT: v_cmp_o_f16_e64 vcc, -v0, v1 1542; GFX9-NEXT: s_nop 1 1543; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1544; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 1545; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1546; GFX9-NEXT: s_nop 1 1547; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1548; GFX9-NEXT: s_setpc_b64 s[30:31] 1549 %a.fneg = fneg half %a 1550 %max0 = call half @llvm.maximum.f16(half %a.fneg, half %b) 1551 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1552 ret half %max1 1553} 1554 1555define half @v_fmaximum3_f16_fneg1(half %a, half %b, half %c) { 1556; GFX12-LABEL: v_fmaximum3_f16_fneg1: 1557; GFX12: ; %bb.0: 1558; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1559; GFX12-NEXT: s_wait_expcnt 0x0 1560; GFX12-NEXT: s_wait_samplecnt 0x0 1561; GFX12-NEXT: s_wait_bvhcnt 0x0 1562; GFX12-NEXT: s_wait_kmcnt 0x0 1563; GFX12-NEXT: v_maximum3_f16 v0, v0, -v1, v2 1564; GFX12-NEXT: s_setpc_b64 s[30:31] 1565; 1566; GFX9-LABEL: v_fmaximum3_f16_fneg1: 1567; GFX9: ; %bb.0: 1568; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1569; GFX9-NEXT: v_max_f16_e64 v3, v0, -v1 1570; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1571; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v1 1572; GFX9-NEXT: s_nop 1 1573; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1574; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 1575; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 1576; GFX9-NEXT: s_nop 1 1577; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1578; GFX9-NEXT: s_setpc_b64 s[30:31] 1579 %b.fneg = fneg half %b 1580 %max0 = call half @llvm.maximum.f16(half %a, half %b.fneg) 1581 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1582 ret half %max1 1583} 1584 1585define half @v_fmaximum3_f16_fneg2(half %a, half %b, half %c) { 1586; GFX12-LABEL: v_fmaximum3_f16_fneg2: 1587; GFX12: ; %bb.0: 1588; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1589; GFX12-NEXT: s_wait_expcnt 0x0 1590; GFX12-NEXT: s_wait_samplecnt 0x0 1591; GFX12-NEXT: s_wait_bvhcnt 0x0 1592; GFX12-NEXT: s_wait_kmcnt 0x0 1593; GFX12-NEXT: v_maximum3_f16 v0, v0, v1, -v2 1594; GFX12-NEXT: s_setpc_b64 s[30:31] 1595; 1596; GFX9-LABEL: v_fmaximum3_f16_fneg2: 1597; GFX9: ; %bb.0: 1598; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1599; GFX9-NEXT: v_max_f16_e32 v3, v0, v1 1600; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 1601; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1602; GFX9-NEXT: s_nop 1 1603; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1604; GFX9-NEXT: v_max_f16_e64 v1, v0, -v2 1605; GFX9-NEXT: v_cmp_o_f16_e64 vcc, v0, -v2 1606; GFX9-NEXT: s_nop 1 1607; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1608; GFX9-NEXT: s_setpc_b64 s[30:31] 1609 %c.fneg = fneg half %c 1610 %max0 = call half @llvm.maximum.f16(half %a, half %b) 1611 %max1 = call half @llvm.maximum.f16(half %max0, half %c.fneg) 1612 ret half %max1 1613} 1614 1615define half @v_fmaximum3_f16_const0(half %b, half %c) { 1616; GFX12-LABEL: v_fmaximum3_f16_const0: 1617; GFX12: ; %bb.0: 1618; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1619; GFX12-NEXT: s_wait_expcnt 0x0 1620; GFX12-NEXT: s_wait_samplecnt 0x0 1621; GFX12-NEXT: s_wait_bvhcnt 0x0 1622; GFX12-NEXT: s_wait_kmcnt 0x0 1623; GFX12-NEXT: v_maximum3_f16 v0, v0, 0x4800, v1 1624; GFX12-NEXT: s_setpc_b64 s[30:31] 1625; 1626; GFX9-LABEL: v_fmaximum3_f16_const0: 1627; GFX9: ; %bb.0: 1628; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1629; GFX9-NEXT: v_max_f16_e32 v2, 0x4800, v0 1630; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00 1631; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 1632; GFX9-NEXT: s_nop 1 1633; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 1634; GFX9-NEXT: v_max_f16_e32 v2, v0, v1 1635; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1636; GFX9-NEXT: s_nop 1 1637; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 1638; GFX9-NEXT: s_setpc_b64 s[30:31] 1639 %max0 = call half @llvm.maximum.f16(half 8.0, half %b) 1640 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1641 ret half %max1 1642} 1643 1644define half @v_fmaximum3_f16__const2(half %a, half %b) { 1645; GFX12-LABEL: v_fmaximum3_f16__const2: 1646; GFX12: ; %bb.0: 1647; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1648; GFX12-NEXT: s_wait_expcnt 0x0 1649; GFX12-NEXT: s_wait_samplecnt 0x0 1650; GFX12-NEXT: s_wait_bvhcnt 0x0 1651; GFX12-NEXT: s_wait_kmcnt 0x0 1652; GFX12-NEXT: v_maximum3_f16 v0, v0, v1, 0x4800 1653; GFX12-NEXT: s_setpc_b64 s[30:31] 1654; 1655; GFX9-LABEL: v_fmaximum3_f16__const2: 1656; GFX9: ; %bb.0: 1657; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1658; GFX9-NEXT: v_max_f16_e32 v2, v0, v1 1659; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00 1660; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1661; GFX9-NEXT: s_nop 1 1662; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 1663; GFX9-NEXT: v_max_f16_e32 v1, 0x4800, v0 1664; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 1665; GFX9-NEXT: s_nop 1 1666; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 1667; GFX9-NEXT: s_setpc_b64 s[30:31] 1668 %max0 = call half @llvm.maximum.f16(half %a, half %b) 1669 %max1 = call half @llvm.maximum.f16(half %max0, half 8.0) 1670 ret half %max1 1671} 1672 1673define half @v_fmaximum3_f16_inlineimm0(half %b, half %c) { 1674; GFX12-LABEL: v_fmaximum3_f16_inlineimm0: 1675; GFX12: ; %bb.0: 1676; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1677; GFX12-NEXT: s_wait_expcnt 0x0 1678; GFX12-NEXT: s_wait_samplecnt 0x0 1679; GFX12-NEXT: s_wait_bvhcnt 0x0 1680; GFX12-NEXT: s_wait_kmcnt 0x0 1681; GFX12-NEXT: v_maximum3_f16 v0, v0, 4.0, v1 1682; GFX12-NEXT: s_setpc_b64 s[30:31] 1683; 1684; GFX9-LABEL: v_fmaximum3_f16_inlineimm0: 1685; GFX9: ; %bb.0: 1686; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1687; GFX9-NEXT: v_max_f16_e32 v2, 4.0, v0 1688; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00 1689; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 1690; GFX9-NEXT: s_nop 1 1691; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 1692; GFX9-NEXT: v_max_f16_e32 v2, v0, v1 1693; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1694; GFX9-NEXT: s_nop 1 1695; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 1696; GFX9-NEXT: s_setpc_b64 s[30:31] 1697 %max0 = call half @llvm.maximum.f16(half 4.0, half %b) 1698 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 1699 ret half %max1 1700} 1701 1702define half @v_fmaximum3_f16__inlineimm(half %a, half %b) { 1703; GFX12-LABEL: v_fmaximum3_f16__inlineimm: 1704; GFX12: ; %bb.0: 1705; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1706; GFX12-NEXT: s_wait_expcnt 0x0 1707; GFX12-NEXT: s_wait_samplecnt 0x0 1708; GFX12-NEXT: s_wait_bvhcnt 0x0 1709; GFX12-NEXT: s_wait_kmcnt 0x0 1710; GFX12-NEXT: v_maximum3_f16 v0, v0, v1, 4.0 1711; GFX12-NEXT: s_setpc_b64 s[30:31] 1712; 1713; GFX9-LABEL: v_fmaximum3_f16__inlineimm: 1714; GFX9: ; %bb.0: 1715; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1716; GFX9-NEXT: v_max_f16_e32 v2, v0, v1 1717; GFX9-NEXT: v_mov_b32_e32 v3, 0x7e00 1718; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1719; GFX9-NEXT: s_nop 1 1720; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 1721; GFX9-NEXT: v_max_f16_e32 v1, 4.0, v0 1722; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 1723; GFX9-NEXT: s_nop 1 1724; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 1725; GFX9-NEXT: s_setpc_b64 s[30:31] 1726 %max0 = call half @llvm.maximum.f16(half %a, half %b) 1727 %max1 = call half @llvm.maximum.f16(half %max0, half 4.0) 1728 ret half %max1 1729} 1730 1731define half @v_fmaximum3_f16_const1_const2(half %a) { 1732; GFX12-LABEL: v_fmaximum3_f16_const1_const2: 1733; GFX12: ; %bb.0: 1734; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1735; GFX12-NEXT: s_wait_expcnt 0x0 1736; GFX12-NEXT: s_wait_samplecnt 0x0 1737; GFX12-NEXT: s_wait_bvhcnt 0x0 1738; GFX12-NEXT: s_wait_kmcnt 0x0 1739; GFX12-NEXT: s_movk_i32 s0, 0x4800 1740; GFX12-NEXT: s_wait_alu 0xfffe 1741; GFX12-NEXT: v_maximum3_f16 v0, v0, s0, 0x4c00 1742; GFX12-NEXT: s_setpc_b64 s[30:31] 1743; 1744; GFX9-LABEL: v_fmaximum3_f16_const1_const2: 1745; GFX9: ; %bb.0: 1746; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1747; GFX9-NEXT: v_max_f16_e32 v1, 0x4800, v0 1748; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00 1749; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 1750; GFX9-NEXT: s_nop 1 1751; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1752; GFX9-NEXT: v_max_f16_e32 v1, 0x4c00, v0 1753; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 1754; GFX9-NEXT: s_nop 1 1755; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1756; GFX9-NEXT: s_setpc_b64 s[30:31] 1757 %max0 = call half @llvm.maximum.f16(half %a, half 8.0) 1758 %max1 = call half @llvm.maximum.f16(half %max0, half 16.0) 1759 ret half %max1 1760} 1761 1762define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) { 1763; GFX12-LABEL: v_fmaximum3_v2f16: 1764; GFX12: ; %bb.0: 1765; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1766; GFX12-NEXT: s_wait_expcnt 0x0 1767; GFX12-NEXT: s_wait_samplecnt 0x0 1768; GFX12-NEXT: s_wait_bvhcnt 0x0 1769; GFX12-NEXT: s_wait_kmcnt 0x0 1770; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 1771; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1772; GFX12-NEXT: v_pk_maximum_f16 v0, v2, v0 1773; GFX12-NEXT: s_setpc_b64 s[30:31] 1774; 1775; GFX940-LABEL: v_fmaximum3_v2f16: 1776; GFX940: ; %bb.0: 1777; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1778; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 1779; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 1780; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1781; GFX940-NEXT: s_mov_b32 s0, 0x5040100 1782; GFX940-NEXT: s_nop 0 1783; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc 1784; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1785; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 1786; GFX940-NEXT: s_nop 1 1787; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1788; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 1789; GFX940-NEXT: v_pk_max_f16 v1, v2, v1 1790; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v5 1791; GFX940-NEXT: s_nop 1 1792; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc 1793; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1794; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD 1795; GFX940-NEXT: s_nop 1 1796; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1797; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 1798; GFX940-NEXT: s_setpc_b64 s[30:31] 1799; 1800; GFX950-LABEL: v_fmaximum3_v2f16: 1801; GFX950: ; %bb.0: 1802; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1803; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 1804; GFX950-NEXT: s_nop 0 1805; GFX950-NEXT: v_pk_maximum3_f16 v0, v2, v0, v0 1806; GFX950-NEXT: s_setpc_b64 s[30:31] 1807 %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) 1808 %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %max0) 1809 ret <2 x half> %max1 1810} 1811 1812define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x half> %c) { 1813; GFX12-LABEL: v_fmaximum3_v2f16_commute: 1814; GFX12: ; %bb.0: 1815; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1816; GFX12-NEXT: s_wait_expcnt 0x0 1817; GFX12-NEXT: s_wait_samplecnt 0x0 1818; GFX12-NEXT: s_wait_bvhcnt 0x0 1819; GFX12-NEXT: s_wait_kmcnt 0x0 1820; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 1821; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1822; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1823; GFX12-NEXT: s_setpc_b64 s[30:31] 1824; 1825; GFX940-LABEL: v_fmaximum3_v2f16_commute: 1826; GFX940: ; %bb.0: 1827; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1828; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 1829; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 1830; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 1831; GFX940-NEXT: s_mov_b32 s0, 0x5040100 1832; GFX940-NEXT: s_nop 0 1833; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc 1834; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1835; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 1836; GFX940-NEXT: s_nop 1 1837; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1838; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 1839; GFX940-NEXT: v_pk_max_f16 v1, v1, v2 1840; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 1841; GFX940-NEXT: s_nop 1 1842; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc 1843; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1844; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1 1845; GFX940-NEXT: s_nop 1 1846; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1847; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 1848; GFX940-NEXT: s_setpc_b64 s[30:31] 1849; 1850; GFX950-LABEL: v_fmaximum3_v2f16_commute: 1851; GFX950: ; %bb.0: 1852; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1853; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 1854; GFX950-NEXT: s_nop 0 1855; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1856; GFX950-NEXT: s_setpc_b64 s[30:31] 1857 %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) 1858 %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c) 1859 ret <2 x half> %max1 1860} 1861 1862define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2 x half> %c) { 1863; GFX12-LABEL: v_fmaximum3_v2f16__fabs_all: 1864; GFX12: ; %bb.0: 1865; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1866; GFX12-NEXT: s_wait_expcnt 0x0 1867; GFX12-NEXT: s_wait_samplecnt 0x0 1868; GFX12-NEXT: s_wait_bvhcnt 0x0 1869; GFX12-NEXT: s_wait_kmcnt 0x0 1870; GFX12-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 1871; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 1872; GFX12-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 1873; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1874; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 1875; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 1876; GFX12-NEXT: s_setpc_b64 s[30:31] 1877; 1878; GFX940-LABEL: v_fmaximum3_v2f16__fabs_all: 1879; GFX940: ; %bb.0: 1880; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1881; GFX940-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 1882; GFX940-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1 1883; GFX940-NEXT: v_pk_max_f16 v3, v3, v4 1884; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 1885; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v3 1886; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1 1887; GFX940-NEXT: s_mov_b32 s0, 0x5040100 1888; GFX940-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2 1889; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 1890; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1| 1891; GFX940-NEXT: s_nop 1 1892; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc 1893; GFX940-NEXT: v_perm_b32 v1, v4, v0, s0 1894; GFX940-NEXT: v_pk_max_f16 v1, v1, v5 1895; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1 1896; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1897; GFX940-NEXT: s_nop 0 1898; GFX940-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 1899; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2| 1900; GFX940-NEXT: s_nop 1 1901; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc 1902; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 1903; GFX940-NEXT: s_setpc_b64 s[30:31] 1904; 1905; GFX950-LABEL: v_fmaximum3_v2f16__fabs_all: 1906; GFX950: ; %bb.0: 1907; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1908; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 1909; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 1910; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 1911; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 1912; GFX950-NEXT: s_nop 0 1913; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 1914; GFX950-NEXT: s_setpc_b64 s[30:31] 1915 %a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a) 1916 %b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b) 1917 %c.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %c) 1918 %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a.fabs, <2 x half> %b.fabs) 1919 %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c.fabs) 1920 ret <2 x half> %max1 1921} 1922 1923define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2 x half> %c) { 1924; GFX12-LABEL: v_fmaximum3_v2f16__fneg_all: 1925; GFX12: ; %bb.0: 1926; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1927; GFX12-NEXT: s_wait_expcnt 0x0 1928; GFX12-NEXT: s_wait_samplecnt 0x0 1929; GFX12-NEXT: s_wait_bvhcnt 0x0 1930; GFX12-NEXT: s_wait_kmcnt 0x0 1931; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1] 1932; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1933; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] 1934; GFX12-NEXT: s_setpc_b64 s[30:31] 1935; 1936; GFX940-LABEL: v_fmaximum3_v2f16__fneg_all: 1937; GFX940: ; %bb.0: 1938; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1939; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1] 1940; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 1941; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1 1942; GFX940-NEXT: s_mov_b32 s0, 0x5040100 1943; GFX940-NEXT: s_nop 0 1944; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc 1945; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 1946; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1 1947; GFX940-NEXT: s_nop 1 1948; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1949; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 1950; GFX940-NEXT: v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1] 1951; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2 1952; GFX940-NEXT: s_nop 1 1953; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc 1954; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1955; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1 1956; GFX940-NEXT: s_nop 1 1957; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1958; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 1959; GFX940-NEXT: s_setpc_b64 s[30:31] 1960; 1961; GFX950-LABEL: v_fmaximum3_v2f16__fneg_all: 1962; GFX950: ; %bb.0: 1963; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1964; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] 1965; GFX950-NEXT: s_nop 0 1966; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[0,1,1] neg_hi:[0,1,1] 1967; GFX950-NEXT: s_setpc_b64 s[30:31] 1968 %a.fneg = fneg <2 x half> %a 1969 %b.fneg = fneg <2 x half> %b 1970 %c.fneg = fneg <2 x half> %c 1971 %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a.fneg, <2 x half> %b.fneg) 1972 %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c.fneg) 1973 ret <2 x half> %max1 1974} 1975 1976define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) { 1977; GFX12-LABEL: v_fmaximum3_v2f16__inlineimm1: 1978; GFX12: ; %bb.0: 1979; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1980; GFX12-NEXT: s_wait_expcnt 0x0 1981; GFX12-NEXT: s_wait_samplecnt 0x0 1982; GFX12-NEXT: s_wait_bvhcnt 0x0 1983; GFX12-NEXT: s_wait_kmcnt 0x0 1984; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0] 1985; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1986; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 1987; GFX12-NEXT: s_setpc_b64 s[30:31] 1988; 1989; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm1: 1990; GFX940: ; %bb.0: 1991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1992; GFX940-NEXT: v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0] 1993; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 1994; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 1995; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 1996; GFX940-NEXT: s_mov_b32 s0, 0x5040100 1997; GFX940-NEXT: s_nop 0 1998; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc 1999; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 2000; GFX940-NEXT: s_nop 1 2001; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 2002; GFX940-NEXT: v_perm_b32 v2, v3, v0, s0 2003; GFX940-NEXT: v_pk_max_f16 v2, v2, v1 2004; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1 2005; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v2 2006; GFX940-NEXT: s_nop 0 2007; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc 2008; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 2009; GFX940-NEXT: s_nop 1 2010; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 2011; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 2012; GFX940-NEXT: s_setpc_b64 s[30:31] 2013; 2014; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm1: 2015; GFX950: ; %bb.0: 2016; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2017; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0] 2018; GFX950-NEXT: s_nop 0 2019; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 2020; GFX950-NEXT: s_setpc_b64 s[30:31] 2021 %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>) 2022 %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c) 2023 ret <2 x half> %max1 2024} 2025 2026define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) { 2027; GFX12-LABEL: v_fmaximum3_v2f16__inlineimm2: 2028; GFX12: ; %bb.0: 2029; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2030; GFX12-NEXT: s_wait_expcnt 0x0 2031; GFX12-NEXT: s_wait_samplecnt 0x0 2032; GFX12-NEXT: s_wait_bvhcnt 0x0 2033; GFX12-NEXT: s_wait_kmcnt 0x0 2034; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 2035; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 2036; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] 2037; GFX12-NEXT: s_setpc_b64 s[30:31] 2038; 2039; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm2: 2040; GFX940: ; %bb.0: 2041; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2042; GFX940-NEXT: v_pk_max_f16 v2, v0, v1 2043; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00 2044; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 2045; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2046; GFX940-NEXT: s_nop 0 2047; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc 2048; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2049; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 2050; GFX940-NEXT: s_nop 1 2051; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 2052; GFX940-NEXT: v_perm_b32 v1, v0, v4, s0 2053; GFX940-NEXT: v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0] 2054; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 2055; GFX940-NEXT: s_nop 1 2056; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc 2057; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 2058; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 2059; GFX940-NEXT: s_nop 1 2060; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 2061; GFX940-NEXT: v_perm_b32 v0, v0, v2, s0 2062; GFX940-NEXT: s_setpc_b64 s[30:31] 2063; 2064; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm2: 2065; GFX950: ; %bb.0: 2066; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2067; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 2068; GFX950-NEXT: s_nop 0 2069; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0] 2070; GFX950-NEXT: s_setpc_b64 s[30:31] 2071 %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) 2072 %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>) 2073 ret <2 x half> %max1 2074} 2075 2076define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c) { 2077; GFX12-LABEL: v_fmaximum3_v3f16: 2078; GFX12: ; %bb.0: 2079; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2080; GFX12-NEXT: s_wait_expcnt 0x0 2081; GFX12-NEXT: s_wait_samplecnt 0x0 2082; GFX12-NEXT: s_wait_bvhcnt 0x0 2083; GFX12-NEXT: s_wait_kmcnt 0x0 2084; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2085; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2086; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2087; GFX12-NEXT: v_pk_maximum_f16 v0, v4, v0 2088; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1 2089; GFX12-NEXT: s_setpc_b64 s[30:31] 2090; 2091; GFX940-LABEL: v_fmaximum3_v3f16: 2092; GFX940: ; %bb.0: 2093; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2094; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 2095; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 2096; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2097; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2098; GFX940-NEXT: s_nop 0 2099; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc 2100; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 2101; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 2102; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 2103; GFX940-NEXT: s_nop 0 2104; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 2105; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2106; GFX940-NEXT: s_nop 1 2107; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc 2108; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2109; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 2110; GFX940-NEXT: s_nop 1 2111; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2112; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 2113; GFX940-NEXT: v_pk_max_f16 v1, v5, v1 2114; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 2115; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 2116; GFX940-NEXT: v_pk_max_f16 v2, v4, v2 2117; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc 2118; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 2119; GFX940-NEXT: s_nop 1 2120; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc 2121; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2122; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD 2123; GFX940-NEXT: s_nop 1 2124; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 2125; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 2126; GFX940-NEXT: s_setpc_b64 s[30:31] 2127; 2128; GFX950-LABEL: v_fmaximum3_v3f16: 2129; GFX950: ; %bb.0: 2130; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2131; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2132; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2133; GFX950-NEXT: v_pk_maximum3_f16 v1, v5, v1, v1 2134; GFX950-NEXT: v_pk_maximum3_f16 v0, v4, v0, v0 2135; GFX950-NEXT: s_setpc_b64 s[30:31] 2136 %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b) 2137 %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %c, <3 x half> %max0) 2138 ret <3 x half> %max1 2139} 2140 2141define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x half> %c) { 2142; GFX12-LABEL: v_fmaximum3_v3f16_commute: 2143; GFX12: ; %bb.0: 2144; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2145; GFX12-NEXT: s_wait_expcnt 0x0 2146; GFX12-NEXT: s_wait_samplecnt 0x0 2147; GFX12-NEXT: s_wait_bvhcnt 0x0 2148; GFX12-NEXT: s_wait_kmcnt 0x0 2149; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2150; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2151; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2152; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4 2153; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 2154; GFX12-NEXT: s_setpc_b64 s[30:31] 2155; 2156; GFX940-LABEL: v_fmaximum3_v3f16_commute: 2157; GFX940: ; %bb.0: 2158; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2159; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 2160; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 2161; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2162; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2163; GFX940-NEXT: s_nop 0 2164; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc 2165; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 2166; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 2167; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 2168; GFX940-NEXT: s_nop 0 2169; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 2170; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2171; GFX940-NEXT: s_nop 1 2172; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc 2173; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2174; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 2175; GFX940-NEXT: s_nop 1 2176; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2177; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 2178; GFX940-NEXT: v_pk_max_f16 v1, v1, v5 2179; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 2180; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 2181; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 2182; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc 2183; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 2184; GFX940-NEXT: s_nop 1 2185; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc 2186; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2187; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 2188; GFX940-NEXT: s_nop 1 2189; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 2190; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 2191; GFX940-NEXT: s_setpc_b64 s[30:31] 2192; 2193; GFX950-LABEL: v_fmaximum3_v3f16_commute: 2194; GFX950: ; %bb.0: 2195; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2196; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2197; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2198; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5 2199; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4 2200; GFX950-NEXT: s_setpc_b64 s[30:31] 2201 %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b) 2202 %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c) 2203 ret <3 x half> %max1 2204} 2205 2206define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3 x half> %c) { 2207; GFX12-LABEL: v_fmaximum3_v3f16__fabs_all: 2208; GFX12: ; %bb.0: 2209; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2210; GFX12-NEXT: s_wait_expcnt 0x0 2211; GFX12-NEXT: s_wait_samplecnt 0x0 2212; GFX12-NEXT: s_wait_bvhcnt 0x0 2213; GFX12-NEXT: s_wait_kmcnt 0x0 2214; GFX12-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 2215; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 2216; GFX12-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 2217; GFX12-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 2218; GFX12-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 2219; GFX12-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 2220; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2221; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2222; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2223; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2224; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4 2225; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 2226; GFX12-NEXT: s_setpc_b64 s[30:31] 2227; 2228; GFX940-LABEL: v_fmaximum3_v3f16__fabs_all: 2229; GFX940: ; %bb.0: 2230; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2231; GFX940-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v1 2232; GFX940-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v3 2233; GFX940-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0 2234; GFX940-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2 2235; GFX940-NEXT: v_pk_max_f16 v7, v7, v9 2236; GFX940-NEXT: v_mov_b32_e32 v12, 0x7e00 2237; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v7 2238; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 2239; GFX940-NEXT: v_pk_max_f16 v6, v6, v8 2240; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2241; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc 2242; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v6 2243; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 2244; GFX940-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4 2245; GFX940-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5 2246; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc 2247; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| 2248; GFX940-NEXT: s_nop 1 2249; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc 2250; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| 2251; GFX940-NEXT: s_nop 1 2252; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc 2253; GFX940-NEXT: v_perm_b32 v2, v8, v0, s0 2254; GFX940-NEXT: v_pk_max_f16 v2, v2, v11 2255; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1 2256; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2257; GFX940-NEXT: v_perm_b32 v6, v9, v1, s0 2258; GFX940-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc 2259; GFX940-NEXT: v_pk_max_f16 v6, v6, v10 2260; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| 2261; GFX940-NEXT: s_nop 1 2262; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc 2263; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| 2264; GFX940-NEXT: s_nop 1 2265; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc 2266; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 2267; GFX940-NEXT: s_setpc_b64 s[30:31] 2268; 2269; GFX950-LABEL: v_fmaximum3_v3f16__fabs_all: 2270; GFX950: ; %bb.0: 2271; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2272; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 2273; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 2274; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 2275; GFX950-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 2276; GFX950-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 2277; GFX950-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 2278; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2279; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2280; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5 2281; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4 2282; GFX950-NEXT: s_setpc_b64 s[30:31] 2283 %a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a) 2284 %b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b) 2285 %c.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %c) 2286 %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a.fabs, <3 x half> %b.fabs) 2287 %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c.fabs) 2288 ret <3 x half> %max1 2289} 2290 2291define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3 x half> %c) { 2292; GFX12-LABEL: v_fmaximum3_v3f16__fneg_all: 2293; GFX12: ; %bb.0: 2294; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2295; GFX12-NEXT: s_wait_expcnt 0x0 2296; GFX12-NEXT: s_wait_samplecnt 0x0 2297; GFX12-NEXT: s_wait_bvhcnt 0x0 2298; GFX12-NEXT: s_wait_kmcnt 0x0 2299; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1] 2300; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1] 2301; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2302; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] 2303; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] 2304; GFX12-NEXT: s_setpc_b64 s[30:31] 2305; 2306; GFX940-LABEL: v_fmaximum3_v3f16__fneg_all: 2307; GFX940: ; %bb.0: 2308; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2309; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] 2310; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 2311; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 2312; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2313; GFX940-NEXT: s_nop 0 2314; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc 2315; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 2316; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 2317; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] 2318; GFX940-NEXT: s_nop 0 2319; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 2320; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 2321; GFX940-NEXT: s_nop 1 2322; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc 2323; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2324; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 2325; GFX940-NEXT: s_nop 1 2326; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2327; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 2328; GFX940-NEXT: v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] 2329; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 2330; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 2331; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] 2332; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc 2333; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 2334; GFX940-NEXT: s_nop 1 2335; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc 2336; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2337; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 2338; GFX940-NEXT: s_nop 1 2339; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 2340; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 2341; GFX940-NEXT: s_setpc_b64 s[30:31] 2342; 2343; GFX950-LABEL: v_fmaximum3_v3f16__fneg_all: 2344; GFX950: ; %bb.0: 2345; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2346; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1] 2347; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1] 2348; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1] 2349; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1] 2350; GFX950-NEXT: s_setpc_b64 s[30:31] 2351 %a.fneg = fneg <3 x half> %a 2352 %b.fneg = fneg <3 x half> %b 2353 %c.fneg = fneg <3 x half> %c 2354 %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a.fneg, <3 x half> %b.fneg) 2355 %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c.fneg) 2356 ret <3 x half> %max1 2357} 2358 2359define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) { 2360; GFX12-LABEL: v_fmaximum3_v3f16__inlineimm1: 2361; GFX12: ; %bb.0: 2362; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2363; GFX12-NEXT: s_wait_expcnt 0x0 2364; GFX12-NEXT: s_wait_samplecnt 0x0 2365; GFX12-NEXT: s_wait_bvhcnt 0x0 2366; GFX12-NEXT: s_wait_kmcnt 0x0 2367; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0] 2368; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 2.0 2369; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2370; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2371; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2372; GFX12-NEXT: s_setpc_b64 s[30:31] 2373; 2374; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm1: 2375; GFX940: ; %bb.0: 2376; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2377; GFX940-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0] 2378; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 2379; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v4 2380; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 2381; GFX940-NEXT: v_pk_max_f16 v7, v1, 2.0 2382; GFX940-NEXT: s_mov_b32 s1, 0x5040100 2383; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 2384; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 2385; GFX940-NEXT: s_movk_i32 s0, 0x7e00 2386; GFX940-NEXT: s_nop 0 2387; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc 2388; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 2389; GFX940-NEXT: s_nop 1 2390; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc 2391; GFX940-NEXT: v_perm_b32 v4, v5, v0, s1 2392; GFX940-NEXT: v_pk_max_f16 v4, v4, v2 2393; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 2394; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v4 2395; GFX940-NEXT: s_nop 0 2396; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc 2397; GFX940-NEXT: v_pack_b32_f16 v7, v1, s0 2398; GFX940-NEXT: v_pk_max_f16 v7, v7, v3 2399; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2400; GFX940-NEXT: s_nop 1 2401; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc 2402; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2403; GFX940-NEXT: s_nop 1 2404; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc 2405; GFX940-NEXT: v_perm_b32 v0, v5, v0, s1 2406; GFX940-NEXT: s_setpc_b64 s[30:31] 2407; 2408; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm1: 2409; GFX950: ; %bb.0: 2410; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2411; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, 2.0, 2.0 2412; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0] 2413; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2414; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2415; GFX950-NEXT: s_setpc_b64 s[30:31] 2416 %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>) 2417 %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c) 2418 ret <3 x half> %max1 2419} 2420 2421define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) { 2422; GFX12-LABEL: v_fmaximum3_v3f16__inlineimm2: 2423; GFX12: ; %bb.0: 2424; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2425; GFX12-NEXT: s_wait_expcnt 0x0 2426; GFX12-NEXT: s_wait_samplecnt 0x0 2427; GFX12-NEXT: s_wait_bvhcnt 0x0 2428; GFX12-NEXT: s_wait_kmcnt 0x0 2429; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2430; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2431; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2432; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] 2433; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0 2434; GFX12-NEXT: s_setpc_b64 s[30:31] 2435; 2436; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm2: 2437; GFX940: ; %bb.0: 2438; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2439; GFX940-NEXT: v_pk_max_f16 v4, v0, v2 2440; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00 2441; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2442; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2443; GFX940-NEXT: s_nop 0 2444; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc 2445; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 2446; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 2447; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 2448; GFX940-NEXT: s_nop 0 2449; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 2450; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2451; GFX940-NEXT: s_nop 1 2452; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc 2453; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2454; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 2455; GFX940-NEXT: s_nop 1 2456; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 2457; GFX940-NEXT: v_perm_b32 v1, v1, v4, s0 2458; GFX940-NEXT: v_pk_max_f16 v1, v1, 4.0 2459; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 2460; GFX940-NEXT: v_perm_b32 v2, v0, v6, s0 2461; GFX940-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] 2462; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 2463; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 2464; GFX940-NEXT: s_nop 1 2465; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc 2466; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2467; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 2468; GFX940-NEXT: s_nop 1 2469; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc 2470; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 2471; GFX940-NEXT: s_setpc_b64 s[30:31] 2472; 2473; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm2: 2474; GFX950: ; %bb.0: 2475; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2476; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2477; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2478; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, 4.0, 4.0 2479; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0] 2480; GFX950-NEXT: s_setpc_b64 s[30:31] 2481 %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b) 2482 %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>) 2483 ret <3 x half> %max1 2484} 2485 2486define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { 2487; GFX12-LABEL: v_fmaximum3_v4f16: 2488; GFX12: ; %bb.0: 2489; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2490; GFX12-NEXT: s_wait_expcnt 0x0 2491; GFX12-NEXT: s_wait_samplecnt 0x0 2492; GFX12-NEXT: s_wait_bvhcnt 0x0 2493; GFX12-NEXT: s_wait_kmcnt 0x0 2494; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2495; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2496; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2497; GFX12-NEXT: v_pk_maximum_f16 v0, v4, v0 2498; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1 2499; GFX12-NEXT: s_setpc_b64 s[30:31] 2500; 2501; GFX940-LABEL: v_fmaximum3_v4f16: 2502; GFX940: ; %bb.0: 2503; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2504; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 2505; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 2506; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2507; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2508; GFX940-NEXT: s_nop 0 2509; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc 2510; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 2511; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 2512; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 2513; GFX940-NEXT: s_nop 0 2514; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 2515; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2516; GFX940-NEXT: s_nop 1 2517; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc 2518; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2519; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 2520; GFX940-NEXT: s_nop 1 2521; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2522; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 2523; GFX940-NEXT: v_pk_max_f16 v2, v5, v2 2524; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 2525; GFX940-NEXT: s_nop 1 2526; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc 2527; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2528; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD 2529; GFX940-NEXT: s_nop 1 2530; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2531; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 2532; GFX940-NEXT: v_pk_max_f16 v2, v4, v2 2533; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 2534; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 2535; GFX940-NEXT: s_nop 0 2536; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc 2537; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2538; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD 2539; GFX940-NEXT: s_nop 1 2540; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 2541; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 2542; GFX940-NEXT: s_setpc_b64 s[30:31] 2543; 2544; GFX950-LABEL: v_fmaximum3_v4f16: 2545; GFX950: ; %bb.0: 2546; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2547; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2548; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2549; GFX950-NEXT: v_pk_maximum3_f16 v1, v5, v1, v1 2550; GFX950-NEXT: v_pk_maximum3_f16 v0, v4, v0, v0 2551; GFX950-NEXT: s_setpc_b64 s[30:31] 2552 %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b) 2553 %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %c, <4 x half> %max0) 2554 ret <4 x half> %max1 2555} 2556 2557define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x half> %c) { 2558; GFX12-LABEL: v_fmaximum3_v4f16_commute: 2559; GFX12: ; %bb.0: 2560; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2561; GFX12-NEXT: s_wait_expcnt 0x0 2562; GFX12-NEXT: s_wait_samplecnt 0x0 2563; GFX12-NEXT: s_wait_bvhcnt 0x0 2564; GFX12-NEXT: s_wait_kmcnt 0x0 2565; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2566; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2567; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2568; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4 2569; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 2570; GFX12-NEXT: s_setpc_b64 s[30:31] 2571; 2572; GFX940-LABEL: v_fmaximum3_v4f16_commute: 2573; GFX940: ; %bb.0: 2574; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2575; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 2576; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 2577; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2578; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2579; GFX940-NEXT: s_nop 0 2580; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc 2581; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 2582; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 2583; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 2584; GFX940-NEXT: s_nop 0 2585; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 2586; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2587; GFX940-NEXT: s_nop 1 2588; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc 2589; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2590; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 2591; GFX940-NEXT: s_nop 1 2592; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2593; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 2594; GFX940-NEXT: v_pk_max_f16 v2, v2, v5 2595; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 2596; GFX940-NEXT: s_nop 1 2597; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc 2598; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2599; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1 2600; GFX940-NEXT: s_nop 1 2601; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2602; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 2603; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 2604; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 2605; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 2606; GFX940-NEXT: s_nop 0 2607; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc 2608; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2609; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 2610; GFX940-NEXT: s_nop 1 2611; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 2612; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 2613; GFX940-NEXT: s_setpc_b64 s[30:31] 2614; 2615; GFX950-LABEL: v_fmaximum3_v4f16_commute: 2616; GFX950: ; %bb.0: 2617; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2618; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2619; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2620; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5 2621; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4 2622; GFX950-NEXT: s_setpc_b64 s[30:31] 2623 %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b) 2624 %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c) 2625 ret <4 x half> %max1 2626} 2627 2628define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4 x half> %c) { 2629; GFX12-LABEL: v_fmaximum3_v4f16__fabs_all: 2630; GFX12: ; %bb.0: 2631; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2632; GFX12-NEXT: s_wait_expcnt 0x0 2633; GFX12-NEXT: s_wait_samplecnt 0x0 2634; GFX12-NEXT: s_wait_bvhcnt 0x0 2635; GFX12-NEXT: s_wait_kmcnt 0x0 2636; GFX12-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 2637; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 2638; GFX12-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 2639; GFX12-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 2640; GFX12-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 2641; GFX12-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 2642; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2643; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2644; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2645; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2646; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4 2647; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 2648; GFX12-NEXT: s_setpc_b64 s[30:31] 2649; 2650; GFX940-LABEL: v_fmaximum3_v4f16__fabs_all: 2651; GFX940: ; %bb.0: 2652; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2653; GFX940-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v0 2654; GFX940-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v2 2655; GFX940-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1 2656; GFX940-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3 2657; GFX940-NEXT: v_pk_max_f16 v7, v7, v9 2658; GFX940-NEXT: v_mov_b32_e32 v12, 0x7e00 2659; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v7 2660; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 2661; GFX940-NEXT: v_pk_max_f16 v6, v6, v8 2662; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2663; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc 2664; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v6 2665; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 2666; GFX940-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5 2667; GFX940-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4 2668; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc 2669; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| 2670; GFX940-NEXT: s_nop 1 2671; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc 2672; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| 2673; GFX940-NEXT: s_nop 1 2674; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc 2675; GFX940-NEXT: v_perm_b32 v2, v8, v1, s0 2676; GFX940-NEXT: v_pk_max_f16 v2, v2, v11 2677; GFX940-NEXT: v_perm_b32 v6, v9, v0, s0 2678; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2679; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1 2680; GFX940-NEXT: v_pk_max_f16 v6, v6, v10 2681; GFX940-NEXT: s_nop 0 2682; GFX940-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc 2683; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v6 2684; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1 2685; GFX940-NEXT: s_nop 1 2686; GFX940-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc 2687; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| 2688; GFX940-NEXT: s_nop 1 2689; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc 2690; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| 2691; GFX940-NEXT: v_perm_b32 v1, v3, v1, s0 2692; GFX940-NEXT: s_nop 0 2693; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc 2694; GFX940-NEXT: v_perm_b32 v0, v7, v0, s0 2695; GFX940-NEXT: s_setpc_b64 s[30:31] 2696; 2697; GFX950-LABEL: v_fmaximum3_v4f16__fabs_all: 2698; GFX950: ; %bb.0: 2699; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2700; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 2701; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 2702; GFX950-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 2703; GFX950-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 2704; GFX950-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5 2705; GFX950-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4 2706; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2707; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2708; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5 2709; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4 2710; GFX950-NEXT: s_setpc_b64 s[30:31] 2711 %a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a) 2712 %b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b) 2713 %c.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %c) 2714 %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a.fabs, <4 x half> %b.fabs) 2715 %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c.fabs) 2716 ret <4 x half> %max1 2717} 2718 2719define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4 x half> %c) { 2720; GFX12-LABEL: v_fmaximum3_v4f16__fneg_all: 2721; GFX12: ; %bb.0: 2722; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2723; GFX12-NEXT: s_wait_expcnt 0x0 2724; GFX12-NEXT: s_wait_samplecnt 0x0 2725; GFX12-NEXT: s_wait_bvhcnt 0x0 2726; GFX12-NEXT: s_wait_kmcnt 0x0 2727; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1] 2728; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1] 2729; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2730; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] 2731; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] 2732; GFX12-NEXT: s_setpc_b64 s[30:31] 2733; 2734; GFX940-LABEL: v_fmaximum3_v4f16__fneg_all: 2735; GFX940: ; %bb.0: 2736; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2737; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] 2738; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 2739; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 2740; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2741; GFX940-NEXT: s_nop 0 2742; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc 2743; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 2744; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 2745; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] 2746; GFX940-NEXT: s_nop 0 2747; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 2748; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 2749; GFX940-NEXT: s_nop 1 2750; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc 2751; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2752; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 2753; GFX940-NEXT: s_nop 1 2754; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2755; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 2756; GFX940-NEXT: v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1] 2757; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 2758; GFX940-NEXT: s_nop 1 2759; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc 2760; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2761; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1 2762; GFX940-NEXT: s_nop 1 2763; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 2764; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 2765; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] 2766; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 2767; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 2768; GFX940-NEXT: s_nop 0 2769; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc 2770; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2771; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 2772; GFX940-NEXT: s_nop 1 2773; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc 2774; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 2775; GFX940-NEXT: s_setpc_b64 s[30:31] 2776; 2777; GFX950-LABEL: v_fmaximum3_v4f16__fneg_all: 2778; GFX950: ; %bb.0: 2779; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2780; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1] 2781; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1] 2782; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1] 2783; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1] 2784; GFX950-NEXT: s_setpc_b64 s[30:31] 2785 %a.fneg = fneg <4 x half> %a 2786 %b.fneg = fneg <4 x half> %b 2787 %c.fneg = fneg <4 x half> %c 2788 %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a.fneg, <4 x half> %b.fneg) 2789 %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c.fneg) 2790 ret <4 x half> %max1 2791} 2792 2793define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) { 2794; GFX12-LABEL: v_fmaximum3_v4f16__inlineimm1: 2795; GFX12: ; %bb.0: 2796; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2797; GFX12-NEXT: s_wait_expcnt 0x0 2798; GFX12-NEXT: s_wait_samplecnt 0x0 2799; GFX12-NEXT: s_wait_bvhcnt 0x0 2800; GFX12-NEXT: s_wait_kmcnt 0x0 2801; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0] 2802; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 2.0 op_sel_hi:[1,0] 2803; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2804; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2805; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2806; GFX12-NEXT: s_setpc_b64 s[30:31] 2807; 2808; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm1: 2809; GFX940: ; %bb.0: 2810; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2811; GFX940-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0] 2812; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 2813; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v4 2814; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 2815; GFX940-NEXT: v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0] 2816; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2817; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 2818; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v7 2819; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1 2820; GFX940-NEXT: s_nop 1 2821; GFX940-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc 2822; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 2823; GFX940-NEXT: s_nop 1 2824; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc 2825; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 2826; GFX940-NEXT: s_nop 1 2827; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc 2828; GFX940-NEXT: v_perm_b32 v4, v8, v1, s0 2829; GFX940-NEXT: v_pk_max_f16 v4, v4, v3 2830; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1 2831; GFX940-NEXT: v_perm_b32 v8, v5, v0, s0 2832; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v4 2833; GFX940-NEXT: v_pk_max_f16 v8, v8, v2 2834; GFX940-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc 2835; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v8 2836; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 2837; GFX940-NEXT: s_nop 1 2838; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc 2839; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2840; GFX940-NEXT: s_nop 1 2841; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc 2842; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2843; GFX940-NEXT: v_perm_b32 v1, v7, v1, s0 2844; GFX940-NEXT: s_nop 0 2845; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc 2846; GFX940-NEXT: v_perm_b32 v0, v5, v0, s0 2847; GFX940-NEXT: s_setpc_b64 s[30:31] 2848; 2849; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm1: 2850; GFX950: ; %bb.0: 2851; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2852; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, 2.0, 2.0 op_sel_hi:[1,0,0] 2853; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0] 2854; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2855; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2856; GFX950-NEXT: s_setpc_b64 s[30:31] 2857 %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>) 2858 %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c) 2859 ret <4 x half> %max1 2860} 2861 2862define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) { 2863; GFX12-LABEL: v_fmaximum3_v4f16__inlineimm2: 2864; GFX12: ; %bb.0: 2865; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2866; GFX12-NEXT: s_wait_expcnt 0x0 2867; GFX12-NEXT: s_wait_samplecnt 0x0 2868; GFX12-NEXT: s_wait_bvhcnt 0x0 2869; GFX12-NEXT: s_wait_kmcnt 0x0 2870; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 2871; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 2872; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2873; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] 2874; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0] 2875; GFX12-NEXT: s_setpc_b64 s[30:31] 2876; 2877; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm2: 2878; GFX940: ; %bb.0: 2879; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2880; GFX940-NEXT: v_pk_max_f16 v4, v0, v2 2881; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00 2882; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 2883; GFX940-NEXT: s_mov_b32 s0, 0x5040100 2884; GFX940-NEXT: s_nop 0 2885; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc 2886; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 2887; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 2888; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 2889; GFX940-NEXT: s_nop 0 2890; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 2891; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 2892; GFX940-NEXT: s_nop 1 2893; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc 2894; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2895; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 2896; GFX940-NEXT: s_nop 1 2897; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 2898; GFX940-NEXT: v_perm_b32 v2, v1, v4, s0 2899; GFX940-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] 2900; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 2901; GFX940-NEXT: s_nop 1 2902; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc 2903; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2904; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 2905; GFX940-NEXT: s_nop 1 2906; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 2907; GFX940-NEXT: v_perm_b32 v2, v0, v6, s0 2908; GFX940-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] 2909; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 2910; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 2911; GFX940-NEXT: s_nop 0 2912; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc 2913; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 2914; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 2915; GFX940-NEXT: s_nop 1 2916; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc 2917; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0 2918; GFX940-NEXT: s_setpc_b64 s[30:31] 2919; 2920; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm2: 2921; GFX950: ; %bb.0: 2922; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2923; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, v3, v3 2924; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v2, v2 2925; GFX950-NEXT: v_pk_maximum3_f16 v1, v1, 4.0, 4.0 op_sel_hi:[1,0,0] 2926; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0] 2927; GFX950-NEXT: s_setpc_b64 s[30:31] 2928 %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b) 2929 %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>) 2930 ret <4 x half> %max1 2931} 2932 2933define double @v_fmaximum3_f64(double %a, double %b, double %c) { 2934; GFX12-LABEL: v_fmaximum3_f64: 2935; GFX12: ; %bb.0: 2936; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2937; GFX12-NEXT: s_wait_expcnt 0x0 2938; GFX12-NEXT: s_wait_samplecnt 0x0 2939; GFX12-NEXT: s_wait_bvhcnt 0x0 2940; GFX12-NEXT: s_wait_kmcnt 0x0 2941; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 2942; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 2943; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] 2944; GFX12-NEXT: s_setpc_b64 s[30:31] 2945; 2946; GFX9-LABEL: v_fmaximum3_f64: 2947; GFX9: ; %bb.0: 2948; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2949; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] 2950; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 2951; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 2952; GFX9-NEXT: s_nop 1 2953; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 2954; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 2955; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] 2956; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] 2957; GFX9-NEXT: s_nop 1 2958; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 2959; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 2960; GFX9-NEXT: s_setpc_b64 s[30:31] 2961 %max0 = call double @llvm.maximum.f64(double %a, double %b) 2962 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 2963 ret double %max1 2964} 2965 2966define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) { 2967; GFX12-LABEL: v_fmaximum3_f64_commute: 2968; GFX12: ; %bb.0: 2969; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2970; GFX12-NEXT: s_wait_expcnt 0x0 2971; GFX12-NEXT: s_wait_samplecnt 0x0 2972; GFX12-NEXT: s_wait_bvhcnt 0x0 2973; GFX12-NEXT: s_wait_kmcnt 0x0 2974; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 2975; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 2976; GFX12-NEXT: v_maximum_f64 v[0:1], v[4:5], v[0:1] 2977; GFX12-NEXT: s_setpc_b64 s[30:31] 2978; 2979; GFX9-LABEL: v_fmaximum3_f64_commute: 2980; GFX9: ; %bb.0: 2981; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2982; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] 2983; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 2984; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 2985; GFX9-NEXT: s_nop 1 2986; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 2987; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 2988; GFX9-NEXT: v_max_f64 v[2:3], v[4:5], v[0:1] 2989; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1] 2990; GFX9-NEXT: s_nop 1 2991; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 2992; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 2993; GFX9-NEXT: s_setpc_b64 s[30:31] 2994 %max0 = call double @llvm.maximum.f64(double %a, double %b) 2995 %max1 = call double @llvm.maximum.f64(double %c, double %max0) 2996 ret double %max1 2997} 2998 2999define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, double inreg %c) { 3000; GFX12-LABEL: s_fmaximum3_f64: 3001; GFX12: ; %bb.0: 3002; GFX12-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3] 3003; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3004; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], s[4:5] 3005; GFX12-NEXT: v_readfirstlane_b32 s0, v0 3006; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 3007; GFX12-NEXT: v_readfirstlane_b32 s1, v1 3008; GFX12-NEXT: ; return to shader part epilog 3009; 3010; GFX9-LABEL: s_fmaximum3_f64: 3011; GFX9: ; %bb.0: 3012; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 3013; GFX9-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1] 3014; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000 3015; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1] 3016; GFX9-NEXT: s_nop 1 3017; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc 3018; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3019; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5] 3020; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1] 3021; GFX9-NEXT: s_nop 1 3022; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc 3023; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc 3024; GFX9-NEXT: v_readfirstlane_b32 s1, v0 3025; GFX9-NEXT: v_readfirstlane_b32 s0, v1 3026; GFX9-NEXT: ; return to shader part epilog 3027 %max0 = call double @llvm.maximum.f64(double %a, double %b) 3028 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3029 %cast = bitcast double %max1 to <2 x i32> 3030 %elt0 = extractelement <2 x i32> %cast, i32 0 3031 %elt1 = extractelement <2 x i32> %cast, i32 1 3032 %readlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt0) 3033 %readlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt1) 3034 %insert.0 = insertelement <2 x i32> poison, i32 %readlane0, i32 0 3035 %insert.1 = insertelement <2 x i32> %insert.0, i32 %readlane1, i32 1 3036 ret <2 x i32> %insert.1 3037} 3038 3039define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) { 3040; GFX12-LABEL: v_fmaximum3_f64_fabs0: 3041; GFX12: ; %bb.0: 3042; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3043; GFX12-NEXT: s_wait_expcnt 0x0 3044; GFX12-NEXT: s_wait_samplecnt 0x0 3045; GFX12-NEXT: s_wait_bvhcnt 0x0 3046; GFX12-NEXT: s_wait_kmcnt 0x0 3047; GFX12-NEXT: v_maximum_f64 v[0:1], |v[0:1]|, v[2:3] 3048; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3049; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] 3050; GFX12-NEXT: s_setpc_b64 s[30:31] 3051; 3052; GFX9-LABEL: v_fmaximum3_f64_fabs0: 3053; GFX9: ; %bb.0: 3054; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3055; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, v[2:3] 3056; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3057; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3] 3058; GFX9-NEXT: s_nop 1 3059; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3060; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3061; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] 3062; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] 3063; GFX9-NEXT: s_nop 1 3064; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3065; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3066; GFX9-NEXT: s_setpc_b64 s[30:31] 3067 %a.fabs = call double @llvm.fabs.f64(double %a) 3068 %max0 = call double @llvm.maximum.f64(double %a.fabs, double %b) 3069 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3070 ret double %max1 3071} 3072 3073define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) { 3074; GFX12-LABEL: v_fmaximum3_f64_fabs1: 3075; GFX12: ; %bb.0: 3076; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3077; GFX12-NEXT: s_wait_expcnt 0x0 3078; GFX12-NEXT: s_wait_samplecnt 0x0 3079; GFX12-NEXT: s_wait_bvhcnt 0x0 3080; GFX12-NEXT: s_wait_kmcnt 0x0 3081; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[2:3]| 3082; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3083; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] 3084; GFX12-NEXT: s_setpc_b64 s[30:31] 3085; 3086; GFX9-LABEL: v_fmaximum3_f64_fabs1: 3087; GFX9: ; %bb.0: 3088; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3089; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], |v[2:3]| 3090; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3091; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]| 3092; GFX9-NEXT: s_nop 1 3093; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3094; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3095; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] 3096; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] 3097; GFX9-NEXT: s_nop 1 3098; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3099; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3100; GFX9-NEXT: s_setpc_b64 s[30:31] 3101 %b.fabs = call double @llvm.fabs.f64(double %b) 3102 %max0 = call double @llvm.maximum.f64(double %a, double %b.fabs) 3103 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3104 ret double %max1 3105} 3106 3107define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) { 3108; GFX12-LABEL: v_fmaximum3_f64_fabs2: 3109; GFX12: ; %bb.0: 3110; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3111; GFX12-NEXT: s_wait_expcnt 0x0 3112; GFX12-NEXT: s_wait_samplecnt 0x0 3113; GFX12-NEXT: s_wait_bvhcnt 0x0 3114; GFX12-NEXT: s_wait_kmcnt 0x0 3115; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 3116; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3117; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]| 3118; GFX12-NEXT: s_setpc_b64 s[30:31] 3119; 3120; GFX9-LABEL: v_fmaximum3_f64_fabs2: 3121; GFX9: ; %bb.0: 3122; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3123; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] 3124; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3125; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 3126; GFX9-NEXT: s_nop 1 3127; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3128; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3129; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]| 3130; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]| 3131; GFX9-NEXT: s_nop 1 3132; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3133; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3134; GFX9-NEXT: s_setpc_b64 s[30:31] 3135 %c.fabs = call double @llvm.fabs.f64(double %c) 3136 %max0 = call double @llvm.maximum.f64(double %a, double %b) 3137 %max1 = call double @llvm.maximum.f64(double %max0, double %c.fabs) 3138 ret double %max1 3139} 3140 3141define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) { 3142; GFX12-LABEL: v_fmaximum3_f64_fabs_all: 3143; GFX12: ; %bb.0: 3144; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3145; GFX12-NEXT: s_wait_expcnt 0x0 3146; GFX12-NEXT: s_wait_samplecnt 0x0 3147; GFX12-NEXT: s_wait_bvhcnt 0x0 3148; GFX12-NEXT: s_wait_kmcnt 0x0 3149; GFX12-NEXT: v_maximum_f64 v[0:1], |v[0:1]|, |v[2:3]| 3150; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3151; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]| 3152; GFX12-NEXT: s_setpc_b64 s[30:31] 3153; 3154; GFX9-LABEL: v_fmaximum3_f64_fabs_all: 3155; GFX9: ; %bb.0: 3156; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3157; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, |v[2:3]| 3158; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3159; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]| 3160; GFX9-NEXT: s_nop 1 3161; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3162; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3163; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]| 3164; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]| 3165; GFX9-NEXT: s_nop 1 3166; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3167; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3168; GFX9-NEXT: s_setpc_b64 s[30:31] 3169 %a.fabs = call double @llvm.fabs.f64(double %a) 3170 %b.fabs = call double @llvm.fabs.f64(double %b) 3171 %c.fabs = call double @llvm.fabs.f64(double %c) 3172 %max0 = call double @llvm.maximum.f64(double %a.fabs, double %b.fabs) 3173 %max1 = call double @llvm.maximum.f64(double %max0, double %c.fabs) 3174 ret double %max1 3175} 3176 3177define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) { 3178; GFX12-LABEL: v_fmaximum3_f64_fneg_all: 3179; GFX12: ; %bb.0: 3180; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3181; GFX12-NEXT: s_wait_expcnt 0x0 3182; GFX12-NEXT: s_wait_samplecnt 0x0 3183; GFX12-NEXT: s_wait_bvhcnt 0x0 3184; GFX12-NEXT: s_wait_kmcnt 0x0 3185; GFX12-NEXT: v_maximum_f64 v[0:1], -v[0:1], -v[2:3] 3186; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3187; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5] 3188; GFX12-NEXT: s_setpc_b64 s[30:31] 3189; 3190; GFX9-LABEL: v_fmaximum3_f64_fneg_all: 3191; GFX9: ; %bb.0: 3192; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3193; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], -v[2:3] 3194; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3195; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3] 3196; GFX9-NEXT: s_nop 1 3197; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3198; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3199; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5] 3200; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5] 3201; GFX9-NEXT: s_nop 1 3202; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3203; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3204; GFX9-NEXT: s_setpc_b64 s[30:31] 3205 %a.fneg = fneg double %a 3206 %b.fneg = fneg double %b 3207 %c.fneg = fneg double %c 3208 %max0 = call double @llvm.maximum.f64(double %a.fneg, double %b.fneg) 3209 %max1 = call double @llvm.maximum.f64(double %max0, double %c.fneg) 3210 ret double %max1 3211} 3212 3213define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) { 3214; GFX12-LABEL: v_fmaximum3_f64_fneg_fabs_all: 3215; GFX12: ; %bb.0: 3216; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3217; GFX12-NEXT: s_wait_expcnt 0x0 3218; GFX12-NEXT: s_wait_samplecnt 0x0 3219; GFX12-NEXT: s_wait_bvhcnt 0x0 3220; GFX12-NEXT: s_wait_kmcnt 0x0 3221; GFX12-NEXT: v_maximum_f64 v[0:1], -|v[0:1]|, -|v[2:3]| 3222; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3223; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -|v[4:5]| 3224; GFX12-NEXT: s_setpc_b64 s[30:31] 3225; 3226; GFX9-LABEL: v_fmaximum3_f64_fneg_fabs_all: 3227; GFX9: ; %bb.0: 3228; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3229; GFX9-NEXT: v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]| 3230; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3231; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]| 3232; GFX9-NEXT: s_nop 1 3233; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3234; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3235; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -|v[4:5]| 3236; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]| 3237; GFX9-NEXT: s_nop 1 3238; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3239; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3240; GFX9-NEXT: s_setpc_b64 s[30:31] 3241 %a.fabs = call double @llvm.fabs.f64(double %a) 3242 %b.fabs = call double @llvm.fabs.f64(double %b) 3243 %c.fabs = call double @llvm.fabs.f64(double %c) 3244 %a.fneg.fabs = fneg double %a.fabs 3245 %b.fneg.fabs = fneg double %b.fabs 3246 %c.fneg.fabs = fneg double %c.fabs 3247 %max0 = call double @llvm.maximum.f64(double %a.fneg.fabs, double %b.fneg.fabs) 3248 %max1 = call double @llvm.maximum.f64(double %max0, double %c.fneg.fabs) 3249 ret double %max1 3250} 3251 3252define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) { 3253; GFX12-LABEL: v_fmaximum3_f64_fneg0: 3254; GFX12: ; %bb.0: 3255; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3256; GFX12-NEXT: s_wait_expcnt 0x0 3257; GFX12-NEXT: s_wait_samplecnt 0x0 3258; GFX12-NEXT: s_wait_bvhcnt 0x0 3259; GFX12-NEXT: s_wait_kmcnt 0x0 3260; GFX12-NEXT: v_maximum_f64 v[0:1], -v[0:1], v[2:3] 3261; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3262; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] 3263; GFX12-NEXT: s_setpc_b64 s[30:31] 3264; 3265; GFX9-LABEL: v_fmaximum3_f64_fneg0: 3266; GFX9: ; %bb.0: 3267; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3268; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], v[2:3] 3269; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3270; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3] 3271; GFX9-NEXT: s_nop 1 3272; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3273; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3274; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] 3275; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] 3276; GFX9-NEXT: s_nop 1 3277; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3278; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3279; GFX9-NEXT: s_setpc_b64 s[30:31] 3280 %a.fneg = fneg double %a 3281 %max0 = call double @llvm.maximum.f64(double %a.fneg, double %b) 3282 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3283 ret double %max1 3284} 3285 3286define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) { 3287; GFX12-LABEL: v_fmaximum3_f64_fneg1: 3288; GFX12: ; %bb.0: 3289; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3290; GFX12-NEXT: s_wait_expcnt 0x0 3291; GFX12-NEXT: s_wait_samplecnt 0x0 3292; GFX12-NEXT: s_wait_bvhcnt 0x0 3293; GFX12-NEXT: s_wait_kmcnt 0x0 3294; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[2:3] 3295; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3296; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5] 3297; GFX12-NEXT: s_setpc_b64 s[30:31] 3298; 3299; GFX9-LABEL: v_fmaximum3_f64_fneg1: 3300; GFX9: ; %bb.0: 3301; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3302; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], -v[2:3] 3303; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3304; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3] 3305; GFX9-NEXT: s_nop 1 3306; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3307; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3308; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] 3309; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] 3310; GFX9-NEXT: s_nop 1 3311; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3312; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3313; GFX9-NEXT: s_setpc_b64 s[30:31] 3314 %b.fneg = fneg double %b 3315 %max0 = call double @llvm.maximum.f64(double %a, double %b.fneg) 3316 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3317 ret double %max1 3318} 3319 3320define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) { 3321; GFX12-LABEL: v_fmaximum3_f64_fneg2: 3322; GFX12: ; %bb.0: 3323; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3324; GFX12-NEXT: s_wait_expcnt 0x0 3325; GFX12-NEXT: s_wait_samplecnt 0x0 3326; GFX12-NEXT: s_wait_bvhcnt 0x0 3327; GFX12-NEXT: s_wait_kmcnt 0x0 3328; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 3329; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3330; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5] 3331; GFX12-NEXT: s_setpc_b64 s[30:31] 3332; 3333; GFX9-LABEL: v_fmaximum3_f64_fneg2: 3334; GFX9: ; %bb.0: 3335; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3336; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] 3337; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3338; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 3339; GFX9-NEXT: s_nop 1 3340; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3341; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3342; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5] 3343; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5] 3344; GFX9-NEXT: s_nop 1 3345; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3346; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc 3347; GFX9-NEXT: s_setpc_b64 s[30:31] 3348 %c.fneg = fneg double %c 3349 %max0 = call double @llvm.maximum.f64(double %a, double %b) 3350 %max1 = call double @llvm.maximum.f64(double %max0, double %c.fneg) 3351 ret double %max1 3352} 3353 3354define double @v_fmaximum3_f64_const0(double %b, double %c) { 3355; GFX12-LABEL: v_fmaximum3_f64_const0: 3356; GFX12: ; %bb.0: 3357; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3358; GFX12-NEXT: s_wait_expcnt 0x0 3359; GFX12-NEXT: s_wait_samplecnt 0x0 3360; GFX12-NEXT: s_wait_bvhcnt 0x0 3361; GFX12-NEXT: s_wait_kmcnt 0x0 3362; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1] 3363; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3364; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 3365; GFX12-NEXT: s_setpc_b64 s[30:31] 3366; 3367; GFX9-LABEL: v_fmaximum3_f64_const0: 3368; GFX9: ; %bb.0: 3369; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3370; GFX9-NEXT: s_mov_b32 s0, 0 3371; GFX9-NEXT: s_mov_b32 s1, 0x40200000 3372; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], s[0:1] 3373; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 3374; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 3375; GFX9-NEXT: s_nop 1 3376; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc 3377; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc 3378; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] 3379; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 3380; GFX9-NEXT: s_nop 1 3381; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc 3382; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc 3383; GFX9-NEXT: s_setpc_b64 s[30:31] 3384 %max0 = call double @llvm.maximum.f64(double 8.0, double %b) 3385 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3386 ret double %max1 3387} 3388 3389define double @v_fmaximum3_f64__const2(double %a, double %b) { 3390; GFX12-LABEL: v_fmaximum3_f64__const2: 3391; GFX12: ; %bb.0: 3392; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3393; GFX12-NEXT: s_wait_expcnt 0x0 3394; GFX12-NEXT: s_wait_samplecnt 0x0 3395; GFX12-NEXT: s_wait_bvhcnt 0x0 3396; GFX12-NEXT: s_wait_kmcnt 0x0 3397; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 3398; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3399; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1] 3400; GFX12-NEXT: s_setpc_b64 s[30:31] 3401; 3402; GFX9-LABEL: v_fmaximum3_f64__const2: 3403; GFX9: ; %bb.0: 3404; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3405; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] 3406; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 3407; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 3408; GFX9-NEXT: s_mov_b32 s0, 0 3409; GFX9-NEXT: s_mov_b32 s1, 0x40200000 3410; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc 3411; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc 3412; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1] 3413; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 3414; GFX9-NEXT: s_nop 1 3415; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3416; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc 3417; GFX9-NEXT: s_setpc_b64 s[30:31] 3418 %max0 = call double @llvm.maximum.f64(double %a, double %b) 3419 %max1 = call double @llvm.maximum.f64(double %max0, double 8.0) 3420 ret double %max1 3421} 3422 3423define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) { 3424; GFX12-LABEL: v_fmaximum3_f64_inlineimm0: 3425; GFX12: ; %bb.0: 3426; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3427; GFX12-NEXT: s_wait_expcnt 0x0 3428; GFX12-NEXT: s_wait_samplecnt 0x0 3429; GFX12-NEXT: s_wait_bvhcnt 0x0 3430; GFX12-NEXT: s_wait_kmcnt 0x0 3431; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0 3432; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3433; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 3434; GFX12-NEXT: s_setpc_b64 s[30:31] 3435; 3436; GFX9-LABEL: v_fmaximum3_f64_inlineimm0: 3437; GFX9: ; %bb.0: 3438; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3439; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], 4.0 3440; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 3441; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 3442; GFX9-NEXT: s_nop 1 3443; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc 3444; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc 3445; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] 3446; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 3447; GFX9-NEXT: s_nop 1 3448; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc 3449; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc 3450; GFX9-NEXT: s_setpc_b64 s[30:31] 3451 %max0 = call double @llvm.maximum.f64(double 4.0, double %b) 3452 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3453 ret double %max1 3454} 3455 3456define double @v_fmaximum3_f64__inlineimm(double %a, double %b) { 3457; GFX12-LABEL: v_fmaximum3_f64__inlineimm: 3458; GFX12: ; %bb.0: 3459; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3460; GFX12-NEXT: s_wait_expcnt 0x0 3461; GFX12-NEXT: s_wait_samplecnt 0x0 3462; GFX12-NEXT: s_wait_bvhcnt 0x0 3463; GFX12-NEXT: s_wait_kmcnt 0x0 3464; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 3465; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3466; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0 3467; GFX12-NEXT: s_setpc_b64 s[30:31] 3468; 3469; GFX9-LABEL: v_fmaximum3_f64__inlineimm: 3470; GFX9: ; %bb.0: 3471; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3472; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] 3473; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 3474; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 3475; GFX9-NEXT: s_nop 1 3476; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc 3477; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc 3478; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], 4.0 3479; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 3480; GFX9-NEXT: s_nop 1 3481; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3482; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc 3483; GFX9-NEXT: s_setpc_b64 s[30:31] 3484 %max0 = call double @llvm.maximum.f64(double %a, double %b) 3485 %max1 = call double @llvm.maximum.f64(double %max0, double 4.0) 3486 ret double %max1 3487} 3488 3489define double @v_fmaximum3_f64_const1_const2(double %a) { 3490; GFX12-LABEL: v_fmaximum3_f64_const1_const2: 3491; GFX12: ; %bb.0: 3492; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3493; GFX12-NEXT: s_wait_expcnt 0x0 3494; GFX12-NEXT: s_wait_samplecnt 0x0 3495; GFX12-NEXT: s_wait_bvhcnt 0x0 3496; GFX12-NEXT: s_wait_kmcnt 0x0 3497; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1] 3498; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3499; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40300000, v[0:1] 3500; GFX12-NEXT: s_setpc_b64 s[30:31] 3501; 3502; GFX9-LABEL: v_fmaximum3_f64_const1_const2: 3503; GFX9: ; %bb.0: 3504; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3505; GFX9-NEXT: s_mov_b32 s0, 0 3506; GFX9-NEXT: s_mov_b32 s1, 0x40200000 3507; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1] 3508; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000 3509; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 3510; GFX9-NEXT: s_mov_b32 s0, 0 3511; GFX9-NEXT: s_mov_b32 s1, 0x40300000 3512; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc 3513; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3514; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1] 3515; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 3516; GFX9-NEXT: s_nop 1 3517; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc 3518; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc 3519; GFX9-NEXT: s_setpc_b64 s[30:31] 3520 %max0 = call double @llvm.maximum.f64(double %a, double 8.0) 3521 %max1 = call double @llvm.maximum.f64(double %max0, double 16.0) 3522 ret double %max1 3523} 3524 3525define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) { 3526; GFX12-LABEL: v_no_fmaximum3_f32__multi_use: 3527; GFX12: ; %bb.0: 3528; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3529; GFX12-NEXT: s_wait_expcnt 0x0 3530; GFX12-NEXT: s_wait_samplecnt 0x0 3531; GFX12-NEXT: s_wait_bvhcnt 0x0 3532; GFX12-NEXT: s_wait_kmcnt 0x0 3533; GFX12-NEXT: v_maximum_f32 v0, v0, v1 3534; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3535; GFX12-NEXT: v_maximum_f32 v1, v0, v2 3536; GFX12-NEXT: s_setpc_b64 s[30:31] 3537; 3538; GFX940-LABEL: v_no_fmaximum3_f32__multi_use: 3539; GFX940: ; %bb.0: 3540; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3541; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 3542; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 3543; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 3544; GFX940-NEXT: s_nop 1 3545; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 3546; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 3547; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 3548; GFX940-NEXT: s_nop 1 3549; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 3550; GFX940-NEXT: s_setpc_b64 s[30:31] 3551; 3552; GFX950-LABEL: v_no_fmaximum3_f32__multi_use: 3553; GFX950: ; %bb.0: 3554; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3555; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 3556; GFX950-NEXT: v_maximum3_f32 v1, v0, v2, v2 3557; GFX950-NEXT: s_setpc_b64 s[30:31] 3558 %max0 = call float @llvm.maximum.f32(float %a, float %b) 3559 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 3560 %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 3561 %insert.1 = insertelement <2 x float> %insert.0, float %max1, i32 1 3562 ret <2 x float> %insert.1 3563} 3564 3565define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float inreg %b, float inreg %c) { 3566; GFX12-LABEL: s_no_fmaximum3_f32__multi_use: 3567; GFX12: ; %bb.0: 3568; GFX12-NEXT: s_maximum_f32 s0, s0, s1 3569; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) 3570; GFX12-NEXT: s_maximum_f32 s1, s0, s2 3571; GFX12-NEXT: ; return to shader part epilog 3572; 3573; GFX940-LABEL: s_no_fmaximum3_f32__multi_use: 3574; GFX940: ; %bb.0: 3575; GFX940-NEXT: v_mov_b32_e32 v0, s1 3576; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 3577; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 3578; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 3579; GFX940-NEXT: s_nop 1 3580; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3581; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 3582; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 3583; GFX940-NEXT: v_readfirstlane_b32 s0, v0 3584; GFX940-NEXT: s_nop 0 3585; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 3586; GFX940-NEXT: s_nop 0 3587; GFX940-NEXT: v_readfirstlane_b32 s1, v1 3588; GFX940-NEXT: ; return to shader part epilog 3589; 3590; GFX950-LABEL: s_no_fmaximum3_f32__multi_use: 3591; GFX950: ; %bb.0: 3592; GFX950-NEXT: v_mov_b32_e32 v0, s0 3593; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 3594; GFX950-NEXT: v_maximum3_f32 v1, v0, s2, s2 3595; GFX950-NEXT: v_readfirstlane_b32 s0, v0 3596; GFX950-NEXT: v_readfirstlane_b32 s1, v1 3597; GFX950-NEXT: ; return to shader part epilog 3598 %max0 = call float @llvm.maximum.f32(float %a, float %b) 3599 %max1 = call float @llvm.maximum.f32(float %max0, float %c) 3600 %cast0 = bitcast float %max0 to i32 3601 %cast1 = bitcast float %max1 to i32 3602 %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast0) 3603 %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast1) 3604 %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 3605 %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 3606 ret <2 x i32> %insert.1 3607} 3608 3609define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) { 3610; GFX12-LABEL: v_no_fmaximum3_f16__multi_use: 3611; GFX12: ; %bb.0: 3612; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3613; GFX12-NEXT: s_wait_expcnt 0x0 3614; GFX12-NEXT: s_wait_samplecnt 0x0 3615; GFX12-NEXT: s_wait_bvhcnt 0x0 3616; GFX12-NEXT: s_wait_kmcnt 0x0 3617; GFX12-NEXT: v_maximum_f16 v0, v0, v1 3618; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3619; GFX12-NEXT: v_maximum_f16 v1, v0, v2 3620; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1 3621; GFX12-NEXT: s_setpc_b64 s[30:31] 3622; 3623; GFX9-LABEL: v_no_fmaximum3_f16__multi_use: 3624; GFX9: ; %bb.0: 3625; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3626; GFX9-NEXT: v_max_f16_e32 v3, v0, v1 3627; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 3628; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 3629; GFX9-NEXT: s_nop 1 3630; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 3631; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 3632; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 3633; GFX9-NEXT: s_nop 1 3634; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 3635; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 3636; GFX9-NEXT: s_setpc_b64 s[30:31] 3637 %max0 = call half @llvm.maximum.f16(half %a, half %b) 3638 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 3639 %insert.0 = insertelement <2 x half> poison, half %max0, i32 0 3640 %insert.1 = insertelement <2 x half> %insert.0, half %max1, i32 1 3641 ret <2 x half> %insert.1 3642} 3643 3644define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half inreg %b, half inreg %c) { 3645; GFX12-LABEL: s_no_fmaximum3_f16__multi_use: 3646; GFX12: ; %bb.0: 3647; GFX12-NEXT: s_maximum_f16 s0, s0, s1 3648; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) 3649; GFX12-NEXT: s_maximum_f16 s1, s0, s2 3650; GFX12-NEXT: s_and_b32 s0, 0xffff, s0 3651; GFX12-NEXT: s_and_b32 s1, 0xffff, s1 3652; GFX12-NEXT: ; return to shader part epilog 3653; 3654; GFX9-LABEL: s_no_fmaximum3_f16__multi_use: 3655; GFX9: ; %bb.0: 3656; GFX9-NEXT: v_mov_b32_e32 v0, s1 3657; GFX9-NEXT: v_max_f16_e32 v1, s0, v0 3658; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00 3659; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0 3660; GFX9-NEXT: s_nop 1 3661; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3662; GFX9-NEXT: v_max_f16_e32 v1, s2, v0 3663; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0 3664; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 3665; GFX9-NEXT: s_nop 0 3666; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 3667; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 3668; GFX9-NEXT: v_readfirstlane_b32 s0, v0 3669; GFX9-NEXT: v_readfirstlane_b32 s1, v1 3670; GFX9-NEXT: ; return to shader part epilog 3671 %max0 = call half @llvm.maximum.f16(half %a, half %b) 3672 %max1 = call half @llvm.maximum.f16(half %max0, half %c) 3673 %cast0 = bitcast half %max0 to i16 3674 %cast1 = bitcast half %max1 to i16 3675 %ext0 = zext i16 %cast0 to i32 3676 %ext1 = zext i16 %cast1 to i32 3677 %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext0) 3678 %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext1) 3679 %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 3680 %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 3681 ret <2 x i32> %insert.1 3682} 3683 3684define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, <2 x half> %c) { 3685; GFX12-LABEL: v_no_fmaximum3_v2f16__multi_use: 3686; GFX12: ; %bb.0: 3687; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3688; GFX12-NEXT: s_wait_expcnt 0x0 3689; GFX12-NEXT: s_wait_samplecnt 0x0 3690; GFX12-NEXT: s_wait_bvhcnt 0x0 3691; GFX12-NEXT: s_wait_kmcnt 0x0 3692; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 3693; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3694; GFX12-NEXT: v_pk_maximum_f16 v1, v0, v2 3695; GFX12-NEXT: s_setpc_b64 s[30:31] 3696; 3697; GFX940-LABEL: v_no_fmaximum3_v2f16__multi_use: 3698; GFX940: ; %bb.0: 3699; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3700; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 3701; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 3702; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 3703; GFX940-NEXT: s_mov_b32 s0, 0x5040100 3704; GFX940-NEXT: s_nop 0 3705; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc 3706; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 3707; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 3708; GFX940-NEXT: s_nop 1 3709; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc 3710; GFX940-NEXT: v_perm_b32 v0, v1, v5, s0 3711; GFX940-NEXT: v_pk_max_f16 v3, v0, v2 3712; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 3713; GFX940-NEXT: s_nop 1 3714; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc 3715; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 3716; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 3717; GFX940-NEXT: s_nop 1 3718; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc 3719; GFX940-NEXT: v_perm_b32 v1, v1, v5, s0 3720; GFX940-NEXT: s_setpc_b64 s[30:31] 3721; 3722; GFX950-LABEL: v_no_fmaximum3_v2f16__multi_use: 3723; GFX950: ; %bb.0: 3724; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3725; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1 3726; GFX950-NEXT: s_nop 0 3727; GFX950-NEXT: v_pk_maximum3_f16 v1, v0, v2, v2 3728; GFX950-NEXT: s_setpc_b64 s[30:31] 3729 %max0 = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b) 3730 %max1 = call <2 x half> @llvm.maximum.f16(<2 x half> %max0, <2 x half> %c) 3731 %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3732 ret <4 x half> %concat 3733} 3734 3735define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double %c) { 3736; GFX12-LABEL: v_no_fmaximum3_f64__multi_use: 3737; GFX12: ; %bb.0: 3738; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 3739; GFX12-NEXT: s_wait_expcnt 0x0 3740; GFX12-NEXT: s_wait_samplecnt 0x0 3741; GFX12-NEXT: s_wait_bvhcnt 0x0 3742; GFX12-NEXT: s_wait_kmcnt 0x0 3743; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] 3744; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 3745; GFX12-NEXT: v_maximum_f64 v[2:3], v[0:1], v[4:5] 3746; GFX12-NEXT: s_setpc_b64 s[30:31] 3747; 3748; GFX9-LABEL: v_no_fmaximum3_f64__multi_use: 3749; GFX9: ; %bb.0: 3750; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3751; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] 3752; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 3753; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 3754; GFX9-NEXT: s_nop 1 3755; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc 3756; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 3757; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] 3758; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] 3759; GFX9-NEXT: s_nop 1 3760; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc 3761; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 3762; GFX9-NEXT: s_setpc_b64 s[30:31] 3763 %max0 = call double @llvm.maximum.f64(double %a, double %b) 3764 %max1 = call double @llvm.maximum.f64(double %max0, double %c) 3765 %insert.0 = insertelement <2 x double> poison, double %max0, i32 0 3766 %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 3767 ret <2 x double> %insert.1 3768} 3769