; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12 %s

; Overview
; --------
; Each function nests one min/max intrinsic inside the other (or builds the
; four-call median-of-three pattern) and the assertions pin which machine
; instruction is emitted for gfx1100 and gfx1200, under both instruction
; selectors named on the run lines above.
;
; Naming used below:
;   test_minmax_*    min(max(a, b), c)   - assertions expect a v_maxmin_* op
;   test_maxmin_*    max(min(a, b), c)   - assertions expect a v_minmax_* op
;   *_commuted_*     same, with the outer call's operands swapped
;   s_test_*         all operands are inreg arguments of an amdgpu_ps function
;   test_*med3_*     max(min(x, y), min(max(x, y), z)) - expects v_med3_*
;
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

;------------------------------------------------------------------------------
; Signed i32
;------------------------------------------------------------------------------

; smin(smax(a, b), c) with VGPR operands.
define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
  ret i32 %sminmax
}

; Same pattern with inreg operands: the assertions expect the two scalar
; instructions s_max_i32 and s_min_i32 instead of a fused three-operand op.
define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-LABEL: s_test_minmax_i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_max_i32 s0, s0, s1
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_min_i32 s0, s0, s2
; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_max_i32 s0, s0, s1
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_min_i32 s0, s0, s2
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_endpgm
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
  store i32 %sminmax, ptr addrspace(1) %out
  ret void
}

; smin(c, smax(a, b)): outer operands swapped, same expected instruction.
define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_commuted_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_commuted_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %sminmax = call i32 @llvm.smin.i32(i32 %c, i32 %smax)
  ret i32 %sminmax
}

; smax(smin(a, b), c).
define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %smaxmin = call i32 @llvm.smax.i32(i32 %smin, i32 %c)
  ret i32 %smaxmin
}

; smax(c, smin(a, b)): outer operands swapped, same expected instruction.
define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_commuted_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_commuted_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %smaxmin = call i32 @llvm.smax.i32(i32 %c, i32 %smin)
  ret i32 %smaxmin
}

; Signed median-of-three pattern: the assertions expect v_med3_i32, i.e. the
; nested min/max calls here must not be turned into minmax/maxmin ops.
define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-LABEL: test_smed3_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_med3_i32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_smed3_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_i32 v2, v2, v3, v4
; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call i32 @llvm.smin.i32(i32 %x, i32 %y)
  %tmp1 = call i32 @llvm.smax.i32(i32 %x, i32 %y)
  %tmp2 = call i32 @llvm.smin.i32(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @llvm.smax.i32(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

;------------------------------------------------------------------------------
; Unsigned i32
;------------------------------------------------------------------------------

; umin(umax(a, b), c) with VGPR operands.
define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_u32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
  ret i32 %uminmax
}

; Same pattern with inreg operands: the assertions expect the two scalar
; instructions s_max_u32 and s_min_u32 instead of a fused three-operand op.
define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-LABEL: s_test_minmax_u32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_max_u32 s0, s0, s1
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_min_u32 s0, s0, s2
; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_u32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_max_u32 s0, s0, s1
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_min_u32 s0, s0, s2
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_endpgm
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
  store i32 %uminmax, ptr addrspace(1) %out
  ret void
}

; umin(c, umax(a, b)): outer operands swapped, same expected instruction.
define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_commuted_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_minmax_commuted_u32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %uminmax = call i32 @llvm.umin.i32(i32 %c, i32 %umax)
  ret i32 %uminmax
}

; umax(umin(a, b), c).
define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_u32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %umaxmin = call i32 @llvm.umax.i32(i32 %umin, i32 %c)
  ret i32 %umaxmin
}

; umax(c, umin(a, b)): outer operands swapped, same expected instruction.
define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_commuted_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_maxmin_commuted_u32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %umaxmin = call i32 @llvm.umax.i32(i32 %c, i32 %umin)
  ret i32 %umaxmin
}

; Unsigned median-of-three pattern: the assertions expect v_med3_u32.
define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-LABEL: test_umed3_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_med3_u32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_umed3_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_u32 v2, v2, v3, v4
; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call i32 @llvm.umin.i32(i32 %x, i32 %y)
  %tmp1 = call i32 @llvm.umax.i32(i32 %x, i32 %y)
  %tmp2 = call i32 @llvm.umin.i32(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @llvm.umax.i32(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

;------------------------------------------------------------------------------
; f32 (llvm.minnum / llvm.maxnum)
;
; In the *_ieee_true functions (default calling convention) the assertions
; expect a max of each input with itself before the fused op; the amdgpu_ps
; *_ieee_false functions expect the fused op alone.
; NOTE(review): the self-max instructions are presumably input
; canonicalization required when the IEEE mode bit is set - confirm against
; the AMDGPU backend documentation.
;------------------------------------------------------------------------------

; minnum(maxnum(a, b), c), default calling convention.
define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
; SDAG-GFX11-LABEL: test_minmax_f32_ieee_true:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
; SDAG-GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX11-LABEL: test_minmax_f32_ieee_true:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
; GISEL-GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: test_minmax_f32_ieee_true:
; SDAG-GFX12:       ; %bb.0:
; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
; SDAG-GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; SDAG-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
; SDAG-GFX12-NEXT:    v_maxmin_num_f32 v0, v0, v1, v2
; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: test_minmax_f32_ieee_true:
; GISEL-GFX12:       ; %bb.0:
; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GISEL-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
; GISEL-GFX12-NEXT:    v_maxmin_num_f32 v0, v0, v1, v2
; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %minmax = call float @llvm.minnum.f32(float %max, float %c)
  ret float %minmax
}

; inreg operands in an amdgpu_ps function. Three of the four configurations
; expect a VALU v_maxmin op; GlobalISel on gfx1200 expects the scalar pair
; s_max_num_f32 / s_min_num_f32.
define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-GFX11-LABEL: s_test_minmax_f32_ieee_false:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-GFX11-NEXT:    s_mov_b32 s5, s4
; SDAG-GFX11-NEXT:    s_mov_b32 s4, s3
; SDAG-GFX11-NEXT:    v_maxmin_f32 v0, s0, s1, v0
; SDAG-GFX11-NEXT:    global_store_b32 v1, v0, s[4:5]
; SDAG-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: s_test_minmax_f32_ieee_false:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GISEL-GFX11-NEXT:    s_mov_b32 s6, s3
; GISEL-GFX11-NEXT:    s_mov_b32 s7, s4
; GISEL-GFX11-NEXT:    v_maxmin_f32 v0, s0, s1, v0
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX12-LABEL: s_test_minmax_f32_ieee_false:
; SDAG-GFX12:       ; %bb.0:
; SDAG-GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-GFX12-NEXT:    s_mov_b32 s5, s4
; SDAG-GFX12-NEXT:    s_mov_b32 s4, s3
; SDAG-GFX12-NEXT:    v_maxmin_num_f32 v0, s0, s1, v0
; SDAG-GFX12-NEXT:    global_store_b32 v1, v0, s[4:5]
; SDAG-GFX12-NEXT:    s_endpgm
;
; GISEL-GFX12-LABEL: s_test_minmax_f32_ieee_false:
; GISEL-GFX12:       ; %bb.0:
; GISEL-GFX12-NEXT:    s_max_num_f32 s0, s0, s1
; GISEL-GFX12-NEXT:    s_mov_b32 s6, s3
; GISEL-GFX12-NEXT:    s_mov_b32 s7, s4
; GISEL-GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX12-NEXT:    s_min_num_f32 s0, s0, s2
; GISEL-GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GISEL-GFX12-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-GFX12-NEXT:    s_endpgm
  %smax = call float @llvm.maxnum.f32(float %a, float %b)
  %sminmax = call float @llvm.minnum.f32(float %smax, float %c)
  store float %sminmax, ptr addrspace(1) %out
  ret void
}

; minnum(c, maxnum(a, b)) in an amdgpu_ps function.
define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b, float %c) {
; GFX11-LABEL: test_minmax_commuted_f32_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: test_minmax_commuted_f32_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, v1, v2
; GFX12-NEXT:    ; return to shader part epilog
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %minmax = call float @llvm.minnum.f32(float %c, float %max)
  ret float %minmax
}

; maxnum(minnum(a, b), c), default calling convention.
define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
; SDAG-GFX11-LABEL: test_maxmin_f32_ieee_true:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
; SDAG-GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX11-LABEL: test_maxmin_f32_ieee_true:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
; GISEL-GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: test_maxmin_f32_ieee_true:
; SDAG-GFX12:       ; %bb.0:
; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
; SDAG-GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
; SDAG-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
; SDAG-GFX12-NEXT:    v_minmax_num_f32 v0, v0, v1, v2
; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: test_maxmin_f32_ieee_true:
; GISEL-GFX12:       ; %bb.0:
; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
; GISEL-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
; GISEL-GFX12-NEXT:    v_minmax_num_f32 v0, v0, v1, v2
; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %maxmin = call float @llvm.maxnum.f32(float %min, float %c)
  ret float %maxmin
}

; maxnum(c, minnum(a, b)) in an amdgpu_ps function.
define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b, float %c) {
; GFX11-LABEL: test_maxmin_commuted_f32_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: test_maxmin_commuted_f32_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_minmax_num_f32 v0, v0, v1, v2
; GFX12-NEXT:    ; return to shader part epilog
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %maxmin = call float @llvm.maxnum.f32(float %c, float %min)
  ret float %maxmin
}

; f32 median-of-three pattern, compiled with attribute group #0
; (no-nans-fp-math): the assertions expect a single v_med3 op.
define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
; GFX11-LABEL: test_med3_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_med3_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f32 v2, v2, v3, v4
; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
  %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
  %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %tmp3, ptr addrspace(1) %arg
  ret void
}

;------------------------------------------------------------------------------
; f16 (llvm.minnum / llvm.maxnum)
;------------------------------------------------------------------------------

; minnum(maxnum(a, b), c) in an amdgpu_ps function.
define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
; GFX11-LABEL: test_minmax_f16_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: test_minmax_f16_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_maxmin_num_f16 v0, v0, v1, v2
; GFX12-NEXT:    ; return to shader part epilog
  %max = call half @llvm.maxnum.f16(half %a, half %b)
  %minmax = call half @llvm.minnum.f16(half %max, half %c)
  ret half %minmax
}

; inreg operands in an amdgpu_ps function. Three of the four configurations
; expect a VALU v_maxmin op; GlobalISel on gfx1200 expects the scalar pair
; s_max_num_f16 / s_min_num_f16.
define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-GFX11-LABEL: s_test_minmax_f16_ieee_false:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-GFX11-NEXT:    s_mov_b32 s5, s4
; SDAG-GFX11-NEXT:    s_mov_b32 s4, s3
; SDAG-GFX11-NEXT:    v_maxmin_f16 v0, s0, s1, v0
; SDAG-GFX11-NEXT:    global_store_b16 v1, v0, s[4:5]
; SDAG-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: s_test_minmax_f16_ieee_false:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GISEL-GFX11-NEXT:    s_mov_b32 s6, s3
; GISEL-GFX11-NEXT:    s_mov_b32 s7, s4
; GISEL-GFX11-NEXT:    v_maxmin_f16 v0, s0, s1, v0
; GISEL-GFX11-NEXT:    global_store_b16 v1, v0, s[6:7]
; GISEL-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX12-LABEL: s_test_minmax_f16_ieee_false:
; SDAG-GFX12:       ; %bb.0:
; SDAG-GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-GFX12-NEXT:    s_mov_b32 s5, s4
; SDAG-GFX12-NEXT:    s_mov_b32 s4, s3
; SDAG-GFX12-NEXT:    v_maxmin_num_f16 v0, s0, s1, v0
; SDAG-GFX12-NEXT:    global_store_b16 v1, v0, s[4:5]
; SDAG-GFX12-NEXT:    s_endpgm
;
; GISEL-GFX12-LABEL: s_test_minmax_f16_ieee_false:
; GISEL-GFX12:       ; %bb.0:
; GISEL-GFX12-NEXT:    s_max_num_f16 s0, s0, s1
; GISEL-GFX12-NEXT:    s_mov_b32 s6, s3
; GISEL-GFX12-NEXT:    s_mov_b32 s7, s4
; GISEL-GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX12-NEXT:    s_min_num_f16 s0, s0, s2
; GISEL-GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GISEL-GFX12-NEXT:    global_store_b16 v1, v0, s[6:7]
; GISEL-GFX12-NEXT:    s_endpgm
  %smax = call half @llvm.maxnum.f16(half %a, half %b)
  %sminmax = call half @llvm.minnum.f16(half %smax, half %c)
  store half %sminmax, ptr addrspace(1) %out
  ret void
}

; minnum(c, maxnum(a, b)), default calling convention.
define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
; SDAG-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
; SDAG-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; SDAG-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
; SDAG-GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GISEL-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
; GISEL-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
; GISEL-GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG-GFX12:       ; %bb.0:
; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
; SDAG-GFX12-NEXT:    v_maxmin_num_f16 v0, v0, v1, v2
; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
; GISEL-GFX12:       ; %bb.0:
; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
; GISEL-GFX12-NEXT:    v_maxmin_num_f16 v0, v0, v1, v2
; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
  %max = call half @llvm.maxnum.f16(half %a, half %b)
  %minmax = call half @llvm.minnum.f16(half %c, half %max)
  ret half %minmax
}

; maxnum(minnum(a, b), c) in an amdgpu_ps function.
define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
; GFX11-LABEL: test_maxmin_f16_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: test_maxmin_f16_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_minmax_num_f16 v0, v0, v1, v2
; GFX12-NEXT:    ; return to shader part epilog
  %min = call half @llvm.minnum.f16(half %a, half %b)
  %maxmin = call half @llvm.maxnum.f16(half %min, half %c)
  ret half %maxmin
}

; maxnum(c, minnum(a, b)), default calling convention.
define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
; SDAG-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
; SDAG-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; SDAG-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
; SDAG-GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GISEL-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
; GISEL-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
; GISEL-GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG-GFX12:       ; %bb.0:
; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
; SDAG-GFX12-NEXT:    v_minmax_num_f16 v0, v0, v1, v2
; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
; GISEL-GFX12:       ; %bb.0:
; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
; GISEL-GFX12-NEXT:    v_minmax_num_f16 v0, v0, v1, v2
; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
  %min = call half @llvm.minnum.f16(half %a, half %b)
  %maxmin = call half @llvm.maxnum.f16(half %c, half %min)
  ret half %maxmin
}

; f16 median-of-three pattern, compiled with attribute group #0
; (no-nans-fp-math): the assertions expect a single v_med3 op.
define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 {
; GFX11-LABEL: test_med3_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_med3_f16 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_med3_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f16 v2, v2, v3, v4
; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call half @llvm.minnum.f16(half %x, half %y)
  %tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
  %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
  %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
  store half %tmp3, ptr addrspace(1) %arg
  ret void
}

; Intrinsics exercised by the functions above.
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare half @llvm.minnum.f16(half, half)
declare half @llvm.maxnum.f16(half, half)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
; Attribute group applied to the two floating-point med3 tests.
attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }