; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s

; Each function below clamps its argument with a minnum/maxnum pair against two
; floating-point constants. The operand order of the value and the constants,
; the element type, the nnan flags and the function attribute group vary from
; test to test. The expected code for gfx1010, gfx803 and gfx1200 is recorded
; in the autogenerated check lines; regenerate them with the script named in
; the first line instead of editing them by hand.

define float @test_min_max_ValK0_K1_f32(float %a) #0 {
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_ValK0_K1_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_ValK0_K1_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0)
  ret float %fmed
}

define float @test_min_max_K0Val_K1_f32(float %a) #1 {
; GFX10-LABEL: test_min_max_K0Val_K1_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_K0Val_K1_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0Val_K1_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan float @llvm.maxnum.f32(float 2.0, float %a)
  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0)
  ret float %fmed
}

; min-max patterns for ieee=true do not have to check for NaNs
; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
define half @test_min_K1max_ValK0_f16(half %a) #0 {
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_K1max_ValK0_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_ValK0_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call half @llvm.maxnum.f16(half %a, half 2.0)
  %fmed = call half @llvm.minnum.f16(half 4.0, half %maxnum)
  ret half %fmed
}

define half @test_min_K1max_K0Val_f16(half %a) #1 {
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_K1max_K0Val_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_K0Val_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan half @llvm.maxnum.f16(half 2.0, half %a)
  %fmed = call nnan half @llvm.minnum.f16(half 4.0, half %maxnum)
  ret half %fmed
}

; max-min patterns work only for non-NaN inputs
define float @test_max_min_ValK1_K0_f32(float %a) #0 {
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_ValK1_K0_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_ValK1_K0_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0)
  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
  ret float %fmed
}

define float @test_max_min_K1Val_K0_f32(float %a) #1 {
; GFX10-LABEL: test_max_min_K1Val_K0_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_K1Val_K0_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K1Val_K0_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call nnan float @llvm.minnum.f32(float 4.0, float %a)
  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
  ret float %fmed
}

define half @test_max_K0min_ValK1_f16(half %a) #0 {
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_K0min_ValK1_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_ValK1_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call nnan half @llvm.minnum.f16(half %a, half 4.0)
  %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum)
  ret half %fmed
}

define half @test_max_K0min_K1Val_f16(half %a) #1 {
; GFX10-LABEL: test_max_K0min_K1Val_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_K0min_K1Val_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_K1Val_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call nnan half @llvm.minnum.f16(half 4.0, half %a)
  %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum)
  ret half %fmed
}

; global nnan function attribute always forces fmed3 combine

define float @test_min_max_global_nnan(float %a) #2 {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_global_nnan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_global_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
  %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
  ret float %fmed
}

define float @test_max_min_global_nnan(float %a) #2 {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_global_nnan:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_global_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
  %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
  ret float %fmed
}

; ------------------------------------------------------------------------------
; Negative patterns
; ------------------------------------------------------------------------------

; min(max(Val, K0), K1) K0 > K1, should be K0<=K1
define float @test_min_max_K0_gt_K1(float %a) #0 {
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_K0_gt_K1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0_gt_K1:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, 4.0, 2.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 4.0)
  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 2.0)
  ret float %fmed
}

; max(min(Val, K1), K0) K0 > K1, should be K0<=K1
define float @test_max_min_K0_gt_K1(float %a) #0 {
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_K0_gt_K1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K0_gt_K1:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call nnan float @llvm.minnum.f32(float %a, float 2.0)
  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 4.0)
  ret float %fmed
}

; non-inline constant
define float @test_min_max_non_inline_const(float %a) #0 {
; GFX10-LABEL: test_min_max_non_inline_const:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_non_inline_const:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_non_inline_const:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, 2.0, 0x41000000
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 8.0)
  ret float %fmed
}

; there is no fmed3 for f64 or v2f16 types

define double @test_min_max_f64(double %a) #0 {
; GFX10-LABEL: test_min_max_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_f64:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], 2.0, v[0:1]
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_min_num_f64_e32 v[0:1], 4.0, v[0:1]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan double @llvm.maxnum.f64(double %a, double 2.0)
  %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 4.0)
  ret double %fmed
}

define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
; GFX10-LABEL: test_min_max_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX10-NEXT:    v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4000
; GFX8-NEXT:    v_max_f16_e32 v1, 2.0, v0
; GFX8-NEXT:    v_max_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4400
; GFX8-NEXT:    v_min_f16_e32 v1, 4.0, v1
; GFX8-NEXT:    v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_v2f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_pk_min_num_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
  %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %maxnum, <2 x half> <half 4.0, half 4.0>)
  ret <2 x half> %fmed
}

; input that can be NaN

; min-max patterns for ieee=false require known non-NaN input
define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
  %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
  ret float %fmed
}

; max-min patterns always require known non-NaN input

define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
  %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
  ret float %fmed
}

; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
  %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
  ret float %fmed
}

declare half @llvm.minnum.f16(half, half)
declare half @llvm.maxnum.f16(half, half)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare double @llvm.maxnum.f64(double, double)
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)

; Attribute groups referenced by the functions above:
;   #0 sets "amdgpu-ieee" to "true"
;   #1 sets "amdgpu-ieee" to "false"
;   #2 sets "no-nans-fp-math" to "true"
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
attributes #2 = {"no-nans-fp-math"="true"}