; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s

; Exercises folding of minnum/maxnum chains against the constants 0.0 and 1.0
; into the "clamp" output modifier of the instruction producing the value.
; Test names encode the operand order of the pattern: Val is the variable
; input, K0 is the lower constant (0.0) and K1 is the upper constant (1.0).
; Each function selects its FP mode through one of the attribute groups at the
; end of the file.

; min-max patterns with nnan on both calls

define float @test_min_max_ValK0_K1_f32(float %a) #0 {
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_ValK0_K1_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul float %a, 2.0
  %maxnum = call nnan float @llvm.maxnum.f32(float %fmul, float 0.0)
  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0)
  ret float %fmed
}

define double @test_min_max_K0Val_K1_f64(double %a) #1 {
; GFX10-LABEL: test_min_max_K0Val_K1_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0Val_K1_f64:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f64_e64 v[0:1], v[0:1], 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul double %a, 2.0
  %maxnum = call nnan double @llvm.maxnum.f64(double 0.0, double %fmul)
  %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 1.0)
  ret double %fmed
}

; min-max patterns for ieee=true, dx10_clamp=true don't have to check for NaNs
define half @test_min_K1max_ValK0_f16(half %a) #2 {
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_ValK0_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul half %a, 2.0
  %maxnum = call half @llvm.maxnum.f16(half %fmul, half 0.0)
  %fmed = call half @llvm.minnum.f16(half 1.0, half %maxnum)
  ret half %fmed
}

define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 {
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_K1max_K0Val_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
  %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half 0.0>, <2 x half> %fmul)
  %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half 1.0>, <2 x half> %maxnum)
  ret <2 x half> %fmed
}

define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
  %maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half undef>, <2 x half> %fmul)
  %fmed = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half undef>, <2 x half> %maxnum)
  ret <2 x half> %fmed
}

; max-min patterns work only for known non-NaN inputs

define float @test_max_min_ValK1_K0_f32(float %a) #0 {
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_ValK1_K0_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul float %a, 2.0
  %minnum = call nnan float @llvm.minnum.f32(float %fmul, float 1.0)
  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0)
  ret float %fmed
}

define double @test_max_min_K1Val_K0_f64(double %a) #1 {
; GFX10-LABEL: test_max_min_K1Val_K0_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K1Val_K0_f64:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f64_e64 v[0:1], v[0:1], 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul double %a, 2.0
  %minnum = call nnan double @llvm.minnum.f64(double 1.0, double %fmul)
  %fmed = call nnan double @llvm.maxnum.f64(double %minnum, double 0.0)
  ret double %fmed
}

define half @test_max_K0min_ValK1_f16(half %a) #0 {
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_ValK1_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul half %a, 2.0
  %minnum = call nnan half @llvm.minnum.f16(half %fmul, half 1.0)
  %fmed = call nnan half @llvm.maxnum.f16(half 0.0, half %minnum)
  ret half %fmed
}

; treat undef as value that will result in a constant splat
define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
; GFX10-LABEL: test_max_K0min_K1Val_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_K0min_K1Val_v2f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
  %minnum = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half undef>, <2 x half> %fmul)
  %fmed = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> <half undef, half 0.0>, <2 x half> %minnum)
  ret <2 x half> %fmed
}

; global nnan function attribute always forces clamp combine

define float @test_min_max_global_nnan(float %a) #3 {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_global_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
  %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
  ret float %fmed
}

define float @test_max_min_global_nnan(float %a) #3 {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_global_nnan:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call float @llvm.minnum.f32(float %a, float 1.0)
  %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
  ret float %fmed
}

; ------------------------------------------------------------------------------
; Negative patterns
; ------------------------------------------------------------------------------

; min(max(Val, 1.0), 0.0), should be min(max(Val, 0.0), 1.0)
define float @test_min_max_K0_gt_K1(float %a) #0 {
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0_gt_K1:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 1.0)
  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 0.0)
  ret float %fmed
}

; max(min(Val, 0.0), 1.0), should be max(min(Val, 1.0), 0.0)
define float @test_max_min_K0_gt_K1(float %a) #0 {
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_f32_e32 v0, 0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K0_gt_K1:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 0, 1.0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %minnum = call nnan float @llvm.minnum.f32(float %a, float 0.0)
  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 1.0)
  ret float %fmed
}

; Input that can be NaN
; NOTE(review): the comments in this section describe the gfx1010 output. The
; gfx1200 expectations for the two min-max tests below still show a folded
; clamp -- presumably the ieee/dx10_clamp mode attributes do not apply on that
; target; confirm before relying on these as negative tests there.

; min-max patterns for ieee=false require known non-NaN input
define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul float %a, 2.0
  %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
  %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
  ret float %fmed
}

; clamp fails here since input can be NaN and dx10_clamp=false; fmed3 succeeds
define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #4 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul float %a, 2.0
  %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
  %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
  ret float %fmed
}

; max-min patterns always require known non-NaN input

define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul float %a, 2.0
  %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
  %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
  ret float %fmed
}

define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %fmul = fmul float %a, 2.0
  %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
  %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
  ret float %fmed
}

; Intrinsics used by the tests above.
declare half @llvm.minnum.f16(half, half)
declare half @llvm.maxnum.f16(half, half)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare double @llvm.maxnum.f64(double, double)
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
; Attribute groups selecting the FP mode of each test:
;   #0 ieee on; #1 ieee off; #2 ieee on with dx10_clamp on;
;   #3 function-wide no-NaNs; #4 ieee on with dx10_clamp off.
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
attributes #3 = {"no-nans-fp-math"="true"}
attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}