; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Signed clamp patterns built from llvm.smax / llvm.smin with constant bounds
; K0 = -12 (lower) and K1 = 17 (upper). The i32 VGPR variants below cover every
; operand order of the two intrinsic calls; each is expected to select a single
; v_med3_i32.

; smin(smax(%a, K0), K1)
define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
; GFX89-LABEL: test_min_max_ValK0_K1_i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_ValK0_K1_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
  %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
  ret i32 %smed
}

; smin(smax(K0, %a), K1)
; NOTE(review): unlike its siblings this function has no `test_` prefix and its
; name says `ValK0` although the constant is the first smax operand. Renaming
; would require regenerating the assertions, so the name is left unchanged.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX89-LABEL: min_max_ValK0_K1_i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
  %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
  ret i32 %smed
}

; smin(K1, smax(%a, K0))
define i32 @test_min_K1max_ValK0__i32(i32 %a) {
; GFX89-LABEL: test_min_K1max_ValK0__i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_ValK0__i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
  %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
  ret i32 %smed
}

; smin(K1, smax(K0, %a))
define i32 @test_min_K1max_K0Val__i32(i32 %a) {
; GFX89-LABEL: test_min_K1max_K0Val__i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_K0Val__i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
  %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
  ret i32 %smed
}

; smax(smin(%a, K1), K0)
define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
; GFX89-LABEL: test_max_min_ValK1_K0_i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_ValK1_K0_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
  %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
  ret i32 %smed
}

; smax(smin(K1, %a), K0)
define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
; GFX89-LABEL: test_max_min_K1Val_K0_i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_K1Val_K0_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
  %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
  ret i32 %smed
}

; smax(K0, smin(%a, K1))
define i32 @test_max_K0min_ValK1__i32(i32 %a) {
; GFX89-LABEL: test_max_K0min_ValK1__i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_ValK1__i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
  %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
  ret i32 %smed
}

; smax(K0, smin(K1, %a))
define i32 @test_max_K0min_K1Val__i32(i32 %a) {
; GFX89-LABEL: test_max_K0min_K1Val__i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
  %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
  ret i32 %smed
}

; <2 x i16> splat-constant variant. The expected output keeps separate min and
; max instructions on every target: per-half v_min_i16 / v_max_i16 (plus SDWA
; forms) on gfx802, and v_pk_min_i16 / v_pk_max_i16 on gfx900 and gfx1010.
define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
; GFX8-LABEL: test_max_K0min_K1Val__v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 17
; GFX8-NEXT:    v_min_i16_e32 v1, 17, v0
; GFX8-NEXT:    v_min_i16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_mov_b32_e32 v2, -12
; GFX8-NEXT:    v_max_i16_e32 v1, -12, v1
; GFX8-NEXT:    v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_max_K0min_K1Val__v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
; GFX9-NEXT:    v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT:    v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smin = call <2 x i16> @llvm.smin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
  %smed = call <2 x i16> @llvm.smax.v2i16(<2 x i16> <i16 -12, i16 -12>, <2 x i16> %smin)
  ret <2 x i16> %smed
}

; amdgpu_ps function with an `inreg` argument. The expected output is the
; scalar pair s_max_i32 / s_min_i32 rather than a med3 instruction.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX89-LABEL: test_uniform_min_max:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_max_i32 s0, s2, -12
; GFX89-NEXT:    s_min_i32 s0, s0, 17
; GFX89-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_uniform_min_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_max_i32 s0, s2, -12
; GFX10-NEXT:    s_min_i32 s0, s0, 17
; GFX10-NEXT:    ; return to shader part epilog
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
  %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
  ret i32 %smed
}

; Upper bound 65 (0x41). The gfx802/gfx900 checks expect it to be moved into v1
; before v_med3_i32; the gfx1010 checks expect it as a literal operand of
; v_med3_i32 itself.
define i32 @test_non_inline_constant_i32(i32 %a) {
; GFX89-LABEL: test_non_inline_constant_i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_mov_b32_e32 v1, 0x41
; GFX89-NEXT:    v_med3_i32 v0, v0, -12, v1
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_non_inline_constant_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 0x41
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
  %smed = call i32 @llvm.smin.i32(i32 %smax, i32 65)
  ret i32 %smed
}

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)