; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s

; Signed 3-operand median (med3) formation tests.
; Note that the two gfx1100 invocations enable only their own prefix, so the
; checks written with the common GCN prefix are not applied to those runs.

declare i32 @llvm.amdgcn.workitem.id.x() #0

; Clamp of a loaded value to the constant range [12, 17], written as a signed
; max followed by a signed min; expected to select a single med3.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i32:
; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
define amdgpu_kernel void @v_test_smed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr i32, ptr addrspace(1) %aptr, i32 %lane
  %out.ptr = getelementptr i32, ptr addrspace(1) %out, i32 %lane
  %val = load i32, ptr addrspace(1) %in.ptr

  %above.lo = icmp sgt i32 %val, 12
  %max.lo = select i1 %above.lo, i32 %val, i32 12

  %below.hi = icmp slt i32 %max.lo, 17
  %med = select i1 %below.hi, i32 %max.lo, i32 17

  store i32 %med, ptr addrspace(1) %out.ptr
  ret void
}

; Same clamp, but the intermediate max is also stored, so separate max and min
; instructions are expected instead of a med3.
; GCN-LABEL: {{^}}v_test_smed3_multi_use_r_i_i_i32:
; GCN: v_max_i32
; GCN: v_min_i32
define amdgpu_kernel void @v_test_smed3_multi_use_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr i32, ptr addrspace(1) %aptr, i32 %lane
  %out.ptr = getelementptr i32, ptr addrspace(1) %out, i32 %lane
  %val = load i32, ptr addrspace(1) %in.ptr

  %above.lo = icmp sgt i32 %val, 12
  %max.lo = select i1 %above.lo, i32 %val, i32 12

  %below.hi = icmp slt i32 %max.lo, 17
  %med = select i1 %below.hi, i32 %max.lo, i32 17

  store volatile i32 %max.lo, ptr addrspace(1) %out.ptr
  store volatile i32 %med, ptr addrspace(1) %out.ptr
  ret void
}

; The max is unsigned while the min is signed; the mixed signedness is expected
; to stay as an unsigned max followed by a signed min.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_sign_mismatch_i32:
; GCN: v_max_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
; GCN: v_min_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
define amdgpu_kernel void @v_test_smed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr i32, ptr addrspace(1) %aptr, i32 %lane
  %out.ptr = getelementptr i32, ptr addrspace(1) %out, i32 %lane
  %val = load i32, ptr addrspace(1) %in.ptr

  %above.lo = icmp ugt i32 %val, 12
  %max.lo = select i1 %above.lo, i32 %val, i32 12

  %below.hi = icmp slt i32 %max.lo, 17
  %med = select i1 %below.hi, i32 %max.lo, i32 17

  store i32 %med, ptr addrspace(1) %out.ptr
  ret void
}

; 64-bit version of the constant clamp; the expected output is a pair of
; 64-bit compares rather than a med3.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i64:
; GCN: v_cmp_lt_i64
; GCN: v_cmp_gt_i64
define amdgpu_kernel void @v_test_smed3_r_i_i_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr i64, ptr addrspace(1) %aptr, i32 %lane
  %out.ptr = getelementptr i64, ptr addrspace(1) %out, i32 %lane
  %val = load i64, ptr addrspace(1) %in.ptr

  %above.lo = icmp sgt i64 %val, 12
  %max.lo = select i1 %above.lo, i64 %val, i64 12

  %below.hi = icmp slt i64 %max.lo, 17
  %med = select i1 %below.hi, i64 %max.lo, i64 17

  store i64 %med, ptr addrspace(1) %out.ptr
  ret void
}

; Regression test for performIntMed3ImmCombine extending its arguments to
; 32 bits, which failed for 64-bit arguments. Previously asserted / crashed.
; GCN-LABEL: {{^}}test_intMed3ImmCombine_no_32bit_extend:
; GCN: v_cmp_lt_i64
; GCN: v_cmp_gt_i64
define i64 @test_intMed3ImmCombine_no_32bit_extend(i64 %x) {
  %smax = call i64 @llvm.smax.i64(i64 %x, i64 -2)
  %smin = call i64 @llvm.smin.i64(i64 %smax, i64 2)
  ret i64 %smin
}
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)

; 16-bit clamp of a loaded value to the constant range [12, 17].
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i16:
; SI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
; VI: v_max_i16_e32 [[MAX:v[0-9]]], 12, {{v[0-9]}}
; VI: v_min_i16_e32 {{v[0-9]}}, 17, [[MAX]]
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, 12, 17
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
define amdgpu_kernel void @v_test_smed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr i16, ptr addrspace(1) %out, i32 %tid
  %a = load i16, ptr addrspace(1) %gep0

  %icmp0 = icmp sgt i16 %a, 12
  %i0 = select i1 %icmp0, i16 %a, i16 12

  %icmp1 = icmp slt i16 %i0, 17
  %i1 = select i1 %icmp1, i16 %i0, i16 17

  store i16 %i1, ptr addrspace(1) %outgep
  ret void
}


; Signed min/max helpers written as icmp + select. They use attribute group #2
; (alwaysinline), so the pattern tests below are expressed as calls to them.

define internal i32 @smin(i32 %x, i32 %y) #2 {
  %cmp = icmp slt i32 %x, %y
  %sel = select i1 %cmp, i32 %x, i32 %y
  ret i32 %sel
}

define internal i32 @smax(i32 %x, i32 %y) #2 {
  %cmp = icmp sgt i32 %x, %y
  %sel = select i1 %cmp, i32 %x, i32 %y
  ret i32 %sel
}

define internal i16 @smin16(i16 %x, i16 %y) #2 {
  %cmp = icmp slt i16 %x, %y
  %sel = select i1 %cmp, i16 %x, i16 %y
  ret i16 %sel
}

define internal i16 @smax16(i16 %x, i16 %y) #2 {
  %cmp = icmp sgt i16 %x, %y
  %sel = select i1 %cmp, i16 %x, i16 %y
  ret i16 %sel
}

define internal i8 @smin8(i8 %x, i8 %y) #2 {
  %cmp = icmp slt i8 %x, %y
  %sel = select i1 %cmp, i8 %x, i8 %y
  ret i8 %sel
}

define internal i8 @smax8(i8 %x, i8 %y) #2 {
  %cmp = icmp sgt i8 %x, %y
  %sel = select i1 %cmp, i8 %x, i8 %y
  ret i8 %sel
}

; 16 combinations

; 0: max(min(x, y), min(max(x, y), z))
; 1: max(min(x, y), min(max(y, x), z))
; 2: max(min(x, y), min(z, max(x, y)))
; 3: max(min(x, y), min(z, max(y, x)))
; 4: max(min(y, x), min(max(x, y), z))
; 5: max(min(y, x), min(max(y, x), z))
; 6: max(min(y, x), min(z, max(x, y)))
; 7: max(min(y, x), min(z, max(y, x)))
;
; + commute outermost max (patterns 8-15 repeat 0-7 with the operands of the
;   final max swapped)


; FIXME: In these cases we probably should have used scalar operations
; instead.

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_1:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_2:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_3:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_3(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_4:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_4(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_5:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_5(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_6:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_6(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_7:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_7(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_8:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_8(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_9:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_9(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_10:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_10(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_11:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_11(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_12:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_12(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_13:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_13(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_14:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_14(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_15:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_15(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smin(i32 %z, i32 %tmp1)
  %tmp3 = call i32 @smax(i32 %tmp2, i32 %tmp0)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; 16 combinations

; 16: min(max(x, y), max(min(x, y), z))
; 17: min(max(x, y), max(min(y, x), z))
; 18: min(max(x, y), max(z, min(x, y)))
; 19: min(max(x, y), max(z, min(y, x)))
; 20: min(max(y, x), max(min(x, y), z))
; 21: min(max(y, x), max(min(y, x), z))
; 22: min(max(y, x), max(z, min(x, y)))
; 23: min(max(y, x), max(z, min(y, x)))
;
; + commute outermost min (patterns 24-31 repeat 16-23 with the operands of the
;   final min swapped)

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_16:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_16(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_17:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_17(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_18:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_18(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_19:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_19(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_20:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_20(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_21:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_21(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_22:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_22(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_23:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_23(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp1, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_24:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_24(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; Pattern 17 with the outermost min commuted.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_25:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_25(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_26:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_26(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_27:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_27(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_28:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_28(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_29:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_29(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_30:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_30(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i32_pat_31:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_31(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %y, i32 %x)
  %tmp2 = call i32 @smax(i32 %z, i32 %tmp0)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; FIXME: Should keep scalar or not promote
; GCN-LABEL: {{^}}s_test_smed3_i16_pat_0:
; GCN: s_sext_i32_i16
; GCN: s_sext_i32_i16
; GCN: s_sext_i32_i16
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i16_pat_0(ptr addrspace(1) %arg, [8 x i32], i16 %x, [8 x i32], i16 %y, [8 x i32], i16 %z) #1 {
bb:
  %tmp0 = call i16 @smin16(i16 %x, i16 %y)
  %tmp1 = call i16 @smax16(i16 %x, i16 %y)
  %tmp2 = call i16 @smin16(i16 %tmp1, i16 %z)
  %tmp3 = call i16 @smax16(i16 %tmp0, i16 %tmp2)
  store i16 %tmp3, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}s_test_smed3_i8_pat_0:
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i8_pat_0(ptr addrspace(1) %arg, [8 x i32], i8 %x, [8 x i32], i8 %y, [8 x i32], i8 %z) #1 {
bb:
  %tmp0 = call i8 @smin8(i8 %x, i8 %y)
  %tmp1 = call i8 @smax8(i8 %x, i8 %y)
  %tmp2 = call i8 @smin8(i8 %tmp1, i8 %z)
  %tmp3 = call i8 @smax8(i8 %tmp0, i8 %tmp2)
  store i8 %tmp3, ptr addrspace(1) %arg
  ret void
}

; Multi-use variants of pattern 0: one intermediate value is stored in addition
; to the final result.

; The inner min(x, y) has a second use.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_0:
; GCN: s_min_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store volatile i32 %tmp0, ptr addrspace(1) %arg
  store volatile i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; The inner max(x, y) has a second use.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_1:
; GCN: s_max_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store volatile i32 %tmp1, ptr addrspace(1) %arg
  store volatile i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; The min(max(x, y), z) value has a second use.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_2:
; GCN: s_max_i32
; GCN: s_min_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store volatile i32 %tmp2, ptr addrspace(1) %arg
  store volatile i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; Only the final result is used more than once.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_result:
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_result(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %x, i32 %y)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smin(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @smax(i32 %tmp0, i32 %tmp2)
  store volatile i32 %tmp3, ptr addrspace(1) %arg
  store volatile i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

; Two clamps share the same lo/hi bounds; both are expected to become med3
; instructions that reuse the same two bound registers.
; GCN-LABEL: {{^}}s_test_smed3_reuse_bounds:
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32 v{{[0-9]+}}, [[B0:s[0-9]+]], [[B1:v[0-9]+]], v{{[0-9]+}}
; GCN: v_med3_i32 v{{[0-9]+}}, [[B0]], [[B1]], v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_reuse_bounds(ptr addrspace(1) %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) #1 {
bb:
  %lo = call i32 @smin(i32 %b0, i32 %b1)
  %hi = call i32 @smax(i32 %b0, i32 %b1)

  %tmp0 = call i32 @smin(i32 %x, i32 %hi)
  %z0 = call i32 @smax(i32 %tmp0, i32 %lo)

  %tmp1 = call i32 @smin(i32 %y, i32 %hi)
  %z1 = call i32 @smax(i32 %tmp1, i32 %lo)

  store volatile i32 %z0, ptr addrspace(1) %arg
  store volatile i32 %z1, ptr addrspace(1) %arg
  ret void
}

; GCN-LABEL: {{^}}v_test_smed3_i16_pat_0:
; SI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; FIXME: VI not matching med3
; VI: v_min_i16
; VI: v_max_i16
; VI: v_min_i16
; VI: v_max_i16

; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_smed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
bb:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %tid
  %gep1 = getelementptr inbounds i16, ptr addrspace(1) %gep0, i32 3
  %gep2 = getelementptr inbounds i16, ptr addrspace(1) %gep0, i32 8
  %out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
  %x = load i16, ptr addrspace(1) %gep0
  %y = load i16, ptr addrspace(1) %gep1
  %z = load i16, ptr addrspace(1) %gep2

  %tmp0 = call i16 @smin16(i16 %x, i16 %y)
  %tmp1 = call i16 @smax16(i16 %x, i16 %y)
  %tmp2 = call i16 @smin16(i16 %tmp1, i16 %z)
  %tmp3 = call i16 @smax16(i16 %tmp0, i16 %tmp2)
  store i16 %tmp3, ptr addrspace(1) %out.gep
  ret void
}

; Per-lane 16-bit median written as min(max(x, y), max(min(x, y), z)), with
; x, y and z loaded from global memory.
; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @v_test_smed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
bb:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; x is at element %lane of %a.ptr; y and z are 3 and 8 elements past x.
  %x.ptr = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %lane
  %y.ptr = getelementptr inbounds i16, ptr addrspace(1) %x.ptr, i32 3
  %z.ptr = getelementptr inbounds i16, ptr addrspace(1) %x.ptr, i32 8
  %dst = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %lane
  %x = load i16, ptr addrspace(1) %x.ptr
  %y = load i16, ptr addrspace(1) %y.ptr
  %z = load i16, ptr addrspace(1) %z.ptr

  %lo = call i16 @smin16(i16 %x, i16 %y)
  %hi = call i16 @smax16(i16 %x, i16 %y)
  %lo.or.z = call i16 @smax16(i16 %lo, i16 %z)
  %med = call i16 @smin16(i16 %hi, i16 %lo.or.z)
  store i16 %med, ptr addrspace(1) %dst
  ret void
}

; Attribute groups: #0 is used by the workitem-id intrinsic declaration, #1 by
; the test functions, #2 by the always-inlined min/max helpers.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone alwaysinline }