; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s

; Verifies instruction selection for sext_in_reg (shl N / ashr N pairs) on
; AMDGPU: uniform values should select the scalar-unit s_sext_i32_i8 /
; s_sext_i32_i16, while divergent values (mixed with workitem.id.x) should
; select the vector-unit v_bfe_i32 sign-extract instead.

define amdgpu_kernel void @uniform_sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; GCN-LABEL: uniform_sext_in_reg_i8_to_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_add_i32 s2, s2, s3
; GCN-NEXT:    s_sext_i32_i8 s2, s2
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @divergent_sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; GCN-LABEL: divergent_sext_in_reg_i8_to_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    s_add_i32 s0, s2, s3
; GCN-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %c = add i32 %a, %b ; add to prevent folding into extload
  %c.divergent = add i32 %c, %tid
  %shl = shl i32 %c.divergent, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @uniform_sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; GCN-LABEL: uniform_sext_in_reg_i16_to_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_add_i32 s2, s2, s3
; GCN-NEXT:    s_sext_i32_i16 s2, s2
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @divergent_sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; GCN-LABEL: divergent_sext_in_reg_i16_to_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    s_add_i32 s0, s2, s3
; GCN-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %c = add i32 %a, %b ; add to prevent folding into extload
  %c.divergent = add i32 %c, %tid
  %shl = shl i32 %c.divergent, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }