; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Each kernel below zero-extends an i16 value. In the "uniform" kernels the
; i16 comes straight from a kernel argument and the expected code masks it
; with a scalar s_and_b32 ..., 0xffff. In the "divergent" kernels the i16
; depends on the workitem id and the expected code masks it with a vector
; v_and_b32_e32 0xffff, ... instead.

; Uniform i16 argument, zero-extended to i32 and added to %b.
define amdgpu_kernel void @zext_i16_to_i32_uniform(ptr addrspace(1) %out, i16 %a, i32 %b) {
; GCN-LABEL: zext_i16_to_i32_uniform:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_and_b32 s0, s2, 0xffff
; GCN-NEXT:    s_add_i32 s0, s3, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %zext = zext i16 %a to i32
  %res = add i32 %b, %zext
  store i32 %res, ptr addrspace(1) %out
  ret void
}


; Uniform i16 argument, zero-extended to i64 and added to %b; the expected
; 64-bit add is an s_add_u32 / s_addc_u32 pair.
define amdgpu_kernel void @zext_i16_to_i64_uniform(ptr addrspace(1) %out, i16 %a, i64 %b) {
; GCN-LABEL: zext_i16_to_i64_uniform:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s8, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_and_b32 s4, s8, 0xffff
; GCN-NEXT:    s_add_u32 s4, s6, s4
; GCN-NEXT:    s_addc_u32 s5, s7, 0
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %zext = zext i16 %a to i64
  %res = add i64 %b, %zext
  store i64 %res, ptr addrspace(1) %out
  ret void
}

; Workitem-id-dependent i16 (%a + trunc(workitem.id.x)), zero-extended to i32
; and stored. %b is not used by the body.
define amdgpu_kernel void @zext_i16_to_i32_divergent(ptr addrspace(1) %out, i16 %a, i32 %b) {
; GCN-LABEL: zext_i16_to_i32_divergent:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.truncated = trunc i32 %tid to i16
  %divergent.a = add i16 %a, %tid.truncated
  %zext = zext i16 %divergent.a to i32
  store i32 %zext, ptr addrspace(1) %out
  ret void
}


; Workitem-id-dependent i16, zero-extended to i64 and stored; the expected
; high half is a plain v_mov_b32_e32 v1, 0. %b is not used by the body.
define amdgpu_kernel void @zext_i16_to_i64_divergent(ptr addrspace(1) %out, i16 %a, i64 %b) {
; GCN-LABEL: zext_i16_to_i64_divergent:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.truncated = trunc i32 %tid to i16
  %divergent.a = add i16 %a, %tid.truncated
  %zext = zext i16 %divergent.a to i64
  store i64 %zext, ptr addrspace(1) %out
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }