; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Instruction-selection test for sign extension on AMDGPU.
;
; Each kernel sign-extends a value that is either uniform (derived only from
; kernel arguments) or divergent (mixed with the workitem id). The assertions
; below expect scalar instructions (s_*) for the uniform cases and vector
; instructions (v_*) for the divergent cases.
;
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Uniform i16 -> i32: the extension is expected to select s_sext_i32_i16.
define amdgpu_kernel void @sext_i16_to_i32_uniform(ptr addrspace(1) %out, i16 %a, i32 %b) {
; GCN-LABEL: sext_i16_to_i32_uniform:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_sext_i32_i16 s0, s2
; GCN-NEXT:    s_add_i32 s0, s3, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %sext = sext i16 %a to i32
  %res = add i32 %b, %sext
  store i32 %res, ptr addrspace(1) %out
  ret void
}


; Uniform i16 -> i64: the extension is expected to select s_bfe_i64, followed
; by a 64-bit scalar add (s_add_u32 / s_addc_u32).
define amdgpu_kernel void @sext_i16_to_i64_uniform(ptr addrspace(1) %out, i16 %a, i64 %b) {
; GCN-LABEL: sext_i16_to_i64_uniform:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_i64 s[4:5], s[6:7], 0x100000
; GCN-NEXT:    s_add_u32 s4, s8, s4
; GCN-NEXT:    s_addc_u32 s5, s9, s5
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %sext = sext i16 %a to i64
  %res = add i64 %b, %sext
  store i64 %res, ptr addrspace(1) %out
  ret void
}

; Divergent i16 -> i32: %a is added to the truncated workitem id before the
; extension, which is expected to select v_bfe_i32. %b is not used here.
define amdgpu_kernel void @sext_i16_to_i32_divergent(ptr addrspace(1) %out, i16 %a, i32 %b) {
; GCN-LABEL: sext_i16_to_i32_divergent:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.truncated = trunc i32 %tid to i16
  %divergent.a = add i16 %a, %tid.truncated
  %sext = sext i16 %divergent.a to i32
  store i32 %sext, ptr addrspace(1) %out
  ret void
}


; Divergent i16 -> i64: expected to select v_bfe_i32 for the low half and
; v_ashrrev_i32 by 31 for the high half. %b is not used here.
define amdgpu_kernel void @sext_i16_to_i64_divergent(ptr addrspace(1) %out, i16 %a, i64 %b) {
; GCN-LABEL: sext_i16_to_i64_divergent:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.truncated = trunc i32 %tid to i16
  %divergent.a = add i16 %a, %tid.truncated
  %sext = sext i16 %divergent.a to i64
  store i64 %sext, ptr addrspace(1) %out
  ret void
}

; Uniform i32 -> i64: the high half is expected to come from s_ashr_i32 by 31,
; followed by a 64-bit scalar add (s_add_u32 / s_addc_u32).
define amdgpu_kernel void @sext_i32_to_i64_uniform(ptr addrspace(1) %out, i32 %a, i64 %b) {
; GCN-LABEL: sext_i32_to_i64_uniform:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s8, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_ashr_i32 s5, s8, 31
; GCN-NEXT:    s_add_u32 s4, s6, s8
; GCN-NEXT:    s_addc_u32 s5, s7, s5
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %sext = sext i32 %a to i64
  %res = add i64 %b, %sext
  store i64 %res, ptr addrspace(1) %out
  ret void
}

; Divergent i32 -> i64: %a is added to the workitem id; the high half is
; expected to come from v_ashrrev_i32 by 31. %b is not used here.
define amdgpu_kernel void @sext_i32_to_i64_divergent(ptr addrspace(1) %out, i32 %a, i64 %b) {
; GCN-LABEL: sext_i32_to_i64_divergent:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.a = add i32 %a, %tid
  %sext = sext i32 %divergent.a to i64
  store i64 %sext, ptr addrspace(1) %out
  ret void
}

; Source of divergence for the *_divergent kernels above.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is defined but not attached to any function in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }