; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_11 %s

; Checks how a constant GEP offset is folded into the immediate offset field
; of flat and global memory instructions on three subtargets.
;   * gfx900 and gfx1100 share one set of expectations: flat instructions
;     carry the byte offset as an "offset" modifier.
;   * gfx1010 has its own expectations: flat instructions must end right after
;     the address operand (the end-of-line anchor rejects any modifier).
;     Presumably this guards the gfx10 flat-offset hardware workaround;
;     confirm against the subtarget features before relying on that.
;   * global instructions are expected to carry the offset on all three.

; i32 load/increment/store through a flat pointer at element index 1
; (byte offset 4).
; GCN-LABEL: flat_inst_offset:
; GFX9_11: flat_load_{{dword|b32}} v{{[0-9]+}}, v[{{[0-9:]+}}] offset:4
; GFX9_11: flat_store_{{dword|b32}} v[{{[0-9:]+}}], v{{[0-9]+}} offset:4
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
; GFX10: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}{{$}}
define void @flat_inst_offset(ptr nocapture %p) {
  %gep = getelementptr inbounds i32, ptr %p, i64 1
  %load = load i32, ptr %gep, align 4
  %inc = add nsw i32 %load, 1
  store i32 %inc, ptr %gep, align 4
  ret void
}

; Same access pattern through an addrspace(1) pointer; the shared prefix
; expects the immediate offset on every tested subtarget.
; GCN-LABEL: global_inst_offset:
; GCN: global_load_{{dword|b32}} v{{[0-9]+}}, v[{{[0-9:]+}}], off offset:4
; GCN: global_store_{{dword|b32}} v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:4
define void @global_inst_offset(ptr addrspace(1) nocapture %p) {
  %gep = getelementptr inbounds i32, ptr addrspace(1) %p, i64 1
  %load = load i32, ptr addrspace(1) %gep, align 4
  %inc = add nsw i32 %load, 1
  store i32 %inc, ptr addrspace(1) %gep, align 4
  ret void
}

; i16 load at element index 4 (byte offset 8) inserted into lane 0 of a
; <2 x i16> whose other lane is zero; expects a d16 (low half) flat load.
; GCN-LABEL: load_i16_lo:
; GFX9_11: flat_load_{{short_d16|d16_b16}} v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_i16_lo(ptr %arg, ptr %out) {
  %gep = getelementptr inbounds i16, ptr %arg, i32 4
  %ld = load i16, ptr %gep, align 2
  ; Lane 0 is overwritten by the insert, so its placeholder is never observed;
  ; poison is used instead of the deprecated undef.
  %vec = insertelement <2 x i16> <i16 poison, i16 0>, i16 %ld, i32 0
  %v = add <2 x i16> %vec, %vec
  store <2 x i16> %v, ptr %out, align 4
  ret void
}

; As load_i16_lo, but the loaded value goes into lane 1; expects the d16_hi
; (high half) flat load.
; GCN-LABEL: load_i16_hi:
; GFX9_11: flat_load_{{short_d16_hi|d16_hi_b16}} v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_i16_hi(ptr %arg, ptr %out) {
  %gep = getelementptr inbounds i16, ptr %arg, i32 4
  %ld = load i16, ptr %gep, align 2
  ; Lane 1 is overwritten by the insert, so its placeholder is never observed;
  ; poison is used instead of the deprecated undef.
  %vec = insertelement <2 x i16> <i16 0, i16 poison>, i16 %ld, i32 1
  %v = add <2 x i16> %vec, %vec
  store <2 x i16> %v, ptr %out, align 4
  ret void
}

; half-typed variant of load_i16_lo (element index 4, byte offset 8, lane 0).
; GCN-LABEL: load_half_lo:
; GFX9_11: flat_load_{{short_d16|d16_b16}} v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_half_lo(ptr %arg, ptr %out) {
  %gep = getelementptr inbounds half, ptr %arg, i32 4
  %ld = load half, ptr %gep, align 2
  ; Lane 0 is overwritten by the insert, so its placeholder is never observed;
  ; poison is used instead of the deprecated undef.
  %vec = insertelement <2 x half> <half poison, half 0xH0000>, half %ld, i32 0
  %v = fadd <2 x half> %vec, %vec
  store <2 x half> %v, ptr %out, align 4
  ret void
}

; half-typed variant of load_i16_hi (element index 4, byte offset 8, lane 1).
; GCN-LABEL: load_half_hi:
; GFX9_11: flat_load_{{short_d16_hi|d16_hi_b16}} v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_half_hi(ptr %arg, ptr %out) {
  %gep = getelementptr inbounds half, ptr %arg, i32 4
  %ld = load half, ptr %gep, align 2
  ; Lane 1 is overwritten by the insert, so its placeholder is never observed;
  ; poison is used instead of the deprecated undef.
  %vec = insertelement <2 x half> <half 0xH0000, half poison>, half %ld, i32 1
  %v = fadd <2 x half> %vec, %vec
  store <2 x half> %v, ptr %out, align 4
  ret void
}

; Scalar float load at element index 4 (byte offset 16), doubled and stored.
; GCN-LABEL: load_float_lo:
; GFX9_11: flat_load_{{dword|b32}} v{{[0-9]+}}, v[{{[0-9:]+}}] offset:16{{$}}
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_float_lo(ptr %arg, ptr %out) {
  %gep = getelementptr inbounds float, ptr %arg, i32 4
  %ld = load float, ptr %gep, align 4
  %v = fadd float %ld, %ld
  store float %v, ptr %out, align 4
  ret void
}