; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX90A
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX940
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL

; Codegen checks for the llvm.amdgcn.global.load.lds intrinsic on gfx900,
; gfx90a, gfx940 and gfx1010 with the default instruction selector, plus
; gfx900 with -global-isel.
;
; What the expected output in this file shows for every configuration:
;   * the LDS pointer operand reaches m0 by way of v_readfirstlane_b32;
;   * the i32 size operand (4, 2, 1) selects the dword, ushort or ubyte load;
;   * a non-zero i32 offset operand is emitted as the immediate "offset" field;
;   * the i32 aux operand is printed as instruction modifiers whose spelling
;     depends on the subtarget (see the note in front of each function).

declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)

; Global address in VGPRs; size 4, offset 16, aux 1.
; aux 1 is printed as glc, except on gfx940 where it is printed as sc0.
define amdgpu_ps void @global_load_lds_dword_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_dword_vaddr:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    v_readfirstlane_b32 s0, v2
; GFX900-NEXT:    s_mov_b32 m0, s0
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
; GFX900-NEXT:    s_endpgm
;
; GFX90A-LABEL: global_load_lds_dword_vaddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_readfirstlane_b32 s0, v2
; GFX90A-NEXT:    s_mov_b32 m0, s0
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX940-LABEL: global_load_lds_dword_vaddr:
; GFX940:       ; %bb.0: ; %main_body
; GFX940-NEXT:    v_readfirstlane_b32 s0, v2
; GFX940-NEXT:    s_mov_b32 m0, s0
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    global_load_lds_dword v[0:1], off offset:16 sc0
; GFX940-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_dword_vaddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
; GFX10-NEXT:    s_mov_b32 m0, s0
; GFX10-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
; GFX10-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: global_load_lds_dword_vaddr:
; GFX900-GISEL:       ; %bb.0: ; %main_body
; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v2
; GFX900-GISEL-NEXT:    s_nop 4
; GFX900-GISEL-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
; GFX900-GISEL-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1)
  ret void
}

; Global address passed inreg, so the load uses s[0:1] as the base together
; with a VGPR that is set to 0; size 4, offset 32, aux 2.
; aux 2 is printed as slc, except on gfx940 where it is printed as nt.
define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_dword_saddr:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    v_readfirstlane_b32 s2, v0
; GFX900-NEXT:    v_mov_b32_e32 v1, 0
; GFX900-NEXT:    s_mov_b32 m0, s2
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    global_load_dword v1, s[0:1] offset:32 slc lds
; GFX900-NEXT:    s_endpgm
;
; GFX90A-LABEL: global_load_lds_dword_saddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_readfirstlane_b32 s2, v0
; GFX90A-NEXT:    v_mov_b32_e32 v1, 0
; GFX90A-NEXT:    s_mov_b32 m0, s2
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_dword v1, s[0:1] offset:32 slc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX940-LABEL: global_load_lds_dword_saddr:
; GFX940:       ; %bb.0: ; %main_body
; GFX940-NEXT:    v_readfirstlane_b32 s2, v0
; GFX940-NEXT:    v_mov_b32_e32 v1, 0
; GFX940-NEXT:    s_mov_b32 m0, s2
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    global_load_lds_dword v1, s[0:1] offset:32 nt
; GFX940-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_dword_saddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_readfirstlane_b32 s2, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_mov_b32 m0, s2
; GFX10-NEXT:    global_load_dword v0, s[0:1] offset:32 slc lds
; GFX10-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: global_load_lds_dword_saddr:
; GFX900-GISEL:       ; %bb.0: ; %main_body
; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-GISEL-NEXT:    s_nop 3
; GFX900-GISEL-NEXT:    global_load_dword v0, s[0:1] offset:32 slc lds
; GFX900-GISEL-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 32, i32 2)
  ret void
}

; Inreg base pointer plus a zero-extended i32 byte offset: the expected load
; keeps s[0:1] as the base and uses the offset VGPR (v1) directly; size 4,
; offset 48, aux 16.
; aux 16 is printed as scc on gfx90a and as sc1 on gfx940; no modifier is
; expected for it on gfx900 or gfx1010.
define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
; GFX900-LABEL: global_load_lds_dword_saddr_and_vaddr:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    v_readfirstlane_b32 s2, v0
; GFX900-NEXT:    s_mov_b32 m0, s2
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    global_load_dword v1, s[0:1] offset:48 lds
; GFX900-NEXT:    s_endpgm
;
; GFX90A-LABEL: global_load_lds_dword_saddr_and_vaddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_readfirstlane_b32 s2, v0
; GFX90A-NEXT:    s_mov_b32 m0, s2
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_dword v1, s[0:1] offset:48 scc lds
; GFX90A-NEXT:    s_endpgm
;
; GFX940-LABEL: global_load_lds_dword_saddr_and_vaddr:
; GFX940:       ; %bb.0: ; %main_body
; GFX940-NEXT:    v_readfirstlane_b32 s2, v0
; GFX940-NEXT:    s_mov_b32 m0, s2
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    global_load_lds_dword v1, s[0:1] offset:48 sc1
; GFX940-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_dword_saddr_and_vaddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_readfirstlane_b32 s2, v0
; GFX10-NEXT:    s_mov_b32 m0, s2
; GFX10-NEXT:    global_load_dword v1, s[0:1] offset:48 lds
; GFX10-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: global_load_lds_dword_saddr_and_vaddr:
; GFX900-GISEL:       ; %bb.0: ; %main_body
; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
; GFX900-GISEL-NEXT:    s_nop 4
; GFX900-GISEL-NEXT:    global_load_dword v1, s[0:1] offset:48 lds
; GFX900-GISEL-NEXT:    s_endpgm
main_body:
  %voffset.64 = zext i32 %voffset to i64
  %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16)
  ret void
}

; Global address in VGPRs; size 2 selects the ushort load, offset 0, aux 4.
; aux 4 is printed as dlc on gfx1010 only; the other configurations expect no
; modifier for it.
define amdgpu_ps void @global_load_lds_ushort_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_ushort_vaddr:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    v_readfirstlane_b32 s0, v2
; GFX900-NEXT:    s_mov_b32 m0, s0
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    global_load_ushort v[0:1], off lds
; GFX900-NEXT:    s_endpgm
;
; GFX90A-LABEL: global_load_lds_ushort_vaddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_readfirstlane_b32 s0, v2
; GFX90A-NEXT:    s_mov_b32 m0, s0
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_ushort v[0:1], off lds
; GFX90A-NEXT:    s_endpgm
;
; GFX940-LABEL: global_load_lds_ushort_vaddr:
; GFX940:       ; %bb.0: ; %main_body
; GFX940-NEXT:    v_readfirstlane_b32 s0, v2
; GFX940-NEXT:    s_mov_b32 m0, s0
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    global_load_lds_ushort v[0:1], off
; GFX940-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_ushort_vaddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
; GFX10-NEXT:    s_mov_b32 m0, s0
; GFX10-NEXT:    global_load_ushort v[0:1], off dlc lds
; GFX10-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: global_load_lds_ushort_vaddr:
; GFX900-GISEL:       ; %bb.0: ; %main_body
; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v2
; GFX900-GISEL-NEXT:    s_nop 4
; GFX900-GISEL-NEXT:    global_load_ushort v[0:1], off lds
; GFX900-GISEL-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 2, i32 0, i32 4)
  ret void
}

; Global address in VGPRs; size 1 selects the ubyte load, offset 0, aux 0.
; With aux 0 no modifier other than the lds marker is expected anywhere.
define amdgpu_ps void @global_load_lds_ubyte_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_ubyte_vaddr:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    v_readfirstlane_b32 s0, v2
; GFX900-NEXT:    s_mov_b32 m0, s0
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    global_load_ubyte v[0:1], off lds
; GFX900-NEXT:    s_endpgm
;
; GFX90A-LABEL: global_load_lds_ubyte_vaddr:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    v_readfirstlane_b32 s0, v2
; GFX90A-NEXT:    s_mov_b32 m0, s0
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    global_load_ubyte v[0:1], off lds
; GFX90A-NEXT:    s_endpgm
;
; GFX940-LABEL: global_load_lds_ubyte_vaddr:
; GFX940:       ; %bb.0: ; %main_body
; GFX940-NEXT:    v_readfirstlane_b32 s0, v2
; GFX940-NEXT:    s_mov_b32 m0, s0
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    global_load_lds_ubyte v[0:1], off
; GFX940-NEXT:    s_endpgm
;
; GFX10-LABEL: global_load_lds_ubyte_vaddr:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
; GFX10-NEXT:    s_mov_b32 m0, s0
; GFX10-NEXT:    global_load_ubyte v[0:1], off lds
; GFX10-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: global_load_lds_ubyte_vaddr:
; GFX900-GISEL:       ; %bb.0: ; %main_body
; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v2
; GFX900-GISEL-NEXT:    s_nop 4
; GFX900-GISEL-NEXT:    global_load_ubyte v[0:1], off lds
; GFX900-GISEL-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 1, i32 0, i32 0)
  ret void
}