xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900
3; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX90A
4; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX940
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10
6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL
7
; Intrinsic under test. Operands, in order: an addrspace(1) pointer (%gptr),
; an addrspace(3) pointer (%lptr), and three i32 values named %size, %offset
; and %aux. Presumably %gptr is the load source and %lptr the LDS destination
; (inferred from the intrinsic name) -- confirm against the AMDGPU docs.
8declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
9
; Dword case with %gptr passed as a plain (non-inreg) argument.
; Call operands, named as in the declaration: size 4, offset 16, aux 1.
; Expected ISA: global_load_dword addressed through v[0:1] with `off`,
; carrying offset:16 and `glc` (spelled global_load_lds_dword ... sc0 on
; gfx940). The runs without -global-isel expect v_readfirstlane_b32 into s0
; followed by s_mov_b32 m0, s0; the -global-isel run expects
; v_readfirstlane_b32 written directly to m0, followed by s_nop 4.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
10define amdgpu_ps void @global_load_lds_dword_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
11; GFX900-LABEL: global_load_lds_dword_vaddr:
12; GFX900:       ; %bb.0: ; %main_body
13; GFX900-NEXT:    v_readfirstlane_b32 s0, v2
14; GFX900-NEXT:    s_mov_b32 m0, s0
15; GFX900-NEXT:    s_nop 0
16; GFX900-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
17; GFX900-NEXT:    s_endpgm
18;
19; GFX90A-LABEL: global_load_lds_dword_vaddr:
20; GFX90A:       ; %bb.0: ; %main_body
21; GFX90A-NEXT:    v_readfirstlane_b32 s0, v2
22; GFX90A-NEXT:    s_mov_b32 m0, s0
23; GFX90A-NEXT:    s_nop 0
24; GFX90A-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
25; GFX90A-NEXT:    s_endpgm
26;
27; GFX940-LABEL: global_load_lds_dword_vaddr:
28; GFX940:       ; %bb.0: ; %main_body
29; GFX940-NEXT:    v_readfirstlane_b32 s0, v2
30; GFX940-NEXT:    s_mov_b32 m0, s0
31; GFX940-NEXT:    s_nop 0
32; GFX940-NEXT:    global_load_lds_dword v[0:1], off offset:16 sc0
33; GFX940-NEXT:    s_endpgm
34;
35; GFX10-LABEL: global_load_lds_dword_vaddr:
36; GFX10:       ; %bb.0: ; %main_body
37; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
38; GFX10-NEXT:    s_mov_b32 m0, s0
39; GFX10-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
40; GFX10-NEXT:    s_endpgm
41;
42; GFX900-GISEL-LABEL: global_load_lds_dword_vaddr:
43; GFX900-GISEL:       ; %bb.0: ; %main_body
44; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v2
45; GFX900-GISEL-NEXT:    s_nop 4
46; GFX900-GISEL-NEXT:    global_load_dword v[0:1], off offset:16 glc lds
47; GFX900-GISEL-NEXT:    s_endpgm
48main_body:
49  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1)
50  ret void
51}
52
; Dword case with %gptr marked inreg.
; Call operands, named as in the declaration: size 4, offset 32, aux 2.
; Expected ISA: the load uses s[0:1] as the base together with a VGPR that a
; preceding v_mov_b32_e32 sets to 0, and carries offset:32 and `slc`
; (`nt` on gfx940). m0 is written from v0 via v_readfirstlane_b32, either
; through s2 plus s_mov_b32 (runs without -global-isel) or directly
; (-global-isel run, which then expects s_nop 3 before the load).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
53define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr) {
54; GFX900-LABEL: global_load_lds_dword_saddr:
55; GFX900:       ; %bb.0: ; %main_body
56; GFX900-NEXT:    v_readfirstlane_b32 s2, v0
57; GFX900-NEXT:    v_mov_b32_e32 v1, 0
58; GFX900-NEXT:    s_mov_b32 m0, s2
59; GFX900-NEXT:    s_nop 0
60; GFX900-NEXT:    global_load_dword v1, s[0:1] offset:32 slc lds
61; GFX900-NEXT:    s_endpgm
62;
63; GFX90A-LABEL: global_load_lds_dword_saddr:
64; GFX90A:       ; %bb.0: ; %main_body
65; GFX90A-NEXT:    v_readfirstlane_b32 s2, v0
66; GFX90A-NEXT:    v_mov_b32_e32 v1, 0
67; GFX90A-NEXT:    s_mov_b32 m0, s2
68; GFX90A-NEXT:    s_nop 0
69; GFX90A-NEXT:    global_load_dword v1, s[0:1] offset:32 slc lds
70; GFX90A-NEXT:    s_endpgm
71;
72; GFX940-LABEL: global_load_lds_dword_saddr:
73; GFX940:       ; %bb.0: ; %main_body
74; GFX940-NEXT:    v_readfirstlane_b32 s2, v0
75; GFX940-NEXT:    v_mov_b32_e32 v1, 0
76; GFX940-NEXT:    s_mov_b32 m0, s2
77; GFX940-NEXT:    s_nop 0
78; GFX940-NEXT:    global_load_lds_dword v1, s[0:1] offset:32 nt
79; GFX940-NEXT:    s_endpgm
80;
81; GFX10-LABEL: global_load_lds_dword_saddr:
82; GFX10:       ; %bb.0: ; %main_body
83; GFX10-NEXT:    v_readfirstlane_b32 s2, v0
84; GFX10-NEXT:    v_mov_b32_e32 v0, 0
85; GFX10-NEXT:    s_mov_b32 m0, s2
86; GFX10-NEXT:    global_load_dword v0, s[0:1] offset:32 slc lds
87; GFX10-NEXT:    s_endpgm
88;
89; GFX900-GISEL-LABEL: global_load_lds_dword_saddr:
90; GFX900-GISEL:       ; %bb.0: ; %main_body
91; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
92; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0
93; GFX900-GISEL-NEXT:    s_nop 3
94; GFX900-GISEL-NEXT:    global_load_dword v0, s[0:1] offset:32 slc lds
95; GFX900-GISEL-NEXT:    s_endpgm
96main_body:
97  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 32, i32 2)
98  ret void
99}
100
; Dword case where the address is an inreg %gptr plus a zero-extended i32
; %voffset, combined with a byte-granular getelementptr.
; Call operands, named as in the declaration: size 4, offset 48, aux 16.
; Expected ISA: the load uses s[0:1] as the base and v1 as the VGPR operand,
; with offset:48. The aux value appears as `scc` on gfx90a and `sc1` on
; gfx940; the gfx900 and gfx1010 expectations carry no cache modifier.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
101define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
102; GFX900-LABEL: global_load_lds_dword_saddr_and_vaddr:
103; GFX900:       ; %bb.0: ; %main_body
104; GFX900-NEXT:    v_readfirstlane_b32 s2, v0
105; GFX900-NEXT:    s_mov_b32 m0, s2
106; GFX900-NEXT:    s_nop 0
107; GFX900-NEXT:    global_load_dword v1, s[0:1] offset:48 lds
108; GFX900-NEXT:    s_endpgm
109;
110; GFX90A-LABEL: global_load_lds_dword_saddr_and_vaddr:
111; GFX90A:       ; %bb.0: ; %main_body
112; GFX90A-NEXT:    v_readfirstlane_b32 s2, v0
113; GFX90A-NEXT:    s_mov_b32 m0, s2
114; GFX90A-NEXT:    s_nop 0
115; GFX90A-NEXT:    global_load_dword v1, s[0:1] offset:48 scc lds
116; GFX90A-NEXT:    s_endpgm
117;
118; GFX940-LABEL: global_load_lds_dword_saddr_and_vaddr:
119; GFX940:       ; %bb.0: ; %main_body
120; GFX940-NEXT:    v_readfirstlane_b32 s2, v0
121; GFX940-NEXT:    s_mov_b32 m0, s2
122; GFX940-NEXT:    s_nop 0
123; GFX940-NEXT:    global_load_lds_dword v1, s[0:1] offset:48 sc1
124; GFX940-NEXT:    s_endpgm
125;
126; GFX10-LABEL: global_load_lds_dword_saddr_and_vaddr:
127; GFX10:       ; %bb.0: ; %main_body
128; GFX10-NEXT:    v_readfirstlane_b32 s2, v0
129; GFX10-NEXT:    s_mov_b32 m0, s2
130; GFX10-NEXT:    global_load_dword v1, s[0:1] offset:48 lds
131; GFX10-NEXT:    s_endpgm
132;
133; GFX900-GISEL-LABEL: global_load_lds_dword_saddr_and_vaddr:
134; GFX900-GISEL:       ; %bb.0: ; %main_body
135; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
136; GFX900-GISEL-NEXT:    s_nop 4
137; GFX900-GISEL-NEXT:    global_load_dword v1, s[0:1] offset:48 lds
138; GFX900-GISEL-NEXT:    s_endpgm
139main_body:
  ; Widen the 32-bit offset to i64 (zero-extended) and add it to %gptr in bytes.
140  %voffset.64 = zext i32 %voffset to i64
141  %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
142  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16)
143  ret void
144}
145
; 16-bit case with %gptr passed as a plain (non-inreg) argument.
; Call operands, named as in the declaration: size 2, offset 0, aux 4.
; Expected ISA: global_load_ushort addressed through v[0:1] with `off` and no
; offset field (global_load_lds_ushort on gfx940). The aux value shows up
; only in the gfx1010 expectation, as `dlc`; the other targets carry no
; cache modifier.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
146define amdgpu_ps void @global_load_lds_ushort_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
147; GFX900-LABEL: global_load_lds_ushort_vaddr:
148; GFX900:       ; %bb.0: ; %main_body
149; GFX900-NEXT:    v_readfirstlane_b32 s0, v2
150; GFX900-NEXT:    s_mov_b32 m0, s0
151; GFX900-NEXT:    s_nop 0
152; GFX900-NEXT:    global_load_ushort v[0:1], off lds
153; GFX900-NEXT:    s_endpgm
154;
155; GFX90A-LABEL: global_load_lds_ushort_vaddr:
156; GFX90A:       ; %bb.0: ; %main_body
157; GFX90A-NEXT:    v_readfirstlane_b32 s0, v2
158; GFX90A-NEXT:    s_mov_b32 m0, s0
159; GFX90A-NEXT:    s_nop 0
160; GFX90A-NEXT:    global_load_ushort v[0:1], off lds
161; GFX90A-NEXT:    s_endpgm
162;
163; GFX940-LABEL: global_load_lds_ushort_vaddr:
164; GFX940:       ; %bb.0: ; %main_body
165; GFX940-NEXT:    v_readfirstlane_b32 s0, v2
166; GFX940-NEXT:    s_mov_b32 m0, s0
167; GFX940-NEXT:    s_nop 0
168; GFX940-NEXT:    global_load_lds_ushort v[0:1], off
169; GFX940-NEXT:    s_endpgm
170;
171; GFX10-LABEL: global_load_lds_ushort_vaddr:
172; GFX10:       ; %bb.0: ; %main_body
173; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
174; GFX10-NEXT:    s_mov_b32 m0, s0
175; GFX10-NEXT:    global_load_ushort v[0:1], off dlc lds
176; GFX10-NEXT:    s_endpgm
177;
178; GFX900-GISEL-LABEL: global_load_lds_ushort_vaddr:
179; GFX900-GISEL:       ; %bb.0: ; %main_body
180; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v2
181; GFX900-GISEL-NEXT:    s_nop 4
182; GFX900-GISEL-NEXT:    global_load_ushort v[0:1], off lds
183; GFX900-GISEL-NEXT:    s_endpgm
184main_body:
185  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 2, i32 0, i32 4)
186  ret void
187}
188
; 8-bit case with %gptr passed as a plain (non-inreg) argument.
; Call operands, named as in the declaration: size 1, offset 0, aux 0.
; Expected ISA: global_load_ubyte addressed through v[0:1] with `off`, no
; offset field and no cache modifier on any target (global_load_lds_ubyte
; on gfx940).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
189define amdgpu_ps void @global_load_lds_ubyte_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
190; GFX900-LABEL: global_load_lds_ubyte_vaddr:
191; GFX900:       ; %bb.0: ; %main_body
192; GFX900-NEXT:    v_readfirstlane_b32 s0, v2
193; GFX900-NEXT:    s_mov_b32 m0, s0
194; GFX900-NEXT:    s_nop 0
195; GFX900-NEXT:    global_load_ubyte v[0:1], off lds
196; GFX900-NEXT:    s_endpgm
197;
198; GFX90A-LABEL: global_load_lds_ubyte_vaddr:
199; GFX90A:       ; %bb.0: ; %main_body
200; GFX90A-NEXT:    v_readfirstlane_b32 s0, v2
201; GFX90A-NEXT:    s_mov_b32 m0, s0
202; GFX90A-NEXT:    s_nop 0
203; GFX90A-NEXT:    global_load_ubyte v[0:1], off lds
204; GFX90A-NEXT:    s_endpgm
205;
206; GFX940-LABEL: global_load_lds_ubyte_vaddr:
207; GFX940:       ; %bb.0: ; %main_body
208; GFX940-NEXT:    v_readfirstlane_b32 s0, v2
209; GFX940-NEXT:    s_mov_b32 m0, s0
210; GFX940-NEXT:    s_nop 0
211; GFX940-NEXT:    global_load_lds_ubyte v[0:1], off
212; GFX940-NEXT:    s_endpgm
213;
214; GFX10-LABEL: global_load_lds_ubyte_vaddr:
215; GFX10:       ; %bb.0: ; %main_body
216; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
217; GFX10-NEXT:    s_mov_b32 m0, s0
218; GFX10-NEXT:    global_load_ubyte v[0:1], off lds
219; GFX10-NEXT:    s_endpgm
220;
221; GFX900-GISEL-LABEL: global_load_lds_ubyte_vaddr:
222; GFX900-GISEL:       ; %bb.0: ; %main_body
223; GFX900-GISEL-NEXT:    v_readfirstlane_b32 m0, v2
224; GFX900-GISEL-NEXT:    s_nop 4
225; GFX900-GISEL-NEXT:    global_load_ubyte v[0:1], off lds
226; GFX900-GISEL-NEXT:    s_endpgm
227main_body:
228  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 1, i32 0, i32 0)
229  ret void
230}
231