xref: /llvm-project/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck --check-prefixes=OPT,OPT-GFX7 %s
3; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck --check-prefixes=OPT,OPT-GFX8 %s
4; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck --check-prefixes=OPT,OPT-GFX9 %s
5; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx1030 < %s | FileCheck --check-prefixes=OPT,OPT-GFX10 %s
6
7; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GFX7 %s
8; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
9; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
10; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefix=GFX10 %s
11
; Flat (generic address space) load whose address, %in + 7 x i32, is computed
; in %entry but only used by the load in %if.
;
; What the check lines below record:
;  - opt (codegenprepare) on bonaire and tonga keeps the getelementptr in
;    %entry; on gfx900 and gfx1030 it is re-created inside %if as a byte
;    offset (getelementptr i8, 28) directly in front of the load.
;  - llc on gfx900 and gfx1030 selects flat_load_dword with offset:28; on
;    bonaire and tonga the address is formed with an explicit 64-bit add.
;  - The store address (%out + 999999 x i32) stays in %entry for every opt run.
12define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
13; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
14; OPT-GFX7-NEXT:  entry:
15; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
16; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
17; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
18; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
19; OPT-GFX7:       if:
20; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
21; OPT-GFX7-NEXT:    br label [[ENDIF]]
22; OPT-GFX7:       endif:
23; OPT-GFX7-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
24; OPT-GFX7-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
25; OPT-GFX7-NEXT:    br label [[DONE:%.*]]
26; OPT-GFX7:       done:
27; OPT-GFX7-NEXT:    ret void
28;
29; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
30; OPT-GFX8-NEXT:  entry:
31; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
32; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
33; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
34; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
35; OPT-GFX8:       if:
36; OPT-GFX8-NEXT:    [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
37; OPT-GFX8-NEXT:    br label [[ENDIF]]
38; OPT-GFX8:       endif:
39; OPT-GFX8-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
40; OPT-GFX8-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
41; OPT-GFX8-NEXT:    br label [[DONE:%.*]]
42; OPT-GFX8:       done:
43; OPT-GFX8-NEXT:    ret void
44;
45; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
46; OPT-GFX9-NEXT:  entry:
47; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
48; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
49; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
50; OPT-GFX9:       if:
51; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
52; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
53; OPT-GFX9-NEXT:    br label [[ENDIF]]
54; OPT-GFX9:       endif:
55; OPT-GFX9-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
56; OPT-GFX9-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
57; OPT-GFX9-NEXT:    ret void
58;
59; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
60; OPT-GFX10-NEXT:  entry:
61; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
62; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
63; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
64; OPT-GFX10:       if:
65; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
66; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
67; OPT-GFX10-NEXT:    br label [[ENDIF]]
68; OPT-GFX10:       endif:
69; OPT-GFX10-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
70; OPT-GFX10-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
71; OPT-GFX10-NEXT:    ret void
72;
73; GFX7-LABEL: test_sinkable_flat_small_offset_i32:
74; GFX7:       ; %bb.0: ; %entry
75; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
77; GFX7-NEXT:    v_mov_b32_e32 v4, 0
78; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
79; GFX7-NEXT:    s_cbranch_execz .LBB0_2
80; GFX7-NEXT:  ; %bb.1: ; %if
81; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 28, v2
82; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
83; GFX7-NEXT:    flat_load_dword v4, v[2:3]
84; GFX7-NEXT:  .LBB0_2: ; %endif
85; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
86; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
87; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
88; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
89; GFX7-NEXT:    flat_store_dword v[0:1], v4
90; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
91; GFX7-NEXT:    s_setpc_b64 s[30:31]
92;
93; GFX8-LABEL: test_sinkable_flat_small_offset_i32:
94; GFX8:       ; %bb.0: ; %entry
95; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
97; GFX8-NEXT:    v_mov_b32_e32 v4, 0
98; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
99; GFX8-NEXT:    s_cbranch_execz .LBB0_2
100; GFX8-NEXT:  ; %bb.1: ; %if
101; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
102; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
103; GFX8-NEXT:    flat_load_dword v4, v[2:3]
104; GFX8-NEXT:  .LBB0_2: ; %endif
105; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
106; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
107; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
108; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
109; GFX8-NEXT:    flat_store_dword v[0:1], v4
110; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
111; GFX8-NEXT:    s_setpc_b64 s[30:31]
112;
113; GFX9-LABEL: test_sinkable_flat_small_offset_i32:
114; GFX9:       ; %bb.0: ; %entry
115; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
117; GFX9-NEXT:    v_mov_b32_e32 v4, 0
118; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
119; GFX9-NEXT:    s_cbranch_execz .LBB0_2
120; GFX9-NEXT:  ; %bb.1: ; %if
121; GFX9-NEXT:    flat_load_dword v4, v[2:3] offset:28
122; GFX9-NEXT:  .LBB0_2: ; %endif
123; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
124; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
125; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
126; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
127; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2300
128; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
129; GFX9-NEXT:    s_setpc_b64 s[30:31]
130;
131; GFX10-LABEL: test_sinkable_flat_small_offset_i32:
132; GFX10:       ; %bb.0: ; %entry
133; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
135; GFX10-NEXT:    v_mov_b32_e32 v4, 0
136; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
137; GFX10-NEXT:    s_cbranch_execz .LBB0_2
138; GFX10-NEXT:  ; %bb.1: ; %if
139; GFX10-NEXT:    flat_load_dword v4, v[2:3] offset:28
140; GFX10-NEXT:  .LBB0_2: ; %endif
141; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
142; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
143; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
144; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
145; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:252
146; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
147; GFX10-NEXT:    s_setpc_b64 s[30:31]
148entry:
  ; 999999 x 4 bytes = 0x3d08fc. The llc checks above show this split into
  ; 0x3d0000 + offset:2300 on gfx900 and 0x3d0800 + offset:252 on gfx1030.
149  %out.gep = getelementptr i32, ptr %out, i64 999999
  ; 7 x 4 bytes = 28, the value that shows up as offset:28 / "i64 28" above.
150  %in.gep = getelementptr i32, ptr %in, i64 7
151  %cmp0 = icmp eq i32 %cond, 0
152  br i1 %cmp0, label %endif, label %if
153
154if:
  ; Only use of %in.gep; it lives in a different block from its definition.
155  %load = load i32, ptr %in.gep
156  br label %endif
157
158endif:
159  %x = phi i32 [ %load, %if ], [ 0, %entry ]
160  store i32 %x, ptr %out.gep
161  br label %done
162
163done:
164  ret void
165}
166
; Flat pointer %in + 7 x i32 is addrspacecast to addrspace(1) in %entry, and
; the resulting pointer is only loaded from in %if.
;
; What the check lines below record:
;  - opt (codegenprepare) on bonaire, gfx900 and gfx1030 rebuilds the address
;    inside %if as an addrspacecast of %in followed by a byte-offset
;    getelementptr (i8, 28) in addrspace(1).
;  - opt on tonga leaves the flat getelementptr in %entry and only moves the
;    addrspacecast into %if.
;  - llc selects buffer_load_dword ... addr64 offset:28 on bonaire, an explicit
;    64-bit add plus flat_load_dword on tonga, and global_load_dword with
;    offset:28 on gfx900 and gfx1030.
167define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
168; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
169; OPT-GFX7-NEXT:  entry:
170; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
171; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
172; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
173; OPT-GFX7:       if:
174; OPT-GFX7-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
175; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
176; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
177; OPT-GFX7-NEXT:    br label [[ENDIF]]
178; OPT-GFX7:       endif:
179; OPT-GFX7-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
180; OPT-GFX7-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
181; OPT-GFX7-NEXT:    ret void
182;
183; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
184; OPT-GFX8-NEXT:  entry:
185; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
186; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
187; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
188; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
189; OPT-GFX8:       if:
190; OPT-GFX8-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN_GEP]] to ptr addrspace(1)
191; OPT-GFX8-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[TMP0]], align 4
192; OPT-GFX8-NEXT:    br label [[ENDIF]]
193; OPT-GFX8:       endif:
194; OPT-GFX8-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
195; OPT-GFX8-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
196; OPT-GFX8-NEXT:    ret void
197;
198; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
199; OPT-GFX9-NEXT:  entry:
200; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
201; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
202; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
203; OPT-GFX9:       if:
204; OPT-GFX9-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
205; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
206; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
207; OPT-GFX9-NEXT:    br label [[ENDIF]]
208; OPT-GFX9:       endif:
209; OPT-GFX9-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
210; OPT-GFX9-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
211; OPT-GFX9-NEXT:    ret void
212;
213; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
214; OPT-GFX10-NEXT:  entry:
215; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
216; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
217; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
218; OPT-GFX10:       if:
219; OPT-GFX10-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
220; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
221; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
222; OPT-GFX10-NEXT:    br label [[ENDIF]]
223; OPT-GFX10:       endif:
224; OPT-GFX10-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
225; OPT-GFX10-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
226; OPT-GFX10-NEXT:    ret void
227;
228; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
229; GFX7:       ; %bb.0: ; %entry
230; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231; GFX7-NEXT:    s_mov_b32 s6, 0
232; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
233; GFX7-NEXT:    v_mov_b32_e32 v4, 0
234; GFX7-NEXT:    s_and_saveexec_b64 s[8:9], vcc
235; GFX7-NEXT:    s_cbranch_execz .LBB1_2
236; GFX7-NEXT:  ; %bb.1: ; %if
237; GFX7-NEXT:    s_mov_b32 s7, 0xf000
238; GFX7-NEXT:    s_mov_b32 s4, s6
239; GFX7-NEXT:    s_mov_b32 s5, s6
240; GFX7-NEXT:    buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
241; GFX7-NEXT:  .LBB1_2: ; %endif
242; GFX7-NEXT:    s_or_b64 exec, exec, s[8:9]
243; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
244; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
245; GFX7-NEXT:    s_waitcnt vmcnt(0)
246; GFX7-NEXT:    flat_store_dword v[0:1], v4
247; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
248; GFX7-NEXT:    s_setpc_b64 s[30:31]
249;
250; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
251; GFX8:       ; %bb.0: ; %entry
252; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
254; GFX8-NEXT:    v_mov_b32_e32 v4, 0
255; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
256; GFX8-NEXT:    s_cbranch_execz .LBB1_2
257; GFX8-NEXT:  ; %bb.1: ; %if
258; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
259; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
260; GFX8-NEXT:    flat_load_dword v4, v[2:3]
261; GFX8-NEXT:  .LBB1_2: ; %endif
262; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
263; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
264; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
265; GFX8-NEXT:    s_waitcnt vmcnt(0)
266; GFX8-NEXT:    flat_store_dword v[0:1], v4
267; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
268; GFX8-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
271; GFX9:       ; %bb.0: ; %entry
272; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
274; GFX9-NEXT:    v_mov_b32_e32 v4, 0
275; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
276; GFX9-NEXT:    s_cbranch_execz .LBB1_2
277; GFX9-NEXT:  ; %bb.1: ; %if
278; GFX9-NEXT:    global_load_dword v4, v[2:3], off offset:28
279; GFX9-NEXT:  .LBB1_2: ; %endif
280; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
281; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
282; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
283; GFX9-NEXT:    s_waitcnt vmcnt(0)
284; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2300
285; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
286; GFX9-NEXT:    s_setpc_b64 s[30:31]
287;
288; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
289; GFX10:       ; %bb.0: ; %entry
290; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
292; GFX10-NEXT:    v_mov_b32_e32 v4, 0
293; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
294; GFX10-NEXT:    s_cbranch_execz .LBB1_2
295; GFX10-NEXT:  ; %bb.1: ; %if
296; GFX10-NEXT:    global_load_dword v4, v[2:3], off offset:28
297; GFX10-NEXT:  .LBB1_2: ; %endif
298; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
299; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
300; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
301; GFX10-NEXT:    s_waitcnt vmcnt(0)
302; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:252
303; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
304; GFX10-NEXT:    s_setpc_b64 s[30:31]
305entry:
  ; 999999 x 4 bytes = 0x3d08fc, the constant seen in the bonaire/tonga llc checks.
306  %out.gep = getelementptr i32, ptr %out, i64 999999
  ; 7 x 4 bytes = 28, the value that shows up as offset:28 / "i64 28" above.
307  %in.gep = getelementptr i32, ptr %in, i64 7
  ; Cast is defined here but its only use is the load in %if.
308  %cast = addrspacecast ptr %in.gep to ptr addrspace(1)
309  %cmp0 = icmp eq i32 %cond, 0
310  br i1 %cmp0, label %endif, label %if
311
312if:
313  %load = load i32, ptr addrspace(1) %cast
314  br label %endif
315
316endif:
317  %x = phi i32 [ %load, %if ], [ 0, %entry ]
318  store i32 %x, ptr %out.gep
319  br label %done
320
321done:
322  ret void
323}
324
; Flat pointer %in + 7 x i32 is addrspacecast to addrspace(4) in %entry, and
; the resulting pointer is only loaded from in %if.
;
; What the check lines below record:
;  - All four opt (codegenprepare) runs share one set of check lines: inside
;    %if the address is rebuilt as an addrspacecast of %in followed by a
;    byte-offset getelementptr (i8, 28) in addrspace(4).
;  - llc selects buffer_load_dword ... addr64 offset:28 on bonaire, an explicit
;    64-bit add plus flat_load_dword on tonga, and global_load_dword with
;    offset:28 on gfx900 and gfx1030.
325define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
326; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
327; OPT-NEXT:  entry:
328; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
329; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
330; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
331; OPT:       if:
332; OPT-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
333; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
334; OPT-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
335; OPT-NEXT:    br label [[ENDIF]]
336; OPT:       endif:
337; OPT-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
338; OPT-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
339; OPT-NEXT:    ret void
340;
341; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
342; GFX7:       ; %bb.0: ; %entry
343; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GFX7-NEXT:    s_mov_b32 s6, 0
345; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
346; GFX7-NEXT:    v_mov_b32_e32 v4, 0
347; GFX7-NEXT:    s_and_saveexec_b64 s[8:9], vcc
348; GFX7-NEXT:    s_cbranch_execz .LBB2_2
349; GFX7-NEXT:  ; %bb.1: ; %if
350; GFX7-NEXT:    s_mov_b32 s7, 0xf000
351; GFX7-NEXT:    s_mov_b32 s4, s6
352; GFX7-NEXT:    s_mov_b32 s5, s6
353; GFX7-NEXT:    buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
354; GFX7-NEXT:  .LBB2_2: ; %endif
355; GFX7-NEXT:    s_or_b64 exec, exec, s[8:9]
356; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
357; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
358; GFX7-NEXT:    s_waitcnt vmcnt(0)
359; GFX7-NEXT:    flat_store_dword v[0:1], v4
360; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
361; GFX7-NEXT:    s_setpc_b64 s[30:31]
362;
363; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
364; GFX8:       ; %bb.0: ; %entry
365; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
366; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
367; GFX8-NEXT:    v_mov_b32_e32 v4, 0
368; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
369; GFX8-NEXT:    s_cbranch_execz .LBB2_2
370; GFX8-NEXT:  ; %bb.1: ; %if
371; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
372; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
373; GFX8-NEXT:    flat_load_dword v4, v[2:3]
374; GFX8-NEXT:  .LBB2_2: ; %endif
375; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
376; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
377; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
378; GFX8-NEXT:    s_waitcnt vmcnt(0)
379; GFX8-NEXT:    flat_store_dword v[0:1], v4
380; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
381; GFX8-NEXT:    s_setpc_b64 s[30:31]
382;
383; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
384; GFX9:       ; %bb.0: ; %entry
385; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
387; GFX9-NEXT:    v_mov_b32_e32 v4, 0
388; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
389; GFX9-NEXT:    s_cbranch_execz .LBB2_2
390; GFX9-NEXT:  ; %bb.1: ; %if
391; GFX9-NEXT:    global_load_dword v4, v[2:3], off offset:28
392; GFX9-NEXT:  .LBB2_2: ; %endif
393; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
394; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
395; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
396; GFX9-NEXT:    s_waitcnt vmcnt(0)
397; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2300
398; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
399; GFX9-NEXT:    s_setpc_b64 s[30:31]
400;
401; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
402; GFX10:       ; %bb.0: ; %entry
403; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
405; GFX10-NEXT:    v_mov_b32_e32 v4, 0
406; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
407; GFX10-NEXT:    s_cbranch_execz .LBB2_2
408; GFX10-NEXT:  ; %bb.1: ; %if
409; GFX10-NEXT:    global_load_dword v4, v[2:3], off offset:28
410; GFX10-NEXT:  .LBB2_2: ; %endif
411; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
412; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
413; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
414; GFX10-NEXT:    s_waitcnt vmcnt(0)
415; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:252
416; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
417; GFX10-NEXT:    s_setpc_b64 s[30:31]
418entry:
  ; 999999 x 4 bytes = 0x3d08fc, the constant seen in the bonaire/tonga llc checks.
419  %out.gep = getelementptr i32, ptr %out, i64 999999
  ; 7 x 4 bytes = 28, the value that shows up as offset:28 / "i64 28" above.
420  %in.gep = getelementptr i32, ptr %in, i64 7
  ; Cast is defined here but its only use is the load in %if.
421  %cast = addrspacecast ptr %in.gep to ptr addrspace(4)
422  %cmp0 = icmp eq i32 %cond, 0
423  br i1 %cmp0, label %endif, label %if
424
425if:
426  %load = load i32, ptr addrspace(4) %cast
427  br label %endif
428
429endif:
430  %x = phi i32 [ %load, %if ], [ 0, %entry ]
431  store i32 %x, ptr %out.gep
432  br label %done
433
434done:
435  ret void
436}
437
; Flat i8 load at byte offset 4095 from %in; the address is computed in %entry
; and only used in %if. The branch condition comes from comparing
; llvm.amdgcn.mbcnt.lo(-1, 0) against 0 rather than from an argument.
;
; What the check lines below record:
;  - opt (codegenprepare) re-creates the getelementptr inside %if only for the
;    gfx900 run; bonaire, tonga and gfx1030 keep it in %entry.
;  - llc on gfx900 selects flat_load_sbyte with offset:4095; on gfx1030 it
;    adds 0x800 to the base and uses offset:2047; on bonaire and tonga it adds
;    0xfff and uses no immediate offset.
; NOTE(review): the function name suggests 4095 is the largest flat immediate
; offset for gfx900; that matches the checks here but is not stated in this
; file - confirm against the target documentation.
; Attribute groups #0 and #1 are defined outside the part of the file seen here.
438define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
439; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
440; OPT-GFX7-NEXT:  entry:
441; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
442; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
443; OPT-GFX7-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
444; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
445; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
446; OPT-GFX7:       if:
447; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
448; OPT-GFX7-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
449; OPT-GFX7-NEXT:    br label [[ENDIF]]
450; OPT-GFX7:       endif:
451; OPT-GFX7-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
452; OPT-GFX7-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
453; OPT-GFX7-NEXT:    br label [[DONE:%.*]]
454; OPT-GFX7:       done:
455; OPT-GFX7-NEXT:    ret void
456;
457; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
458; OPT-GFX8-NEXT:  entry:
459; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
460; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
461; OPT-GFX8-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
462; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
463; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
464; OPT-GFX8:       if:
465; OPT-GFX8-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
466; OPT-GFX8-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
467; OPT-GFX8-NEXT:    br label [[ENDIF]]
468; OPT-GFX8:       endif:
469; OPT-GFX8-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
470; OPT-GFX8-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
471; OPT-GFX8-NEXT:    br label [[DONE:%.*]]
472; OPT-GFX8:       done:
473; OPT-GFX8-NEXT:    ret void
474;
475; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
476; OPT-GFX9-NEXT:  entry:
477; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
478; OPT-GFX9-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
479; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
480; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
481; OPT-GFX9:       if:
482; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
483; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
484; OPT-GFX9-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
485; OPT-GFX9-NEXT:    br label [[ENDIF]]
486; OPT-GFX9:       endif:
487; OPT-GFX9-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
488; OPT-GFX9-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
489; OPT-GFX9-NEXT:    ret void
490;
491; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
492; OPT-GFX10-NEXT:  entry:
493; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
494; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
495; OPT-GFX10-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
496; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
497; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
498; OPT-GFX10:       if:
499; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
500; OPT-GFX10-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
501; OPT-GFX10-NEXT:    br label [[ENDIF]]
502; OPT-GFX10:       endif:
503; OPT-GFX10-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
504; OPT-GFX10-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
505; OPT-GFX10-NEXT:    br label [[DONE:%.*]]
506; OPT-GFX10:       done:
507; OPT-GFX10-NEXT:    ret void
508;
509; GFX7-LABEL: test_sink_flat_small_max_flat_offset:
510; GFX7:       ; %bb.0: ; %entry
511; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512; GFX7-NEXT:    v_mbcnt_lo_u32_b32_e64 v5, -1, 0
513; GFX7-NEXT:    v_mov_b32_e32 v4, 0
514; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
515; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
516; GFX7-NEXT:    s_cbranch_execz .LBB3_2
517; GFX7-NEXT:  ; %bb.1: ; %if
518; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 0xfff, v2
519; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
520; GFX7-NEXT:    flat_load_sbyte v4, v[2:3]
521; GFX7-NEXT:  .LBB3_2: ; %endif
522; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
523; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x1000, v0
524; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
525; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
526; GFX7-NEXT:    flat_store_dword v[0:1], v4
527; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
528; GFX7-NEXT:    s_setpc_b64 s[30:31]
529;
530; GFX8-LABEL: test_sink_flat_small_max_flat_offset:
531; GFX8:       ; %bb.0: ; %entry
532; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
534; GFX8-NEXT:    v_mov_b32_e32 v4, 0
535; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
536; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
537; GFX8-NEXT:    s_cbranch_execz .LBB3_2
538; GFX8-NEXT:  ; %bb.1: ; %if
539; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xfff, v2
540; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
541; GFX8-NEXT:    flat_load_sbyte v4, v[2:3]
542; GFX8-NEXT:  .LBB3_2: ; %endif
543; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
544; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x1000, v0
545; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
546; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
547; GFX8-NEXT:    flat_store_dword v[0:1], v4
548; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
549; GFX8-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX9-LABEL: test_sink_flat_small_max_flat_offset:
552; GFX9:       ; %bb.0: ; %entry
553; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
555; GFX9-NEXT:    v_mov_b32_e32 v4, 0
556; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
557; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
558; GFX9-NEXT:    s_cbranch_execz .LBB3_2
559; GFX9-NEXT:  ; %bb.1: ; %if
560; GFX9-NEXT:    flat_load_sbyte v4, v[2:3] offset:4095
561; GFX9-NEXT:  .LBB3_2: ; %endif
562; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
563; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
564; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
565; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
566; GFX9-NEXT:    flat_store_dword v[0:1], v4
567; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
568; GFX9-NEXT:    s_setpc_b64 s[30:31]
569;
570; GFX10-LABEL: test_sink_flat_small_max_flat_offset:
571; GFX10:       ; %bb.0: ; %entry
572; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
573; GFX10-NEXT:    v_mbcnt_lo_u32_b32 v4, -1, 0
574; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
575; GFX10-NEXT:    v_mov_b32_e32 v4, 0
576; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
577; GFX10-NEXT:    s_cbranch_execz .LBB3_2
578; GFX10-NEXT:  ; %bb.1: ; %if
579; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, 0x800, v2
580; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
581; GFX10-NEXT:    flat_load_sbyte v4, v[2:3] offset:2047
582; GFX10-NEXT:  .LBB3_2: ; %endif
583; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
584; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
585; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
586; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
587; GFX10-NEXT:    flat_store_dword v[0:1], v4
588; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
589; GFX10-NEXT:    s_setpc_b64 s[30:31]
590entry:
  ; 1024 x 4 bytes = 0x1000, the constant added to the store base in every llc run.
591  %out.gep = getelementptr i32, ptr %out, i32 1024
  ; Byte offset 4095 = 0xfff; on gfx1030 the llc checks split it as 0x800 + 2047.
592  %in.gep = getelementptr i8, ptr %in, i64 4095
593  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
594  %cmp0 = icmp eq i32 %tid, 0
595  br i1 %cmp0, label %endif, label %if
596
597if:
  ; Sign-extending byte load; selected as flat_load_sbyte in all llc runs above.
598  %load = load i8, ptr %in.gep
599  %cast = sext i8 %load to i32
600  br label %endif
601
602endif:
603  %x = phi i32 [ %cast, %if ], [ 0, %entry ]
604  store i32 %x, ptr %out.gep
605  br label %done
606
607done:
608  ret void
609}
610
611define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
612; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
613; OPT-NEXT:  entry:
614; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
615; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096
616; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
617; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
618; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
619; OPT:       if:
620; OPT-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
621; OPT-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
622; OPT-NEXT:    br label [[ENDIF]]
623; OPT:       endif:
624; OPT-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
625; OPT-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
626; OPT-NEXT:    br label [[DONE:%.*]]
627; OPT:       done:
628; OPT-NEXT:    ret void
629;
630; GFX7-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
631; GFX7:       ; %bb.0: ; %entry
632; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633; GFX7-NEXT:    v_mbcnt_lo_u32_b32_e64 v5, -1, 0
634; GFX7-NEXT:    v_mov_b32_e32 v4, 0
635; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
636; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
637; GFX7-NEXT:    s_cbranch_execz .LBB4_2
638; GFX7-NEXT:  ; %bb.1: ; %if
639; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 0x1000, v2
640; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
641; GFX7-NEXT:    flat_load_sbyte v4, v[2:3]
642; GFX7-NEXT:  .LBB4_2: ; %endif
643; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
644; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x61a7c, v0
645; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
646; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
647; GFX7-NEXT:    flat_store_dword v[0:1], v4
648; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
649; GFX7-NEXT:    s_setpc_b64 s[30:31]
650;
651; GFX8-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
652; GFX8:       ; %bb.0: ; %entry
653; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
655; GFX8-NEXT:    v_mov_b32_e32 v4, 0
656; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
657; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
658; GFX8-NEXT:    s_cbranch_execz .LBB4_2
659; GFX8-NEXT:  ; %bb.1: ; %if
660; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x1000, v2
661; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
662; GFX8-NEXT:    flat_load_sbyte v4, v[2:3]
663; GFX8-NEXT:  .LBB4_2: ; %endif
664; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
665; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x61a7c, v0
666; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
667; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
668; GFX8-NEXT:    flat_store_dword v[0:1], v4
669; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
670; GFX8-NEXT:    s_setpc_b64 s[30:31]
671;
672; GFX9-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
673; GFX9:       ; %bb.0: ; %entry
674; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
675; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
676; GFX9-NEXT:    v_mov_b32_e32 v4, 0
677; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
678; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
679; GFX9-NEXT:    s_cbranch_execz .LBB4_2
680; GFX9-NEXT:  ; %bb.1: ; %if
681; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0x1000, v2
682; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
683; GFX9-NEXT:    flat_load_sbyte v4, v[2:3]
684; GFX9-NEXT:  .LBB4_2: ; %endif
685; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
686; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x61000, v0
687; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
688; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
689; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2684
690; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
691; GFX9-NEXT:    s_setpc_b64 s[30:31]
692;
693; GFX10-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
694; GFX10:       ; %bb.0: ; %entry
695; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696; GFX10-NEXT:    v_mbcnt_lo_u32_b32 v4, -1, 0
697; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
698; GFX10-NEXT:    v_mov_b32_e32 v4, 0
699; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
700; GFX10-NEXT:    s_cbranch_execz .LBB4_2
701; GFX10-NEXT:  ; %bb.1: ; %if
702; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, 0x1000, v2
703; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
704; GFX10-NEXT:    flat_load_sbyte v4, v[2:3]
705; GFX10-NEXT:  .LBB4_2: ; %endif
706; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
707; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x61800, v0
708; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
709; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
710; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:636
711; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
712; GFX10-NEXT:    s_setpc_b64 s[30:31]
713entry:
714  %out.gep = getelementptr i32, ptr %out, i64 99999
715  %in.gep = getelementptr i8, ptr %in, i64 4096
716  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
717  %cmp0 = icmp eq i32 %tid, 0
718  br i1 %cmp0, label %endif, label %if
719
720if:
721  %load = load i8, ptr %in.gep
722  %cast = sext i8 %load to i32
723  br label %endif
724
725endif:
726  %x = phi i32 [ %cast, %if ], [ 0, %entry ]
727  store i32 %x, ptr %out.gep
728  br label %done
729
730done:
731  ret void
732}
733
734define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
735; OPT-LABEL: @test_sinkable_flat_reg_offset(
736; OPT-NEXT:  entry:
737; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
738; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
739; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]]
740; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
741; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
742; OPT:       if:
743; OPT-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
744; OPT-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
745; OPT-NEXT:    br label [[ENDIF]]
746; OPT:       endif:
747; OPT-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
748; OPT-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
749; OPT-NEXT:    br label [[DONE:%.*]]
750; OPT:       done:
751; OPT-NEXT:    ret void
752;
753; GFX7-LABEL: test_sinkable_flat_reg_offset:
754; GFX7:       ; %bb.0: ; %entry
755; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
756; GFX7-NEXT:    v_mbcnt_lo_u32_b32_e64 v7, -1, 0
757; GFX7-NEXT:    v_mov_b32_e32 v6, 0
758; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
759; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
760; GFX7-NEXT:    s_cbranch_execz .LBB5_2
761; GFX7-NEXT:  ; %bb.1: ; %if
762; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
763; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
764; GFX7-NEXT:    flat_load_sbyte v6, v[2:3]
765; GFX7-NEXT:  .LBB5_2: ; %endif
766; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
767; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x1000, v0
768; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
769; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
770; GFX7-NEXT:    flat_store_dword v[0:1], v6
771; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
772; GFX7-NEXT:    s_setpc_b64 s[30:31]
773;
774; GFX8-LABEL: test_sinkable_flat_reg_offset:
775; GFX8:       ; %bb.0: ; %entry
776; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v7, -1, 0
778; GFX8-NEXT:    v_mov_b32_e32 v6, 0
779; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
780; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
781; GFX8-NEXT:    s_cbranch_execz .LBB5_2
782; GFX8-NEXT:  ; %bb.1: ; %if
783; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
784; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
785; GFX8-NEXT:    flat_load_sbyte v6, v[2:3]
786; GFX8-NEXT:  .LBB5_2: ; %endif
787; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
788; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x1000, v0
789; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
790; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
791; GFX8-NEXT:    flat_store_dword v[0:1], v6
792; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
793; GFX8-NEXT:    s_setpc_b64 s[30:31]
794;
795; GFX9-LABEL: test_sinkable_flat_reg_offset:
796; GFX9:       ; %bb.0: ; %entry
797; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v7, -1, 0
799; GFX9-NEXT:    v_mov_b32_e32 v6, 0
800; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
801; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
802; GFX9-NEXT:    s_cbranch_execz .LBB5_2
803; GFX9-NEXT:  ; %bb.1: ; %if
804; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
805; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
806; GFX9-NEXT:    flat_load_sbyte v6, v[2:3]
807; GFX9-NEXT:  .LBB5_2: ; %endif
808; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
809; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
810; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
811; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
812; GFX9-NEXT:    flat_store_dword v[0:1], v6
813; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
814; GFX9-NEXT:    s_setpc_b64 s[30:31]
815;
816; GFX10-LABEL: test_sinkable_flat_reg_offset:
817; GFX10:       ; %bb.0: ; %entry
818; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
819; GFX10-NEXT:    v_mbcnt_lo_u32_b32 v6, -1, 0
820; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
821; GFX10-NEXT:    v_mov_b32_e32 v6, 0
822; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
823; GFX10-NEXT:    s_cbranch_execz .LBB5_2
824; GFX10-NEXT:  ; %bb.1: ; %if
825; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v4
826; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
827; GFX10-NEXT:    flat_load_sbyte v6, v[2:3]
828; GFX10-NEXT:  .LBB5_2: ; %endif
829; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
830; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
831; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
832; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
833; GFX10-NEXT:    flat_store_dword v[0:1], v6
834; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
835; GFX10-NEXT:    s_setpc_b64 s[30:31]
; Test input: a flat-pointer load whose address is %in plus a run-time i64
; byte offset (%reg) rather than a constant. The address is formed in the
; entry block but consumed only by the load in %if. The expected opt output
; above keeps both GEPs in the entry block; the expected llc output for every
; listed target has the 64-bit address add (add plus add-with-carry) inside
; the %if block and puts no immediate offset on the flat load.
; The check lines above are autogenerated by update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
836entry:
  ; Store address: 1024 i32 elements past %out (the llc checks add 0x1000).
837  %out.gep = getelementptr i32, ptr %out, i32 1024
  ; Load address: %in advanced by %reg bytes (i8 element type).
838  %in.gep = getelementptr i8, ptr %in, i64 %reg
  ; Branch on the mbcnt.lo result: 0 goes straight to %endif, anything else
  ; takes the %if path that performs the load.
839  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
840  %cmp0 = icmp eq i32 %tid, 0
841  br i1 %cmp0, label %endif, label %if
842
843if:
  ; Only user of %in.gep: byte load, sign-extended to i32.
844  %load = load i8, ptr %in.gep
845  %cast = sext i8 %load to i32
846  br label %endif
847
848endif:
  ; %x is the sign-extended byte when %if ran, otherwise 0.
849  %x = phi i32 [ %cast, %if ], [ 0, %entry ]
850  store i32 %x, ptr %out.gep
851  br label %done
852
853done:
854  ret void
855}
856
; Intrinsic called by the tests above as (i32 -1, i32 0); its result is
; compared against 0 to choose between the %if and %endif paths.
857declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
858
; Attribute groups referenced by number from the definitions above.
; #0 is attached to the mbcnt.lo declaration and to its call sites.
859attributes #0 = { nounwind readnone }
; #1 is attached to the definition of @test_sinkable_flat_reg_offset.
860attributes #1 = { nounwind }
; #2 is not referenced in the part of the file reviewed here; presumably used
; by an earlier test or left over -- TODO confirm before removing.
861attributes #2 = { nounwind argmemonly }
862