; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
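; Reduced regression test, presumably for https://github.com/llvm/llvm-project/issues/63986:
; lowering these variable-length memcpys into the loop-memcpy-expansion /
; loop-memcpy-residual loops should not crash the backend.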

%"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64] }

define void @issue63986(i64 %0, i64 %idxprom) {
; CHECK-LABEL: issue63986:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b64 v[4:5], 6, v[2:3]
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB0_1: ; %loop-memcpy-expansion
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    v_mov_b32_e32 v7, s5
; CHECK-NEXT:    v_mov_b32_e32 v6, s4
; CHECK-NEXT:    flat_load_dwordx4 v[6:9], v[6:7]
; CHECK-NEXT:    v_add_co_u32_e32 v10, vcc, s4, v4
; CHECK-NEXT:    s_add_u32 s4, s4, 16
; CHECK-NEXT:    v_mov_b32_e32 v11, s5
; CHECK-NEXT:    s_addc_u32 s5, s5, 0
; CHECK-NEXT:    v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
; CHECK-NEXT:    v_addc_co_u32_e32 v11, vcc, v5, v11, vcc
; CHECK-NEXT:    s_and_b64 vcc, exec, s[6:7]
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    flat_store_dwordx4 v[10:11], v[6:9]
; CHECK-NEXT:    s_cbranch_vccz .LBB0_1
; CHECK-NEXT:  ; %bb.2: ; %loop-memcpy-residual-header
; CHECK-NEXT:    s_branch .LBB0_4
; CHECK-NEXT:  ; %bb.3:
; CHECK-NEXT:    ; implicit-def: $vgpr6_vgpr7
; CHECK-NEXT:    s_branch .LBB0_5
; CHECK-NEXT:  .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
; CHECK-NEXT:    v_lshlrev_b64 v[6:7], 6, v[2:3]
; CHECK-NEXT:    s_cbranch_execnz .LBB0_7
; CHECK-NEXT:  .LBB0_5: ; %loop-memcpy-residual.preheader
; CHECK-NEXT:    v_or_b32_e32 v2, 32, v4
; CHECK-NEXT:    v_mov_b32_e32 v3, v5
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  ; %bb.6: ; %loop-memcpy-residual
; CHECK-NEXT:    s_add_u32 s6, 32, s4
; CHECK-NEXT:    s_addc_u32 s7, 0, s5
; CHECK-NEXT:    v_mov_b32_e32 v6, s6
; CHECK-NEXT:    v_mov_b32_e32 v7, s7
; CHECK-NEXT:    flat_load_ubyte v10, v[6:7]
; CHECK-NEXT:    v_mov_b32_e32 v9, s5
; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, s4, v2
; CHECK-NEXT:    v_mov_b32_e32 v7, v5
; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v3, v9, vcc
; CHECK-NEXT:    s_add_u32 s4, s4, 1
; CHECK-NEXT:    v_mov_b32_e32 v6, v4
; CHECK-NEXT:    s_addc_u32 s5, s5, 0
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    flat_store_byte v[8:9], v10
; CHECK-NEXT:  .LBB0_7: ; %post-loop-memcpy-expansion
; CHECK-NEXT:    v_and_b32_e32 v2, 15, v0
; CHECK-NEXT:    v_mov_b32_e32 v3, 0
; CHECK-NEXT:    v_and_b32_e32 v0, -16, v0
; CHECK-NEXT:    v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
; CHECK-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
; CHECK-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v0
; CHECK-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v1, vcc
; CHECK-NEXT:    s_branch .LBB0_10
; CHECK-NEXT:  .LBB0_8: ; %Flow14
; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT:    s_or_b64 exec, exec, s[10:11]
; CHECK-NEXT:    s_mov_b64 s[8:9], 0
; CHECK-NEXT:  .LBB0_9: ; %Flow16
; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
; CHECK-NEXT:    s_cbranch_vccz .LBB0_18
; CHECK-NEXT:  .LBB0_10: ; %while.cond
; CHECK-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NEXT:    ; Child Loop BB0_12 Depth 2
; CHECK-NEXT:    ; Child Loop BB0_16 Depth 2
; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
; CHECK-NEXT:    s_cbranch_execz .LBB0_13
; CHECK-NEXT:  ; %bb.11: ; %loop-memcpy-expansion2.preheader
; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT:    s_mov_b64 s[10:11], 0
; CHECK-NEXT:    s_mov_b64 s[12:13], 0
; CHECK-NEXT:  .LBB0_12: ; %loop-memcpy-expansion2
; CHECK-NEXT:    ; Parent Loop BB0_10 Depth=1
; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NEXT:    v_mov_b32_e32 v8, s12
; CHECK-NEXT:    v_mov_b32_e32 v9, s13
; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
; CHECK-NEXT:    v_mov_b32_e32 v13, s13
; CHECK-NEXT:    v_add_co_u32_e32 v12, vcc, s12, v4
; CHECK-NEXT:    s_add_u32 s12, s12, 16
; CHECK-NEXT:    v_addc_co_u32_e32 v13, vcc, v5, v13, vcc
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
; CHECK-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[10:11]
; CHECK-NEXT:    s_cbranch_execnz .LBB0_12
; CHECK-NEXT:  .LBB0_13: ; %Flow15
; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT:    s_mov_b64 s[8:9], -1
; CHECK-NEXT:    s_cbranch_execz .LBB0_9
; CHECK-NEXT:  ; %bb.14: ; %loop-memcpy-residual-header5
; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
; CHECK-NEXT:    s_xor_b64 s[10:11], exec, s[8:9]
; CHECK-NEXT:    s_cbranch_execz .LBB0_8
; CHECK-NEXT:  ; %bb.15: ; %loop-memcpy-residual4.preheader
; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT:    s_mov_b64 s[12:13], 0
; CHECK-NEXT:    s_mov_b64 s[14:15], 0
; CHECK-NEXT:  .LBB0_16: ; %loop-memcpy-residual4
; CHECK-NEXT:    ; Parent Loop BB0_10 Depth=1
; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NEXT:    v_mov_b32_e32 v10, s15
; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, s14, v0
; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v1, v10, vcc
; CHECK-NEXT:    flat_load_ubyte v11, v[8:9]
; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, s14, v6
; CHECK-NEXT:    s_add_u32 s14, s14, 1
; CHECK-NEXT:    s_addc_u32 s15, s15, 0
; CHECK-NEXT:    v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3]
; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v7, v10, vcc
; CHECK-NEXT:    s_or_b64 s[12:13], s[8:9], s[12:13]
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    flat_store_byte v[8:9], v11
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[12:13]
; CHECK-NEXT:    s_cbranch_execnz .LBB0_16
; CHECK-NEXT:  ; %bb.17: ; %Flow
; CHECK-NEXT:    ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT:    s_or_b64 exec, exec, s[12:13]
; CHECK-NEXT:    s_branch .LBB0_8
; CHECK-NEXT:  .LBB0_18: ; %DummyReturnBlock
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr null, i64 0, i64 %idxprom
  %spec.select = tail call i64 @llvm.umin.i64(i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) inttoptr (i64 32 to ptr addrspace(4)) to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 56)
  tail call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx, ptr null, i64 %spec.select, i1 false)
  br label %while.cond

while.cond:                         ; preds = %while.cond
  tail call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx, ptr null, i64 %0, i1 false)
  br label %while.cond
}

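; Further-reduced variant of the case above, with the memcpy expansion control
; flow (the loop-memcpy-expansion / loop-memcpy-residual blocks) already
; written out as IR.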
define void @issue63986_reduced_expanded(i64 %idxprom) {
; CHECK-LABEL: issue63986_reduced_expanded:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:  ; %bb.1: ; %loop-memcpy-expansion.preheader
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; CHECK-NEXT:  ; %bb.2: ; %loop-memcpy-residual-header
; CHECK-NEXT:    s_and_b32 s4, 32, 15
; CHECK-NEXT:    s_mov_b32 s5, 0
; CHECK-NEXT:    s_cbranch_scc0 .LBB1_4
; CHECK-NEXT:  ; %bb.3:
; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT:    s_branch .LBB1_5
; CHECK-NEXT:  .LBB1_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
; CHECK-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; CHECK-NEXT:    s_cbranch_execnz .LBB1_8
; CHECK-NEXT:  .LBB1_5: ; %loop-memcpy-residual.preheader
; CHECK-NEXT:    v_mov_b32_e32 v0, s4
; CHECK-NEXT:    s_mov_b64 s[6:7], 0
; CHECK-NEXT:    v_mov_b32_e32 v1, s5
; CHECK-NEXT:  .LBB1_6: ; %loop-memcpy-residual
; CHECK-NEXT:    s_add_u32 s4, s6, 1
; CHECK-NEXT:    s_addc_u32 s5, s7, 0
; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
; CHECK-NEXT:    s_mov_b64 s[6:7], 1
; CHECK-NEXT:    s_cbranch_vccnz .LBB1_6
; CHECK-NEXT:  ; %bb.7: ; %Flow
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:  .LBB1_8: ; %post-loop-memcpy-expansion
; CHECK-NEXT:    v_mov_b32_e32 v2, 0
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    v_mov_b32_e32 v4, v2
; CHECK-NEXT:    v_mov_b32_e32 v5, v2
; CHECK-NEXT:    s_and_b64 vcc, exec, 0
; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT:  .LBB1_9: ; %loop-memcpy-expansion2
; CHECK-NEXT:    s_mov_b64 vcc, vcc
; CHECK-NEXT:    s_cbranch_vccz .LBB1_9
; CHECK-NEXT:  ; %bb.10: ; %DummyReturnBlock
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %spec.select = tail call i64 @llvm.umin.i64(i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) inttoptr (i64 32 to ptr addrspace(4)) to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 56)
  %i = trunc i64 %spec.select to i32
  %i1 = urem i32 %i, 16
  %i2 = zext i32 %i to i64
  %i3 = zext i32 %i1 to i64
  %i4 = icmp ne i64 %i2, 0
  br i1 %i4, label %loop-memcpy-expansion.preheader, label %loop-memcpy-residual-header

loop-memcpy-expansion.preheader:                  ; preds = %entry
  ret void

loop-memcpy-residual:                             ; preds = %loop-memcpy-residual.preheader, %loop-memcpy-residual
  %residual-loop-index1 = phi i64 [ 1, %loop-memcpy-residual ], [ 0, %loop-memcpy-residual.preheader ]
  %i5 = add i64 %residual-loop-index1, 1
  %i6 = icmp ult i64 %i5, %i3
  br i1 %i6, label %loop-memcpy-residual, label %post-loop-memcpy-expansion

post-loop-memcpy-expansion:                       ; preds = %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge, %loop-memcpy-residual
  %.pre-phi = phi i64 [ %.pre, %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge ], [ 0, %loop-memcpy-residual ]
  br label %loop-memcpy-expansion2

loop-memcpy-expansion2:                           ; preds = %loop-memcpy-expansion2, %post-loop-memcpy-expansion
  %scevgep7 = getelementptr i8, ptr null, i64 %.pre-phi
  store <4 x i32> zeroinitializer, ptr %scevgep7, align 1
  br label %loop-memcpy-expansion2

loop-memcpy-residual-header:                      ; preds = %entry
  %i7 = icmp ne i64 %i3, 0
  br i1 %i7, label %loop-memcpy-residual.preheader, label %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge

loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge: ; preds = %loop-memcpy-residual-header
  %.pre = shl i64 %idxprom, 1
  br label %post-loop-memcpy-expansion

loop-memcpy-residual.preheader:                   ; preds = %loop-memcpy-residual-header
  br label %loop-memcpy-residual
}

declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
declare i64 @llvm.umin.i64(i64, i64) #1

attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }