xref: /llvm-project/llvm/test/CodeGen/AArch64/merge-store-dependency.ll (revision 3d18c8cd265c0c0bf1d85226c4770a2dd0f86e8f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mcpu cortex-a53 -mtriple=aarch64 | FileCheck %s --check-prefix=A53
3
4; PR26827 - Merging stores causes a wrong dependency.
; Record type addressed through %fde in @test; @test stores to field indices
; 2, 3, 4, 6 and 7 after zero-filling the start of the object.
5%struct1 = type { ptr, ptr, i32, i32, i16, i16, ptr, ptr }
; i32 global that @test loads as the loop start value and whose address it
; compares against the address of field 2.
6@gv0 = internal unnamed_addr global i32 0, align 4
; Pointer global that receives the value returned by @foo in @test.
7@gv1 = internal unnamed_addr global ptr null, align 8
8
; Store-merging dependency test input: zero-fills 40 bytes of %fde with memset,
; overwrites selected %struct1 fields, passes %fde to fcntl, then reloads field 2
; and uses it as the bound of a doubling loop.
; NOTE(review): the CHECK-LABEL line below uses the default CHECK prefix, while
; the only FileCheck invocation in this file selects the A53 prefix, so that
; line looks inactive - confirm before relying on it.
9define void @test(ptr %fde, i32 %fd, ptr %func, ptr %arg) uwtable {
10;CHECK-LABEL: test
11; A53-LABEL: test:
12; A53:       // %bb.0: // %entry
13; A53-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
14; A53-NEXT:    .cfi_def_cfa_offset 16
15; A53-NEXT:    .cfi_offset w19, -8
16; A53-NEXT:    .cfi_offset w30, -16
17; A53-NEXT:    .cfi_remember_state
18; A53-NEXT:    movi v0.2d, #0000000000000000
19; A53-NEXT:    mov x8, x0
20; A53-NEXT:    mov x19, x8
21; A53-NEXT:    mov w0, w1
22; A53-NEXT:    mov w9, #256
23; A53-NEXT:    stp x2, x3, [x8, #32]
24; A53-NEXT:    mov x2, x8
25; A53-NEXT:    str q0, [x19, #16]!
26; A53-NEXT:    str w1, [x19]
27; A53-NEXT:    mov w1, #4
28; A53-NEXT:    str q0, [x8]
29; A53-NEXT:    strh w9, [x8, #24]
30; A53-NEXT:    str wzr, [x8, #20]
31; A53-NEXT:    bl fcntl
32; A53-NEXT:    adrp x9, gv0
33; A53-NEXT:    add x9, x9, :lo12:gv0
34; A53-NEXT:    cmp x19, x9
35; A53-NEXT:    b.eq .LBB0_4
36; A53-NEXT:  // %bb.1:
37; A53-NEXT:    ldr w8, [x19]
38; A53-NEXT:    ldr w9, [x9]
39; A53-NEXT:    .p2align 4, , 8
40; A53-NEXT:  .LBB0_2: // %while.body.i.split.ver.us
41; A53-NEXT:    // =>This Inner Loop Header: Depth=1
42; A53-NEXT:    lsl w9, w9, #1
43; A53-NEXT:    cmp w9, w8
44; A53-NEXT:    b.le .LBB0_2
45; A53-NEXT:  // %bb.3: // %while.end.i
46; A53-NEXT:    bl foo
47; A53-NEXT:    adrp x8, gv1
48; A53-NEXT:    str x0, [x8, :lo12:gv1]
49; A53-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
50; A53-NEXT:    .cfi_def_cfa_offset 0
51; A53-NEXT:    .cfi_restore w19
52; A53-NEXT:    .cfi_restore w30
53; A53-NEXT:    ret
54; A53-NEXT:    .p2align 4, , 8
55; A53-NEXT:  .LBB0_4: // %while.body.i.split
56; A53-NEXT:    // =>This Inner Loop Header: Depth=1
57; A53-NEXT:    .cfi_restore_state
58; A53-NEXT:    b .LBB0_4
59entry:
  ; Zero the first 40 bytes of the object before the individual field stores.
60  tail call void @llvm.memset.p0.i64(ptr align 8 %fde, i8 0, i64 40, i1 false)
  ; Field 4 (i16) <- 256.
61  %state = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 4
62  store i16 256, ptr %state, align 8
  ; Field 2 (i32) <- %fd; this address is reloaded after the call below.
63  %fd1 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 2
64  store i32 %fd, ptr %fd1, align 8
  ; Field 3 (i32) <- 0.
65  %force_eof = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 3
66  store i32 0, ptr %force_eof, align 4
  ; Fields 6 and 7 (ptr) <- %func and %arg.
67  %func2 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 6
68  store ptr %func, ptr %func2, align 8
69  %arg3 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 7
70  store ptr %arg, ptr %arg3, align 8
  ; The variadic call receives %fde itself, so every store above precedes a call
  ; that is handed the object.
  ; NOTE(review): attribute group #6 is not defined in the text visible here -
  ; confirm whether it is intentionally left undefined.
71  %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, ptr %fde) #6
  ; Reload field 2 after the call and branch on its sign.
72  %0 = load i32, ptr %fd1, align 8
73  %cmp.i = icmp slt i32 %0, 0
74  br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
75if.then.i:
76  unreachable
77
78while.body.i.preheader:
  ; Load the loop start value from @gv0, then pick a loop version depending on
  ; whether the address of field 2 equals the address of @gv0.
79  %1 = load i32, ptr @gv0, align 4
80  %2 = icmp eq ptr %fd1, @gv0
81  br i1 %2, label %while.body.i.split, label %while.body.i.split.ver.us.preheader
82
83while.body.i.split.ver.us.preheader:
84  br label %while.body.i.split.ver.us
85
86while.body.i.split.ver.us:
  ; Double the value each iteration until it is signed-greater than %0.
87  %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %1, %while.body.i.split.ver.us.preheader ]
88  %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
89  %3 = icmp sgt i32 %mul.i.ver.us, %0
90  br i1 %3, label %while.end.i, label %while.body.i.split.ver.us
91
92while.body.i.split:
  ; Unconditional branch to itself: this version of the loop never exits.
93  br label %while.body.i.split
94
95while.end.i:
  ; Publish the pointer returned by @foo through @gv1.
96  %call.i = tail call ptr @foo()
97  store ptr %call.i, ptr @gv1, align 8
98  br label %exit
99
100exit:
101  ret void
102}
103
104; TODO: Could this byte swap be lowered to a halfword load, rev16, and a halfword store?
105
; Swaps the two bytes at %p and %p+1 in place: both i8 loads happen before
; either i8 store. The expected output keeps four separate byte memory
; operations (two ldrb, two strb).
106define void @rotate16_in_place(ptr %p) {
107; A53-LABEL: rotate16_in_place:
108; A53:       // %bb.0:
109; A53-NEXT:    ldrb w8, [x0, #1]
110; A53-NEXT:    ldrb w9, [x0]
111; A53-NEXT:    strb w8, [x0]
112; A53-NEXT:    strb w9, [x0, #1]
113; A53-NEXT:    ret
114  %p1 = getelementptr i8, ptr %p, i64 1
115  %i0 = load i8, ptr %p, align 1
116  %i1 = load i8, ptr %p1, align 1
  ; Stores use the opposite slots from the loads, which performs the swap.
117  store i8 %i1, ptr %p, align 1
118  store i8 %i0, ptr %p1, align 1
119  ret void
120}
121
122; TODO: Could this byte swap be lowered to a halfword load, rev16, and a halfword store?
123
; Reads the two bytes at %p and %p+1 and writes them in swapped order to %q and
; %q+1. The expected output keeps four separate byte memory operations.
124define void @rotate16(ptr %p, ptr %q) {
125; A53-LABEL: rotate16:
126; A53:       // %bb.0:
127; A53-NEXT:    ldrb w8, [x0, #1]
128; A53-NEXT:    ldrb w9, [x0]
129; A53-NEXT:    strb w8, [x1]
130; A53-NEXT:    strb w9, [x1, #1]
131; A53-NEXT:    ret
132  %p1 = getelementptr i8, ptr %p, i64 1
133  %q1 = getelementptr i8, ptr %q, i64 1
134  %i0 = load i8, ptr %p, align 1
135  %i1 = load i8, ptr %p1, align 1
  ; Byte 1 of the source goes to byte 0 of the destination and vice versa.
136  store i8 %i1, ptr %q, align 1
137  store i8 %i0, ptr %q1, align 1
138  ret void
139}
140
; Swaps the two adjacent i16 values at %p in place. The expected output merges
; the four halfword accesses into one 32-bit load, a rotate right by 16, and
; one 32-bit store.
141define void @rotate32_in_place(ptr %p) {
142; A53-LABEL: rotate32_in_place:
143; A53:       // %bb.0:
144; A53-NEXT:    ldr w8, [x0]
145; A53-NEXT:    ror w8, w8, #16
146; A53-NEXT:    str w8, [x0]
147; A53-NEXT:    ret
148  %p1 = getelementptr i16, ptr %p, i64 1
149  %i0 = load i16, ptr %p, align 2
150  %i1 = load i16, ptr %p1, align 2
  ; Stores use the opposite slots from the loads, which performs the swap.
151  store i16 %i1, ptr %p, align 2
152  store i16 %i0, ptr %p1, align 2
153  ret void
154}
155
; Loads i16 elements 0 and 1 of %p and stores them in swapped order to elements
; 42 and 43. The expected output uses one 32-bit load, a rotate right by 16,
; and one 32-bit store at byte offset 84.
156define void @rotate32(ptr %p) {
157; A53-LABEL: rotate32:
158; A53:       // %bb.0:
159; A53-NEXT:    ldr w8, [x0]
160; A53-NEXT:    ror w8, w8, #16
161; A53-NEXT:    str w8, [x0, #84]
162; A53-NEXT:    ret
163  %p1 = getelementptr i16, ptr %p, i64 1
164  %p42 = getelementptr i16, ptr %p, i64 42
165  %p43 = getelementptr i16, ptr %p, i64 43
166  %i0 = load i16, ptr %p, align 2
167  %i1 = load i16, ptr %p1, align 2
  ; Element 1 lands in the lower destination slot, element 0 in the upper one.
168  store i16 %i1, ptr %p42, align 2
169  store i16 %i0, ptr %p43, align 2
170  ret void
171}
172
173; Prefer paired memops (ldp/stp) over a single 64-bit load, rotate, and store.
174
; Swaps the two adjacent i32 values at %p in place. The expected output is a
; paired load followed by a paired store with the two registers exchanged; no
; rotate instruction is expected.
175define void @rotate64_in_place(ptr %p) {
176; A53-LABEL: rotate64_in_place:
177; A53:       // %bb.0:
178; A53-NEXT:    ldp w9, w8, [x0]
179; A53-NEXT:    stp w8, w9, [x0]
180; A53-NEXT:    ret
181  %p1 = getelementptr i32, ptr %p, i64 1
182  %i0 = load i32, ptr %p, align 4
183  %i1 = load i32, ptr %p1, align 4
  ; Stores use the opposite slots from the loads, which performs the swap.
184  store i32 %i1, ptr %p, align 4
185  store i32 %i0, ptr %p1, align 4
186  ret void
187}
188
189; Prefer paired memops (ldp/stp) over a single 64-bit load, rotate, and store.
190
; Loads i32 elements 0 and 1 of %p and stores them in swapped order to elements
; 2 and 3. The expected output is a paired load followed by a paired store at
; byte offset 8 with the two registers exchanged.
191define void @rotate64(ptr %p) {
192; A53-LABEL: rotate64:
193; A53:       // %bb.0:
194; A53-NEXT:    ldp w9, w8, [x0]
195; A53-NEXT:    stp w8, w9, [x0, #8]
196; A53-NEXT:    ret
197  %p1 = getelementptr i32, ptr %p, i64 1
198  %p2 = getelementptr i32, ptr %p, i64 2
199  %p3 = getelementptr i32, ptr %p, i64 3
200  %i0 = load i32, ptr %p, align 4
201  %i1 = load i32, ptr %p1, align 4
  ; Element 1 lands in the lower destination slot, element 0 in the upper one.
202  store i32 %i1, ptr %p2, align 4
203  store i32 %i0, ptr %p3, align 4
204  ret void
205}
206
; External callees referenced by @test: the memset intrinsic used for the
; initial zero-fill, the variadic @fcntl that is passed the object pointer, and
; @foo, whose returned pointer is stored to @gv1.
207declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
208declare i32 @fcntl(i32, i32, ...)
209declare noalias ptr @foo()
210