; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu cortex-a53 -mtriple=aarch64 | FileCheck %s --check-prefix=A53

; PR26827 - Merge stores causes wrong dependency.
%struct1 = type { ptr, ptr, i32, i32, i16, i16, ptr, ptr }
@gv0 = internal unnamed_addr global i32 0, align 4
@gv1 = internal unnamed_addr global ptr null, align 8

; Scenario: zero the first 40 bytes of %fde with memset, overwrite individual
; %struct1 fields, pass %fde to fcntl, then reload the fd field (field 2).
; In the expected output every store to %fde precedes the fcntl call, the
; 16-byte zeroing store that covers the fd field precedes the store of %fd,
; and the fd field is loaded again after the call.
; NOTE(review): FileCheck is invoked with a single custom prefix, so the line
; below that uses the default CHECK prefix is presumably never evaluated -
; confirm before relying on it.
define void @test(ptr %fde, i32 %fd, ptr %func, ptr %arg) uwtable {
;CHECK-LABEL: test
; A53-LABEL: test:
; A53: // %bb.0: // %entry
; A53-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; A53-NEXT: .cfi_def_cfa_offset 16
; A53-NEXT: .cfi_offset w19, -8
; A53-NEXT: .cfi_offset w30, -16
; A53-NEXT: .cfi_remember_state
; A53-NEXT: movi v0.2d, #0000000000000000
; A53-NEXT: mov x8, x0
; A53-NEXT: mov x19, x8
; A53-NEXT: mov w0, w1
; A53-NEXT: mov w9, #256
; A53-NEXT: stp x2, x3, [x8, #32]
; A53-NEXT: mov x2, x8
; A53-NEXT: str q0, [x19, #16]!
; A53-NEXT: str w1, [x19]
; A53-NEXT: mov w1, #4
; A53-NEXT: str q0, [x8]
; A53-NEXT: strh w9, [x8, #24]
; A53-NEXT: str wzr, [x8, #20]
; A53-NEXT: bl fcntl
; A53-NEXT: adrp x9, gv0
; A53-NEXT: add x9, x9, :lo12:gv0
; A53-NEXT: cmp x19, x9
; A53-NEXT: b.eq .LBB0_4
; A53-NEXT: // %bb.1:
; A53-NEXT: ldr w8, [x19]
; A53-NEXT: ldr w9, [x9]
; A53-NEXT: .p2align 4, , 8
; A53-NEXT: .LBB0_2: // %while.body.i.split.ver.us
; A53-NEXT: // =>This Inner Loop Header: Depth=1
; A53-NEXT: lsl w9, w9, #1
; A53-NEXT: cmp w9, w8
; A53-NEXT: b.le .LBB0_2
; A53-NEXT: // %bb.3: // %while.end.i
; A53-NEXT: bl foo
; A53-NEXT: adrp x8, gv1
; A53-NEXT: str x0, [x8, :lo12:gv1]
; A53-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; A53-NEXT: .cfi_def_cfa_offset 0
; A53-NEXT: .cfi_restore w19
; A53-NEXT: .cfi_restore w30
; A53-NEXT: ret
; A53-NEXT: .p2align 4, , 8
; A53-NEXT: .LBB0_4: // %while.body.i.split
; A53-NEXT: // =>This Inner Loop Header: Depth=1
; A53-NEXT: .cfi_restore_state
; A53-NEXT: b .LBB0_4
entry:
  ; Zero bytes [0, 40) of %fde.
  tail call void @llvm.memset.p0.i64(ptr align 8 %fde, i8 0, i64 40, i1 false)
  ; Field 4 (i16): store the constant 256.
  %state = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 4
  store i16 256, ptr %state, align 8
  ; Field 2 (i32): store %fd. This is the field reloaded after the call.
  %fd1 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 2
  store i32 %fd, ptr %fd1, align 8
  ; Field 3 (i32): store zero.
  %force_eof = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 3
  store i32 0, ptr %force_eof, align 4
  ; Fields 6 and 7 (ptr): store %func and %arg.
  %func2 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 6
  store ptr %func, ptr %func2, align 8
  %arg3 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 7
  store ptr %arg, ptr %arg3, align 8
  ; %fde is passed to the callee, so the stores above must be complete here.
  %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, ptr %fde) #6
  ; Reload field 2 after the call and branch on its sign.
  %0 = load i32, ptr %fd1, align 8
  %cmp.i = icmp slt i32 %0, 0
  br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
if.then.i:
  unreachable

while.body.i.preheader:
  ; Select a loop version depending on whether the field-2 address equals @gv0.
  %1 = load i32, ptr @gv0, align 4
  %2 = icmp eq ptr %fd1, @gv0
  br i1 %2, label %while.body.i.split, label %while.body.i.split.ver.us.preheader

while.body.i.split.ver.us.preheader:
  br label %while.body.i.split.ver.us

while.body.i.split.ver.us:
  ; Double the value loaded from @gv0 until it is signed-greater than %0.
  %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %1, %while.body.i.split.ver.us.preheader ]
  %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
  %3 = icmp sgt i32 %mul.i.ver.us, %0
  br i1 %3, label %while.end.i, label %while.body.i.split.ver.us

while.body.i.split:
  ; Branches only to itself.
  br label %while.body.i.split

while.end.i:
  ; Publish the pointer returned by @foo in @gv1.
  %call.i = tail call ptr @foo()
  store ptr %call.i, ptr @gv1, align 8
  br label %exit

exit:
  ret void
}

; TODO: rev16?

; Exchange the two adjacent bytes at %p and %p+1 in place. The expected output
; uses two byte loads followed by two byte stores.
define void @rotate16_in_place(ptr %p) {
; A53-LABEL: rotate16_in_place:
; A53: // %bb.0:
; A53-NEXT: ldrb w8, [x0, #1]
; A53-NEXT: ldrb w9, [x0]
; A53-NEXT: strb w8, [x0]
; A53-NEXT: strb w9, [x0, #1]
; A53-NEXT: ret
  %p1 = getelementptr i8, ptr %p, i64 1
  ; Both loads are issued before either store.
  %i0 = load i8, ptr %p, align 1
  %i1 = load i8, ptr %p1, align 1
  store i8 %i1, ptr %p, align 1
  store i8 %i0, ptr %p1, align 1
  ret void
}

; TODO: rev16?

; Same byte exchange as above, but the swapped bytes are written to %q and
; %q+1 instead of back to %p.
define void @rotate16(ptr %p, ptr %q) {
; A53-LABEL: rotate16:
; A53: // %bb.0:
; A53-NEXT: ldrb w8, [x0, #1]
; A53-NEXT: ldrb w9, [x0]
; A53-NEXT: strb w8, [x1]
; A53-NEXT: strb w9, [x1, #1]
; A53-NEXT: ret
  %p1 = getelementptr i8, ptr %p, i64 1
  %q1 = getelementptr i8, ptr %q, i64 1
  %i0 = load i8, ptr %p, align 1
  %i1 = load i8, ptr %p1, align 1
  store i8 %i1, ptr %q, align 1
  store i8 %i0, ptr %q1, align 1
  ret void
}

; Exchange the two adjacent i16 values at %p in place. The expected output is
; one 32-bit load, a rotate right by 16 and one 32-bit store.
define void @rotate32_in_place(ptr %p) {
; A53-LABEL: rotate32_in_place:
; A53: // %bb.0:
; A53-NEXT: ldr w8, [x0]
; A53-NEXT: ror w8, w8, #16
; A53-NEXT: str w8, [x0]
; A53-NEXT: ret
  %p1 = getelementptr i16, ptr %p, i64 1
  %i0 = load i16, ptr %p, align 2
  %i1 = load i16, ptr %p1, align 2
  store i16 %i1, ptr %p, align 2
  store i16 %i0, ptr %p1, align 2
  ret void
}

; Load the i16 values at elements 0 and 1 of %p and store them swapped at
; elements 42 and 43 (byte offset 84 in the expected output).
define void @rotate32(ptr %p) {
; A53-LABEL: rotate32:
; A53: // %bb.0:
; A53-NEXT: ldr w8, [x0]
; A53-NEXT: ror w8, w8, #16
; A53-NEXT: str w8, [x0, #84]
; A53-NEXT: ret
  %p1 = getelementptr i16, ptr %p, i64 1
  %p42 = getelementptr i16, ptr %p, i64 42
  %p43 = getelementptr i16, ptr %p, i64 43
  %i0 = load i16, ptr %p, align 2
  %i1 = load i16, ptr %p1, align 2
  store i16 %i1, ptr %p42, align 2
  store i16 %i0, ptr %p43, align 2
  ret void
}

; Prefer paired memops over rotate.

; Exchange the two adjacent i32 values at %p in place. The expected output is
; a paired load followed by a paired store with the registers swapped.
define void @rotate64_in_place(ptr %p) {
; A53-LABEL: rotate64_in_place:
; A53: // %bb.0:
; A53-NEXT: ldp w9, w8, [x0]
; A53-NEXT: stp w8, w9, [x0]
; A53-NEXT: ret
  %p1 = getelementptr i32, ptr %p, i64 1
  %i0 = load i32, ptr %p, align 4
  %i1 = load i32, ptr %p1, align 4
  store i32 %i1, ptr %p, align 4
  store i32 %i0, ptr %p1, align 4
  ret void
}

; Prefer paired memops over rotate.

; Load the i32 values at elements 0 and 1 of %p and store them swapped at
; elements 2 and 3 (byte offset 8 in the expected output).
define void @rotate64(ptr %p) {
; A53-LABEL: rotate64:
; A53: // %bb.0:
; A53-NEXT: ldp w9, w8, [x0]
; A53-NEXT: stp w8, w9, [x0, #8]
; A53-NEXT: ret
  %p1 = getelementptr i32, ptr %p, i64 1
  %p2 = getelementptr i32, ptr %p, i64 2
  %p3 = getelementptr i32, ptr %p, i64 3
  %i0 = load i32, ptr %p, align 4
  %i1 = load i32, ptr %p1, align 4
  store i32 %i1, ptr %p2, align 4
  store i32 %i0, ptr %p3, align 4
  ret void
}

; External declarations for the callees referenced by @test.
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
declare i32 @fcntl(i32, i32, ...)
declare noalias ptr @foo()