; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
; RUN: llc -debugify-and-strip-all-safe -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s

define ptr @test_simple_load_pre(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_load_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 1 :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %next
  ret ptr %next
}

define ptr @test_unused_load_pre(ptr %ptr) {

  ; CHECK-LABEL: name: test_unused_load_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
  ; CHECK-NEXT: $x0 = COPY [[C1]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %next
  ret ptr null
}

define ptr @test_simple_store_pre(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_store_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
  ; CHECK-NEXT: [[INDEXED_STORE:%[0-9]+]]:_(p0) = G_INDEXED_STORE [[C1]](s8), [[COPY]], [[C]](s64), 1 :: (volatile store (s8) into %ir.next)
  ; CHECK-NEXT: $x0 = COPY [[INDEXED_STORE]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  store volatile i8 0, ptr %next
  ret ptr %next
}

; The potentially pre-indexed address is used as the value stored. Converting
; would produce the value too late, but only by one instruction.
define ptr @test_store_pre_val_loop(ptr %ptr) {

  ; CHECK-LABEL: name: test_store_pre_val_loop
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 336
  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[PTR_ADD]](p0) :: (volatile store (p0) into %ir.next)
  ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %next = getelementptr ptr, ptr %ptr, i32 42
  store volatile ptr %next, ptr %next
  ret ptr %next
}

; The potentially pre-indexed address is used between the GEP that computes it
; and the load.
define ptr @test_load_pre_before(ptr %ptr) {

  ; CHECK-LABEL: name: test_load_pre_before
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT: BL @bar, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  call void @bar(ptr %next)
  load volatile i8, ptr %next
  ret ptr %next
}

; Materializing the base into a writable register (from sp/fp) would be just as
; bad as the original GEP.
define ptr @test_alloca_load_pre() {

  ; CHECK-LABEL: name: test_alloca_load_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr
  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %ptr = alloca i8, i32 128
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %next
  ret ptr %next
}

define ptr @test_simple_load_post(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_load_post
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 0 :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %ptr
  ret ptr %next
}

define ptr @test_simple_load_post_gep_after(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_load_post_gep_after
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
  ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %offset = call i64 @get_offset()
  load volatile i8, ptr %ptr
  %next = getelementptr i8, ptr %ptr, i64 %offset
  ret ptr %next
}

define ptr @test_load_post_keep_looking(ptr %ptr) {

  ; CHECK-LABEL: name: test_load_post_keep_looking
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
  ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PTRTOINT]](s64)
  ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
  ; CHECK-NEXT: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
  ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[ADD_LOW]](p0) :: (store (s8) into @var)
  ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %offset = call i64 @get_offset()
  load volatile i8, ptr %ptr
  %intval = ptrtoint ptr %ptr to i8
  store i8 %intval, ptr @var

  %next = getelementptr i8, ptr %ptr, i64 %offset
  ret ptr %next
}

; The base is a frame index. Using indexing would need a copy anyway.
define ptr @test_load_post_alloca() {

  ; CHECK-LABEL: name: test_load_post_alloca
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
  ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  %ptr = alloca i8, i32 128
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %ptr
  ret ptr %next
}

; The offset computation does not dominate the load we might be indexing.
define ptr @test_load_post_gep_offset_after(ptr %ptr) {

  ; CHECK-LABEL: name: test_load_post_gep_offset_after
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $x0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
  ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT: RET_ReallyLR implicit $x0
  load volatile i8, ptr %ptr
  %offset = call i64 @get_offset()
  %next = getelementptr i8, ptr %ptr, i64 %offset
  ret ptr %next
}

declare void @bar(ptr)
declare i64 @get_offset()
@var = global i8 0
@varp8 = global ptr null