// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(sroa))" --split-input-file | FileCheck %s

// CHECK-LABEL: llvm.func @memset
llvm.func @memset() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 16 bytes means it will span over the first 4 i32 entries.
  %memset_len = llvm.mlir.constant(16 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline
llvm.func @memset_inline() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 16 bytes means it will span over the first 4 i32 entries.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 16}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_partial
llvm.func @memset_partial() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only the second i32 will actually be used. As the memset writes up
  // to half of it, only 2 bytes will be set.
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(2 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 6 bytes means it will span over the first i32 and half of the second i32.
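  // SROA is expected to keep only the part of the memset that overlaps the
  // surviving slot, shrinking the rewritten length from 6 to 2 (see the
  // MEMSET_LEN capture above).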
  %memset_len = llvm.mlir.constant(6 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_partial
llvm.func @memset_inline_partial() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only the second i32 will actually be used. As the memset writes up
  // to half of it, only 2 bytes will be set.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 6 bytes means it will span over the first i32 and half of the second i32.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_full
llvm.func @memset_full() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 40 bytes means it will span over the entire array.
  %memset_len = llvm.mlir.constant(40 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_full
llvm.func @memset_inline_full() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 40 bytes means it will span over the entire array.
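  // Unlike the plain memset, the inline variant carries its length as an
  // attribute, so the rewrite is expected to update `len` in place instead of
  // materializing a new length constant.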
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 40}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_too_much
llvm.func @memset_too_much() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(41 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 41 bytes means it will span over the entire array, and then some.
  %memset_len = llvm.mlir.constant(41 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_too_much
llvm.func @memset_inline_too_much() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 41 bytes means it will span over the entire array, and then some.
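  // Since the memset writes past the end of the alloca, the slot cannot be
  // split and the operation is expected to be left untouched.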
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 41 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 41}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_no_volatile
llvm.func @memset_no_volatile() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(16 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  %memset_len = llvm.mlir.constant(16 : i32) : i32
  // Volatile memsets must be preserved, so the alloca is not split.
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = true}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = true}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_no_volatile
llvm.func @memset_inline_no_volatile() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // Volatile memsets must be preserved, so the alloca is not split.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 16 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 16}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @indirect_memset
llvm.func @indirect_memset() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will only cover the selected element.
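  // A memset through a GEP is still splittable when it exactly covers the
  // selected field; the GEP is expected to fold away so that the memset
  // targets the field's own slot directly.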
  %memset_len = llvm.mlir.constant(4 : i32) : i32
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%2, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @indirect_memset_inline
llvm.func @indirect_memset_inline() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will only cover the selected element.
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @invalid_indirect_memset
llvm.func @invalid_indirect_memset() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(6 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will go slightly beyond one of the elements.
  %memset_len = llvm.mlir.constant(6 : i32) : i32
  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0]
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset"(%[[GEP]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%2, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @invalid_indirect_memset_inline
llvm.func @invalid_indirect_memset_inline() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will go slightly beyond one of the elements.
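  // Because the 6-byte write crosses the boundary between the two fields, the
  // struct cannot be split; both the GEP and the memset are expected to
  // survive unchanged.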
  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0]
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset.inline"(%[[GEP]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_double_use
llvm.func @memset_double_use() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 8 bytes means it will span over both fields (the i32 and the f32).
  %memset_len = llvm.mlir.constant(8 : i32) : i32
  // We expect two generated memsets, one for each field.
  // CHECK-NOT: "llvm.intr.memset"
  // After SROA, each field is its own 4-byte slot, so each memset only sets 4 bytes.
  // CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  // CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  // CHECK-NOT: "llvm.intr.memset"
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // We use this exotic bitcast to consume the f32 easily. Semantics do not matter here.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_double_use
llvm.func @memset_inline_double_use() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // We expect two generated memsets, one for each field.
  // CHECK-NOT: "llvm.intr.memset.inline"
  // After SROA, each field is its own 4-byte slot, so each memset only sets 4 bytes.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK-NOT: "llvm.intr.memset.inline"
  // 8 bytes means it will span over both fields (the i32 and the f32).
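  // The single 8-byte inline memset is expected to be split into one 4-byte
  // memset per field, as checked above.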
325 "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> () 326 %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)> 327 %3 = llvm.load %2 : !llvm.ptr -> i32 328 %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)> 329 %5 = llvm.load %4 : !llvm.ptr -> f32 330 // We use this exotic bitcast to use the f32 easily. Semantics do not matter here. 331 %6 = llvm.bitcast %5 : f32 to i32 332 %7 = llvm.add %3, %6 : i32 333 llvm.return %7 : i32 334} 335 336// ----- 337 338// CHECK-LABEL: llvm.func @memset_considers_alignment 339llvm.func @memset_considers_alignment() -> i32 { 340 // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 341 // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 342 // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 343 // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set. 344 // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32 345 %0 = llvm.mlir.constant(1 : i32) : i32 346 %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr 347 %memset_value = llvm.mlir.constant(42 : i8) : i8 348 // 8 bytes means it will span over the i8 and the i32 entry. 349 // Because of padding, the f32 entry will not be touched. 350 %memset_len = llvm.mlir.constant(8 : i32) : i32 351 // Even though the two i32 are used, only one memset should be generated, 352 // as the second i32 is not touched by the initial memset. 353 // CHECK-NOT: "llvm.intr.memset" 354 // CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}> 355 // CHECK-NOT: "llvm.intr.memset" 356 "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> () 357 %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)> 358 %3 = llvm.load %2 : !llvm.ptr -> i32 359 %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)> 360 %5 = llvm.load %4 : !llvm.ptr -> f32 361 // We use this exotic bitcast to use the f32 easily. Semantics do not matter here. 362 %6 = llvm.bitcast %5 : f32 to i32 363 %7 = llvm.add %3, %6 : i32 364 llvm.return %7 : i32 365} 366 367// ----- 368 369// CHECK-LABEL: llvm.func @memset_inline_considers_alignment 370llvm.func @memset_inline_considers_alignment() -> i32 { 371 // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 372 // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 373 // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 374 // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set. 375 %0 = llvm.mlir.constant(1 : i32) : i32 376 %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr 377 %memset_value = llvm.mlir.constant(42 : i8) : i8 378 // 8 bytes means it will span over the i8 and the i32 entry. 379 // Because of padding, the f32 entry will not be touched. 380 // Even though the two i32 are used, only one memset should be generated, 381 // as the second i32 is not touched by the initial memset. 
  // CHECK-NOT: "llvm.intr.memset.inline"
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK-NOT: "llvm.intr.memset.inline"
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // We use this exotic bitcast to consume the f32 easily. Semantics do not matter here.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_considers_packing
llvm.func @memset_considers_packing() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 8 bytes means it will span over all the fields, because the struct is packed and has no padding.
  %memset_len = llvm.mlir.constant(8 : i32) : i32
  // Now all fields are touched by the memset.
  // CHECK-NOT: "llvm.intr.memset"
  // After SROA, only 32-bit values will actually be used, so only 4 bytes will be set.
  // CHECK: %[[MEMSET_LEN_WHOLE:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_WHOLE]]) <{isVolatile = false}>
  // CHECK: %[[MEMSET_LEN_PARTIAL:.*]] = llvm.mlir.constant(3 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_PARTIAL]]) <{isVolatile = false}>
  // CHECK-NOT: "llvm.intr.memset"
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // We use this exotic bitcast to consume the f32 easily. Semantics do not matter here.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_considers_packing
llvm.func @memset_inline_considers_packing() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // Now all fields are touched by the memset.
  // CHECK-NOT: "llvm.intr.memset.inline"
  // After SROA, only 32-bit values will actually be used, so only 4 bytes will be set.
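  // In the packed layout the i32 starts at offset 1 and the f32 at offset 5,
  // so the 8-byte memset covers all of the i32 but only the first 3 bytes of
  // the f32, matching the lengths checked below.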
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 3 : i64}>
  // CHECK-NOT: "llvm.intr.memset.inline"
  // 8 bytes means it will span over all the fields, because the struct is packed and has no padding.
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // We use this exotic bitcast to consume the f32 easily. Semantics do not matter here.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  %memcpy_len = llvm.mlir.constant(40 : i32) : i32
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  // After SROA, only one i32 will actually be used, so only 4 bytes will be copied.
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // Even though the load only uses one element, the memcpy reads the whole
  // array, so all four slots are kept.
  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
  %memcpy_len = llvm.mlir.constant(16 : i32) : i32
  // Unfortunately, because of FileCheck limitations, it is not possible to check which slot gets read from.
  // We can only check that the number of operations and allocated slots is correct, which should be sufficient
  // as unused slots are not generated.
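  // Each rewritten memcpy below copies one 4-byte slot into the matching
  // element of the destination array.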
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  "llvm.intr.memcpy"(%other_array, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_double
llvm.func @memcpy_double() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  // CHECK: = llvm.alloca %[[ALLOCA_LEN]] x i32
  // TODO: This should also disappear, as a GEP with all-zero indices should be
  // ignored.
  // CHECK: = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<1 x i32>
  %1 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
  %2 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
  // Match the dead constant, to avoid collision with the newly created one.
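  // SROA does not erase the original length constant; it is merely left
  // unused once the memcpy is rewritten to use a new one.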
  // CHECK: llvm.mlir.constant
  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
  // CHECK-NOT: "llvm.intr.memcpy"
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK-NOT: "llvm.intr.memcpy"
  "llvm.intr.memcpy"(%1, %2, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %3 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<1 x i32>
  %4 = llvm.load %3 : !llvm.ptr -> i32
  llvm.return %4 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_no_partial
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_no_partial(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(21 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  // 21 bytes only partially covers the sixth i32, so the alloca cannot be split.
  %memcpy_len = llvm.mlir.constant(21 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[OTHER_ARRAY]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_no_volatile
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_no_volatile(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(40 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  %memcpy_len = llvm.mlir.constant(40 : i32) : i32
  // Volatile copies must be preserved, so the alloca is not split.
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[OTHER_ARRAY]], %[[MEMCPY_LEN]]) <{isVolatile = true}>
  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = true}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memmove_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memmove_dest(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  %memmove_len = llvm.mlir.constant(40 : i32) : i32
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  // After SROA, only one i32 will actually be used, so only 4 bytes will be copied.
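  // memmove is expected to be narrowed exactly like memcpy: the copy is
  // reduced to the one surviving 4-byte slot.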
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  "llvm.intr.memmove"(%1, %other_array, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memmove_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memmove_src(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
  %memmove_len = llvm.mlir.constant(16 : i32) : i32
  // Unfortunately, because of FileCheck limitations, it is not possible to check which slot gets read from.
  // We can only check that the number of operations and allocated slots is correct, which should be sufficient
  // as unused slots are not generated.
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  "llvm.intr.memmove"(%other_array, %1, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_inline_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_inline_dest(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // After SROA, only one i32 will actually be used, so only 4 bytes will be copied.
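  // As with memset.inline, the length is an attribute and is expected to be
  // rewritten in place from 40 to 4.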
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  // CHECK: "llvm.intr.memcpy.inline"(%[[ALLOCA]], %[[SLOT_IN_OTHER]]) <{isVolatile = false, len = 4 : i32}>
  "llvm.intr.memcpy.inline"(%1, %other_array) <{isVolatile = false, len = 40 : i32}> : (!llvm.ptr, !llvm.ptr) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_inline_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_inline_src(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // Even though the load only uses one element, the inline memcpy reads the
  // whole array, so all four slots are kept.
  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
  // Unfortunately, because of FileCheck limitations, it is not possible to check which slot gets read from.
  // We can only check that the number of operations and allocated slots is correct, which should be sufficient
  // as unused slots are not generated.
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  "llvm.intr.memcpy.inline"(%other_array, %1) <{isVolatile = false, len = 16 : i32}> : (!llvm.ptr, !llvm.ptr) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}