# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -debugify-and-strip-all-safe -mtriple aarch64 -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombiner-only-enable-rule="load_or_combine" -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LITTLE
# RUN: llc -debugify-and-strip-all-safe -mtriple arm64eb -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombiner-only-enable-rule="load_or_combine" -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=BIG

# REQUIRES: asserts

# Test that we can combine patterns like
#
# s8* x = ...
# s32 y = (x[0] | (x[1] << 8)) | ((x[2] << 16) | (x[3] << 24))
#
# Into either a load, or a load with a bswap.

...
---
name: s8_loads_to_s32_little_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s8* x = ...
    ; s32 y = (x[0] | (x[1] << 8)) | ((x[2] << 16) | (x[3] << 24))
    ;
    ; -> Little endian: Load from x[0]
    ; -> Big endian: Load from x[0] + BSWAP

    ; LITTLE-LABEL: name: s8_loads_to_s32_little_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: s8_loads_to_s32_little_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_2:_(s64) = G_CONSTANT i64 2
    %cst_3:_(s64) = G_CONSTANT i64 3

    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)

    %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))

    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))

    %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)

    ; Note the shape of the tree:
    ;
    ; byte byte  byte byte
    ;   \  /       \  /
    ;    OR         OR
    ;      \        /
    ;       \      /
    ;          OR

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2

    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: s8_loads_to_s32_big_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s8* x = ...
    ; s32 y = ((x[0] << 24) | (x[1] << 16)) | ((x[2] << 8) | x[3])
    ;
    ; -> Little endian: Load from x[0] + BSWAP
    ; -> Big endian: Load from x[0]

    ; LITTLE-LABEL: name: s8_loads_to_s32_big_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
    ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: s8_loads_to_s32_big_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_2:_(s64) = G_CONSTANT i64 2
    %cst_3:_(s64) = G_CONSTANT i64 3

    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)

    %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))

    %byte0:_(s32) = nuw G_SHL %elt0, %cst_24(s32)
    %byte1:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_8(s32)
    %byte3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2

    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: different_or_pattern
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; Slightly different OR tree.
    ;
    ; s8* x = ...
    ; s32 y = (((x[0] | (x[1] << 8)) | (x[2] << 16)) | (x[3] << 24))
    ;
    ; -> Little endian: Load from x[0]
    ; -> Big endian: Load from x[0] + BSWAP

    ; LITTLE-LABEL: name: different_or_pattern
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: different_or_pattern
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_2:_(s64) = G_CONSTANT i64 2
    %cst_3:_(s64) = G_CONSTANT i64 3

    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)

    %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))

    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))

    %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)

    ; Note the shape of the tree:
    ;
    ; byte byte
    ;   \  /
    ;   OR_1  byte
    ;     \   /
    ;     OR_2
    ;        \
    ;        ...

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %or1, %byte2
    %full_load:_(s32) = G_OR %or2, %byte3

    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: s16_loads_to_s32_little_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s16* x = ...
    ; s32 y = x[0] | (x[1] << 16)
    ;
    ; -> Little endian: Load from x[0]
    ; -> Big endian: Load from x[0] + BSWAP

    ; LITTLE-LABEL: name: s16_loads_to_s32_little_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: s16_loads_to_s32_little_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)

    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: s16_loads_to_s32_big_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s16 *x = ...
    ; s32 y = x[1] | (x[0] << 16)
    ;
    ; -> Little endian: Load from x[0] + BSWAP
    ; -> Big endian: Load from x[0]

    ; LITTLE-LABEL: name: s16_loads_to_s32_big_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: s16_loads_to_s32_big_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)

    %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt0, %cst_16(s32)
    %low_half:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: s16_loads_to_s64_little_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s16 *x = ...
    ; s64 y = (x[0] | (x[1] << 16)) | ((x[2] << 32) | (x[3] << 48))
    ;
    ; -> Little endian: Load from x[0]
    ; -> Big endian: Load from x[0] + BSWAP

    ; LITTLE-LABEL: name: s16_loads_to_s64_little_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
    ; LITTLE: $x1 = COPY %full_load(s64)
    ; LITTLE: RET_ReallyLR implicit $x1
    ; BIG-LABEL: name: s16_loads_to_s64_little_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
    ; BIG: %full_load:_(s64) = G_BSWAP [[LOAD]]
    ; BIG: $x1 = COPY %full_load(s64)
    ; BIG: RET_ReallyLR implicit $x1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_2:_(s64) = G_CONSTANT i64 2
    %cst_3:_(s64) = G_CONSTANT i64 3

    %cst_16:_(s64) = G_CONSTANT i64 16
    %cst_32:_(s64) = G_CONSTANT i64 32
    %cst_48:_(s64) = G_CONSTANT i64 48

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)

    %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))

    %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16))
    %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))

    %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
    %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_32(s64)
    %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)

    %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
    %or2:_(s64) = G_OR %byte4_byte5, %byte6_byte7
    %full_load:_(s64) = G_OR %or1, %or2

    $x1 = COPY %full_load(s64)
    RET_ReallyLR implicit $x1

...
---
name: s16_loads_to_s64_big_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s16 *x = ...
    ; s64 y = (x[3] | (x[2] << 16)) | ((x[1] << 32) | (x[0] << 48))
    ;
    ; -> Little endian: Load from x[0] + BSWAP
    ; -> Big endian: Load from x[0]

    ; LITTLE-LABEL: name: s16_loads_to_s64_big_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
    ; LITTLE: %full_load:_(s64) = G_BSWAP [[LOAD]]
    ; LITTLE: $x1 = COPY %full_load(s64)
    ; LITTLE: RET_ReallyLR implicit $x1
    ; BIG-LABEL: name: s16_loads_to_s64_big_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
    ; BIG: $x1 = COPY %full_load(s64)
    ; BIG: RET_ReallyLR implicit $x1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_2:_(s64) = G_CONSTANT i64 2
    %cst_3:_(s64) = G_CONSTANT i64 3

    %cst_16:_(s64) = G_CONSTANT i64 16
    %cst_32:_(s64) = G_CONSTANT i64 32
    %cst_48:_(s64) = G_CONSTANT i64 48

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)

    %elt0:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16))

    %byte0_byte1:_(s64) = nuw G_SHL %elt0, %cst_48(s64)
    %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_32(s64)
    %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_16(s64)
    %byte6_byte7:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))

    %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
    %or2:_(s64) = G_OR %byte4_byte5, %byte6_byte7
    %full_load:_(s64) = G_OR %or1, %or2

    $x1 = COPY %full_load(s64)
    RET_ReallyLR implicit $x1


...
---
name: nonzero_start_idx_positive_little_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s8* x = ...
    ; s32 y = (x[1] | (x[2] << 8)) | ((x[3] << 16) | (x[4] << 24))
    ;
    ; -> Little endian: Load from x[1]
    ; -> Big endian: Load from x[1] + BSWAP

    ; LITTLE-LABEL: name: nonzero_start_idx_positive_little_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %ptr:_(p0) = COPY $x0
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %ptr:_(p0) = COPY $x0
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_2:_(s64) = G_CONSTANT i64 2
    %cst_3:_(s64) = G_CONSTANT i64 3
    %cst_4:_(s64) = G_CONSTANT i64 4

    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x0
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
    %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)

    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    %elt4:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8))

    %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt2, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt3, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt4, %cst_24(s32)

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2

    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: nonzero_start_idx_positive_big_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s8* x = ...
    ; s32 y = (x[4] | (x[3] << 8)) | ((x[2] << 16) | (x[1] << 24))
    ;
    ; -> Little endian: Load from x[1] + BSWAP
    ; -> Big endian: Load from x[1]

    ; LITTLE-LABEL: name: nonzero_start_idx_positive_big_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %ptr:_(p0) = COPY $x0
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
    ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: nonzero_start_idx_positive_big_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %ptr:_(p0) = COPY $x0
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_2:_(s64) = G_CONSTANT i64 2
    %cst_3:_(s64) = G_CONSTANT i64 3
    %cst_4:_(s64) = G_CONSTANT i64 4

    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x0
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
    %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)

    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))

    %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt3, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt1, %cst_24(s32)

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2

    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: nonzero_start_idx_negative_little_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s8* x = ...
    ; s32 y = (x[-3] | (x[-2] << 8)) | ((x[-1] << 16) | (x[0] << 24))
    ;
    ; -> Little endian: Load from x[-3]
    ; -> Big endian: Load from x[-3] + BSWAP

    ; LITTLE-LABEL: name: nonzero_start_idx_negative_little_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
    ; LITTLE: %ptr:_(p0) = COPY $x0
    ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
    ; BIG: %ptr:_(p0) = COPY $x0
    ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_neg_1:_(s64) = G_CONSTANT i64 -1
    %cst_neg_2:_(s64) = G_CONSTANT i64 -2
    %cst_neg_3:_(s64) = G_CONSTANT i64 -3

    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x0
    %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
    %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
    %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)

    %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
    %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8))
    %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))

    %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt_neg_2, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt_neg_1, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt_0, %cst_24(s32)

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2

    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: nonzero_start_idx_negative_big_endian_pat
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; s8* x = ...
    ; s32 y = (x[0] | (x[-1] << 8)) | ((x[-2] << 16) | (x[-3] << 24))
    ;
    ; -> Little endian: Load from x[-3] + BSWAP
    ; -> Big endian: Load from x[-3]

    ; LITTLE-LABEL: name: nonzero_start_idx_negative_big_endian_pat
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
    ; LITTLE: %ptr:_(p0) = COPY $x0
    ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
    ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: nonzero_start_idx_negative_big_endian_pat
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
    ; BIG: %ptr:_(p0) = COPY $x0
    ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
    ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_neg_1:_(s64) = G_CONSTANT i64 -1
    %cst_neg_2:_(s64) = G_CONSTANT i64 -2
    %cst_neg_3:_(s64) = G_CONSTANT i64 -3

    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x0
    %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
    %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
    %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)

    %elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8))
    %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
    %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8))
    %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))

    %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt_neg_1, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt_neg_2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt_neg_3, %cst_24(s32)

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2

    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: dont_combine_volatile
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; Combine should only happen with unordered loads.

    ; LITTLE-LABEL: name: dont_combine_volatile
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_volatile
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)

    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: dont_wrong_memop_size
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; Combine should only happen when the loads load the same size.

    ; LITTLE-LABEL: name: dont_wrong_memop_size
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_wrong_memop_size
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; BIG: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)

    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: dont_combine_wrong_offset
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; This is not equivalent to a 32-bit load with/without a BSWAP:
    ;
    ; s16 *x = ...
    ; s32 y = x[0] | (x[1] << 24)

    ; LITTLE-LABEL: name: dont_combine_wrong_offset
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_wrong_offset
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_24:_(s32) = G_CONSTANT i32 24

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)

    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: dont_combine_wrong_offset_2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; This does not correspond to a 32-bit load with/without a BSWAP:
    ;
    ; s16 *x = ...
    ; s32 y = x[0] | (x[1] << 8)

    ; LITTLE-LABEL: name: dont_combine_wrong_offset_2
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_wrong_offset_2
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_8:_(s32) = G_CONSTANT i32 8

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)

    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: dont_combine_missing_load
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; This is missing x[2], so we shouldn't combine:
    ;
    ; s16 *x = ...
    ; s64 y = (x[0] | (x[1] << 16)) | (x[3] << 48)

    ; LITTLE-LABEL: name: dont_combine_missing_load
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3
    ; LITTLE: %cst_16:_(s64) = G_CONSTANT i64 16
    ; LITTLE: %cst_48:_(s64) = G_CONSTANT i64 48
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
    ; LITTLE: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
    ; LITTLE: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
    ; LITTLE: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)
    ; LITTLE: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
    ; LITTLE: %full_load:_(s64) = G_OR %or1, %byte6_byte7
    ; LITTLE: $x1 = COPY %full_load(s64)
    ; LITTLE: RET_ReallyLR implicit $x1
    ; BIG-LABEL: name: dont_combine_missing_load
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3
    ; BIG: %cst_16:_(s64) = G_CONSTANT i64 16
    ; BIG: %cst_48:_(s64) = G_CONSTANT i64 48
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
    ; BIG: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
    ; BIG: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
    ; BIG: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)
    ; BIG: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
    ; BIG: %full_load:_(s64) = G_OR %or1, %byte6_byte7
    ; BIG: $x1 = COPY %full_load(s64)
    ; BIG: RET_ReallyLR implicit $x1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_3:_(s64) = G_CONSTANT i64 3

    %cst_16:_(s64) = G_CONSTANT i64 16
    %cst_48:_(s64) = G_CONSTANT i64 48

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)

    %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))

    %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))

    %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
    %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)

    %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
    %full_load:_(s64) = G_OR %or1, %byte6_byte7

    $x1 = COPY %full_load(s64)
    RET_ReallyLR implicit $x1

...
---
name: dont_combine_different_addr_spaces
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1

    ; When the loads are from different address spaces, don't combine.

    ; LITTLE-LABEL: name: dont_combine_different_addr_spaces
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_different_addr_spaces
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
    ; 
BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 948 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half 949 ; BIG: $w1 = COPY %full_load(s32) 950 ; BIG: RET_ReallyLR implicit $w1 951 %cst_1:_(s64) = G_CONSTANT i64 1 952 %cst_16:_(s32) = G_CONSTANT i32 16 953 954 %ptr:_(p0) = COPY $x1 955 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 956 957 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16), addrspace 0) 958 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1) 959 %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 960 961 %full_load:_(s32) = G_OR %low_half, %high_half 962 $w1 = COPY %full_load(s32) 963 RET_ReallyLR implicit $w1 964 965... 966--- 967name: dont_combine_duplicate_idx 968tracksRegLiveness: true 969body: | 970 bb.0: 971 liveins: $x0, $x1 972 973 ; If two of the G_PTR_ADDs have the same index, then don't combine. 974 ; 975 ; sN *x = ... 976 ; sM y = (x[i] << A) | (x[i] << B) ... 977 978 ; LITTLE-LABEL: name: dont_combine_duplicate_idx 979 ; LITTLE: liveins: $x0, $x1 980 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 981 ; LITTLE: %reused_idx:_(s64) = G_CONSTANT i64 2 982 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 983 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 984 ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24 985 ; LITTLE: %ptr:_(p0) = COPY $x1 986 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 987 ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) 988 ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) 989 ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 990 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 991 ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) 992 ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8)) 993 ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) 994 ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) 995 ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) 996 ; LITTLE: %or1:_(s32) = G_OR 
%byte0, %byte1 997 ; LITTLE: %or2:_(s32) = G_OR %byte2, %byte3 998 ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2 999 ; LITTLE: $w1 = COPY %full_load(s32) 1000 ; LITTLE: RET_ReallyLR implicit $w1 1001 ; BIG-LABEL: name: dont_combine_duplicate_idx 1002 ; BIG: liveins: $x0, $x1 1003 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 1004 ; BIG: %reused_idx:_(s64) = G_CONSTANT i64 2 1005 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 1006 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 1007 ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24 1008 ; BIG: %ptr:_(p0) = COPY $x1 1009 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1010 ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) 1011 ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) 1012 ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1013 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1014 ; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) 1015 ; BIG: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8)) 1016 ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) 1017 ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) 1018 ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) 1019 ; BIG: %or1:_(s32) = G_OR %byte0, %byte1 1020 ; BIG: %or2:_(s32) = G_OR %byte2, %byte3 1021 ; BIG: %full_load:_(s32) = G_OR %or1, %or2 1022 ; BIG: $w1 = COPY %full_load(s32) 1023 ; BIG: RET_ReallyLR implicit $w1 1024 %cst_1:_(s64) = G_CONSTANT i64 1 1025 %reused_idx:_(s64) = G_CONSTANT i64 2 1026 1027 %cst_8:_(s32) = G_CONSTANT i32 8 1028 %cst_16:_(s32) = G_CONSTANT i32 16 1029 %cst_24:_(s32) = G_CONSTANT i32 24 1030 1031 %ptr:_(p0) = COPY $x1 1032 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1033 %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) 1034 %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) 1035 1036 %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1037 1038 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1039 %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) 
1040 %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8)) 1041 1042 %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) 1043 %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) 1044 %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) 1045 1046 %or1:_(s32) = G_OR %byte0, %byte1 1047 %or2:_(s32) = G_OR %byte2, %byte3 1048 %full_load:_(s32) = G_OR %or1, %or2 1049 1050 $w1 = COPY %full_load(s32) 1051 RET_ReallyLR implicit $w1 1052... 1053--- 1054name: dont_combine_duplicate_offset 1055tracksRegLiveness: true 1056body: | 1057 bb.0: 1058 liveins: $x0, $x1 1059 1060 ; If two of the G_SHLs have the same constant, then we should not combine. 1061 ; 1062 ; sN *x = ... 1063 ; sM y = (x[i] << A) | (x[i+1] << A) ... 1064 1065 ; LITTLE-LABEL: name: dont_combine_duplicate_offset 1066 ; LITTLE: liveins: $x0, $x1 1067 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 1068 ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2 1069 ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3 1070 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 1071 ; LITTLE: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16 1072 ; LITTLE: %ptr:_(p0) = COPY $x1 1073 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1074 ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) 1075 ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) 1076 ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1077 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1078 ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) 1079 ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) 1080 ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) 1081 ; LITTLE: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32) 1082 ; LITTLE: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32) 1083 ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1 1084 ; LITTLE: %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2 1085 ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2 1086 ; LITTLE: $w1 = COPY %full_load(s32) 1087 ; 
LITTLE: RET_ReallyLR implicit $w1 1088 ; BIG-LABEL: name: dont_combine_duplicate_offset 1089 ; BIG: liveins: $x0, $x1 1090 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 1091 ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2 1092 ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3 1093 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 1094 ; BIG: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16 1095 ; BIG: %ptr:_(p0) = COPY $x1 1096 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1097 ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) 1098 ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) 1099 ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1100 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1101 ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) 1102 ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) 1103 ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) 1104 ; BIG: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32) 1105 ; BIG: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32) 1106 ; BIG: %or1:_(s32) = G_OR %byte0, %byte1 1107 ; BIG: %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2 1108 ; BIG: %full_load:_(s32) = G_OR %or1, %or2 1109 ; BIG: $w1 = COPY %full_load(s32) 1110 ; BIG: RET_ReallyLR implicit $w1 1111 %cst_1:_(s64) = G_CONSTANT i64 1 1112 %cst_2:_(s64) = G_CONSTANT i64 2 1113 %cst_3:_(s64) = G_CONSTANT i64 3 1114 1115 %cst_8:_(s32) = G_CONSTANT i32 8 1116 %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16 1117 1118 %ptr:_(p0) = COPY $x1 1119 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1120 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) 1121 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) 1122 1123 %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1124 1125 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1126 %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) 1127 %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) 1128 1129 %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) 1130 
%duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32) 1131 %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32) 1132 1133 %or1:_(s32) = G_OR %byte0, %byte1 1134 %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2 1135 %full_load:_(s32) = G_OR %or1, %or2 1136 1137 $w1 = COPY %full_load(s32) 1138 RET_ReallyLR implicit $w1 1139 1140... 1141--- 1142name: dont_combine_lowest_index_not_zero_offset 1143tracksRegLiveness: true 1144body: | 1145 bb.0: 1146 liveins: $x0, $x1 1147 1148 ; In this case, the lowest index load (e.g. x[0]) does not end up at byte 1149 ; offset 0. We shouldn't combine. 1150 ; 1151 ; s8 *x = ... 1152 ; s32 y = (x[0] << 8) | (x[1]) | (x[2] << 16) ... 1153 1154 ; LITTLE-LABEL: name: dont_combine_lowest_index_not_zero_offset 1155 ; LITTLE: liveins: $x0, $x1 1156 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 1157 ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2 1158 ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3 1159 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 1160 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 1161 ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24 1162 ; LITTLE: %ptr:_(p0) = COPY $x1 1163 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1164 ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) 1165 ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) 1166 ; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1167 ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1168 ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) 1169 ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) 1170 ; LITTLE: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32) 1171 ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) 1172 ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) 1173 ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1 1174 ; LITTLE: %or2:_(s32) = G_OR %byte2, %byte3 1175 ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2 1176 ; LITTLE: $w1 = COPY %full_load(s32) 
1177 ; LITTLE: RET_ReallyLR implicit $w1 1178 ; BIG-LABEL: name: dont_combine_lowest_index_not_zero_offset 1179 ; BIG: liveins: $x0, $x1 1180 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 1181 ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2 1182 ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3 1183 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 1184 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 1185 ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24 1186 ; BIG: %ptr:_(p0) = COPY $x1 1187 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1188 ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) 1189 ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) 1190 ; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1191 ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1192 ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) 1193 ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) 1194 ; BIG: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32) 1195 ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) 1196 ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) 1197 ; BIG: %or1:_(s32) = G_OR %byte0, %byte1 1198 ; BIG: %or2:_(s32) = G_OR %byte2, %byte3 1199 ; BIG: %full_load:_(s32) = G_OR %or1, %or2 1200 ; BIG: $w1 = COPY %full_load(s32) 1201 ; BIG: RET_ReallyLR implicit $w1 1202 %cst_1:_(s64) = G_CONSTANT i64 1 1203 %cst_2:_(s64) = G_CONSTANT i64 2 1204 %cst_3:_(s64) = G_CONSTANT i64 3 1205 1206 %cst_8:_(s32) = G_CONSTANT i32 8 1207 %cst_16:_(s32) = G_CONSTANT i32 16 1208 %cst_24:_(s32) = G_CONSTANT i32 24 1209 1210 %ptr:_(p0) = COPY $x1 1211 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1212 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) 1213 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) 1214 1215 ; This load is index 0 1216 %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 1217 %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) 1218 %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) 1219 %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: 
(load (s8)) 1220 1221 ; ... But it ends up being shifted, so we shouldn't combine. 1222 %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32) 1223 %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) 1224 %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) 1225 1226 %or1:_(s32) = G_OR %byte0, %byte1 1227 %or2:_(s32) = G_OR %byte2, %byte3 1228 %full_load:_(s32) = G_OR %or1, %or2 1229 1230 $w1 = COPY %full_load(s32) 1231 RET_ReallyLR implicit $w1 1232 1233... 1234--- 1235name: dont_combine_more_than_one_use_load 1236tracksRegLiveness: true 1237body: | 1238 bb.0: 1239 liveins: $x0, $x1 1240 1241 ; If any load is used more than once, don't combine. We want to remove the 1242 ; entire tree. 1243 1244 ; LITTLE-LABEL: name: dont_combine_more_than_one_use_load 1245 ; LITTLE: liveins: $x0, $x1 1246 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 1247 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 1248 ; LITTLE: %ptr:_(p0) = COPY $x1 1249 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1250 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1251 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1252 ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1253 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half 1254 ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %low_half 1255 ; LITTLE: $w1 = COPY %extra_use(s32) 1256 ; LITTLE: RET_ReallyLR implicit $w1 1257 ; BIG-LABEL: name: dont_combine_more_than_one_use_load 1258 ; BIG: liveins: $x0, $x1 1259 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 1260 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 1261 ; BIG: %ptr:_(p0) = COPY $x1 1262 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1263 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1264 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1265 ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1266 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half 1267 ; BIG: %extra_use:_(s32) = G_AND %full_load, %low_half 1268 ; 
BIG: $w1 = COPY %extra_use(s32) 1269 ; BIG: RET_ReallyLR implicit $w1 1270 %cst_1:_(s64) = G_CONSTANT i64 1 1271 %cst_16:_(s32) = G_CONSTANT i32 16 1272 1273 %ptr:_(p0) = COPY $x1 1274 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1275 1276 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1277 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1278 %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1279 1280 %full_load:_(s32) = G_OR %low_half, %high_half 1281 %extra_use:_(s32) = G_AND %full_load, %low_half 1282 $w1 = COPY %extra_use(s32) 1283 RET_ReallyLR implicit $w1 1284 1285... 1286--- 1287name: dont_combine_more_than_one_use_shl 1288tracksRegLiveness: true 1289body: | 1290 bb.0: 1291 liveins: $x0, $x1 1292 1293 ; If anything feeding into any of the ors is used more than once, don't 1294 ; combine. 1295 1296 ; LITTLE-LABEL: name: dont_combine_more_than_one_use_shl 1297 ; LITTLE: liveins: $x0, $x1 1298 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 1299 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 1300 ; LITTLE: %ptr:_(p0) = COPY $x1 1301 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1302 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1303 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1304 ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1305 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half 1306 ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %high_half 1307 ; LITTLE: $w1 = COPY %extra_use(s32) 1308 ; LITTLE: RET_ReallyLR implicit $w1 1309 ; BIG-LABEL: name: dont_combine_more_than_one_use_shl 1310 ; BIG: liveins: $x0, $x1 1311 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 1312 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 1313 ; BIG: %ptr:_(p0) = COPY $x1 1314 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1315 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1316 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1317 ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, 
%cst_16(s32) 1318 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half 1319 ; BIG: %extra_use:_(s32) = G_AND %full_load, %high_half 1320 ; BIG: $w1 = COPY %extra_use(s32) 1321 ; BIG: RET_ReallyLR implicit $w1 1322 %cst_1:_(s64) = G_CONSTANT i64 1 1323 %cst_16:_(s32) = G_CONSTANT i32 16 1324 1325 %ptr:_(p0) = COPY $x1 1326 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1327 1328 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1329 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1330 %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1331 1332 %full_load:_(s32) = G_OR %low_half, %high_half 1333 %extra_use:_(s32) = G_AND %full_load, %high_half 1334 $w1 = COPY %extra_use(s32) 1335 RET_ReallyLR implicit $w1 1336 1337... 1338--- 1339name: dont_combine_store_between_same_mbb 1340tracksRegLiveness: true 1341body: | 1342 bb.0: 1343 liveins: $x0, $x1 1344 ; If there is a store between any of the loads, then do not combine. 1345 1346 ; LITTLE-LABEL: name: dont_combine_store_between_same_mbb 1347 ; LITTLE: liveins: $x0, $x1 1348 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 1349 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 1350 ; LITTLE: %ptr:_(p0) = COPY $x1 1351 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1352 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1353 ; LITTLE: %other_ptr:_(p0) = COPY $x1 1354 ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12 1355 ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16)) 1356 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1357 ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1358 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half 1359 ; LITTLE: $w1 = COPY %full_load(s32) 1360 ; LITTLE: RET_ReallyLR implicit $w1 1361 ; BIG-LABEL: name: dont_combine_store_between_same_mbb 1362 ; BIG: liveins: $x0, $x1 1363 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 1364 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 1365 ; BIG: %ptr:_(p0) = COPY $x1 1366 ; BIG: 
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1367 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1368 ; BIG: %other_ptr:_(p0) = COPY $x1 1369 ; BIG: %some_val:_(s32) = G_CONSTANT i32 12 1370 ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16)) 1371 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1372 ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1373 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half 1374 ; BIG: $w1 = COPY %full_load(s32) 1375 ; BIG: RET_ReallyLR implicit $w1 1376 %cst_1:_(s64) = G_CONSTANT i64 1 1377 %cst_16:_(s32) = G_CONSTANT i32 16 1378 1379 %ptr:_(p0) = COPY $x1 1380 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) 1381 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) 1382 1383 ; Memory could be modified here, so don't combine! 1384 %other_ptr:_(p0) = COPY $x1 1385 %some_val:_(s32) = G_CONSTANT i32 12 1386 G_STORE %some_val, %other_ptr :: (store (s16)) 1387 1388 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) 1389 %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) 1390 1391 %full_load:_(s32) = G_OR %low_half, %high_half 1392 $w1 = COPY %full_load(s32) 1393 RET_ReallyLR implicit $w1 1394 1395... 
---
name: dont_combine_store_between_different_mbb
tracksRegLiveness: true
body: |
  ; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
  ; LITTLE: bb.0:
  ; LITTLE: successors: %bb.1(0x80000000)
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
  ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
  ; LITTLE: %ptr:_(p0) = COPY $x1
  ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; LITTLE: bb.1:
  ; LITTLE: successors: %bb.2(0x80000000)
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %other_ptr:_(p0) = COPY $x1
  ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12
  ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
  ; LITTLE: bb.2:
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
  ; LITTLE: $w1 = COPY %full_load(s32)
  ; LITTLE: RET_ReallyLR implicit $w1
  ; BIG-LABEL: name: dont_combine_store_between_different_mbb
  ; BIG: bb.0:
  ; BIG: successors: %bb.1(0x80000000)
  ; BIG: liveins: $x0, $x1
  ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
  ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
  ; BIG: %ptr:_(p0) = COPY $x1
  ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; BIG: bb.1:
  ; BIG: successors: %bb.2(0x80000000)
  ; BIG: liveins: $x0, $x1
  ; BIG: %other_ptr:_(p0) = COPY $x1
  ; BIG: %some_val:_(s32) = G_CONSTANT i32 12
  ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
  ; BIG: bb.2:
  ; BIG: liveins: $x0, $x1
  ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
  ; BIG: $w1 = COPY %full_load(s32)
  ; BIG: RET_ReallyLR implicit $w1
  ; There is a store between the two loads, hidden away in a different MBB.
  ; We should not combine here.


  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $x0, $x1
    ; If there is a store between any of the loads, then do not combine.

    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))

  bb.1:
    liveins: $x0, $x1
    successors: %bb.2(0x80000000)
    ; Memory could be modified here, so don't combine!
    %other_ptr:_(p0) = COPY $x1
    %some_val:_(s32) = G_CONSTANT i32 12
    G_STORE %some_val, %other_ptr :: (store (s16))

  bb.2:
    liveins: $x0, $x1
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: different_mbb
tracksRegLiveness: true
body: |
  ; LITTLE-LABEL: name: different_mbb
  ; LITTLE: bb.0:
  ; LITTLE: successors: %bb.1(0x80000000)
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
  ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
  ; LITTLE: %ptr:_(p0) = COPY $x1
  ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; LITTLE: bb.1:
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
  ; LITTLE: $w1 = COPY %full_load(s32)
  ; LITTLE: RET_ReallyLR implicit $w1
  ; BIG-LABEL: name: different_mbb
  ; BIG: bb.0:
  ; BIG: successors: %bb.1(0x80000000)
  ; BIG: liveins: $x0, $x1
  ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
  ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
  ; BIG: %ptr:_(p0) = COPY $x1
  ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; BIG: bb.1:
  ; BIG: liveins: $x0, $x1
  ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
  ; BIG: $w1 = COPY %full_load(s32)
  ; BIG: RET_ReallyLR implicit $w1
  ; It should be possible to combine here, but it's not supported right now.


  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $x0, $x1

    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))

  bb.1:
    liveins: $x0, $x1
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: load_first
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0, $x1
    ; Test for a bug fix for predecessor-checking code.

    ; LITTLE-LABEL: name: load_first
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: load_first
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    ; NOTE(review): %low_half is deliberately the first instruction in the
    ; block, before the COPY that defines %ptr, to exercise the
    ; predecessor-checking fix mentioned above.
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16

    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)

    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)

    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name: store_between_loads_and_or
alignment: 4
tracksRegLiveness: true

liveins:
  - { reg: '$x0' }
  - { reg: '$x1' }
frameInfo:
  maxAlignment: 1
body: |
  bb.1:
    liveins: $x0, $x1
    ; Check that we build the G_LOAD at the point of the last load, instead of place of the G_OR.
    ; We could have a G_STORE in between which may not be safe to move the load across.
    liveins: $x0, $x1
    ; LITTLE-LABEL: name: store_between_loads_and_or
    ; LITTLE: liveins: $x0, $x1, $x0, $x1
    ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
    ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
    ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
    ; LITTLE: $w0 = COPY [[LOAD]](s32)
    ; LITTLE: RET_ReallyLR implicit $w0
    ; BIG-LABEL: name: store_between_loads_and_or
    ; BIG: liveins: $x0, $x1, $x0, $x1
    ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
    ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
    ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
    ; BIG: $w0 = COPY [[BSWAP]](s32)
    ; BIG: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %1:_(p0) = COPY $x1
    %12:_(s8) = G_CONSTANT i8 1
    %15:_(s32) = G_CONSTANT i32 8
    %19:_(s32) = G_CONSTANT i32 16
    %23:_(s32) = G_CONSTANT i32 24
    ; Four s8 component loads at offsets 0..3 from %0.
    %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
    %3:_(s64) = G_CONSTANT i64 1
    %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
    %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
    %6:_(s64) = G_CONSTANT i64 2
    %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
    %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
    %9:_(s64) = G_CONSTANT i64 3
    %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
    %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
    ; The store sits between the component loads and the OR-tree below.
    G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
    %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
    %17:_(s32) = G_OR %16:_, %13:_
    %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
    %21:_(s32) = G_OR %17:_, %20:_
    %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
    %25:_(s32) = G_OR %21:_, %24:_
    $w0 = COPY %25:_(s32)
    RET_ReallyLR implicit $w0

...