; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s

; Three-field struct shared by the first two tests. In the expected output
; below, field 1 is addressed as e+4 and field 2 as e+8.
%structTy = type { i8, i32, i32 }

@e = common dso_local global %structTy zeroinitializer, align 4

;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
;; store operations. The first test stores in increasing address
;; order, the second in decreasing -- but in both cases should have
;; the same result in memory in the end.

; Program order: field 1 = 1, field 2 = 123, field 2 = 456.
; Expected output: a single 8-byte store at e+4 of 0x1C800000001, i.e. 1 in the
; low 32 bits (field 1) and 456 (0x1C8) in the high 32 bits (field 2); the
; overwritten value 123 does not appear at all.
define dso_local void @redundant_stores_merging() {
; CHECK-LABEL: redundant_stores_merging:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001
; CHECK-NEXT: movq %rax, e+4(%rip)
; CHECK-NEXT: retq
  store i32 1, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 1), align 4
  store i32 123, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
  store i32 456, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
  ret void
}

;; This variant tests PR25154.
; Program order: field 2 = 123, field 2 = 456, field 1 = 1.
; Expected output: an 8-byte store at e+4 of 0x7B00000001 (1 in field 1, 123 in
; field 2) followed by a 4-byte store of 456 to e+8, so field 2 still ends up
; holding 456 -- the same final memory contents as the test above.
define dso_local void @redundant_stores_merging_reverse() {
; CHECK-LABEL: redundant_stores_merging_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001
; CHECK-NEXT: movq %rax, e+4(%rip)
; CHECK-NEXT: movl $456, e+8(%rip) # imm = 0x1C8
; CHECK-NEXT: retq
  store i32 123, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
  store i32 456, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
  store i32 1, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 1), align 4
  ret void
}

; 8-byte array written by @overlapping_stores_merging.
@b = common dso_local global [8 x i8] zeroinitializer, align 2

;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
;; these must not be reordered in MergeConsecutiveStores such that the
;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
;; a movl, after the store to 3).
; Program order: i16 0 -> b+2, i16 2 -> b+3, i16 1 -> b+0.
; Expected output: one 4-byte store of 1 at b (covering offsets 0..3), and only
; afterwards the 2-byte store of 2 at b+3, so the overlapping byte at offset 3
; ends up with the value from the store to b+3.
define dso_local void @overlapping_stores_merging() {
; CHECK-LABEL: overlapping_stores_merging:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $1, b(%rip)
; CHECK-NEXT: movw $2, b+3(%rip)
; CHECK-NEXT: retq
  store i16 0, ptr getelementptr inbounds ([8 x i8], ptr @b, i64 0, i64 2), align 2
  store i16 2, ptr getelementptr inbounds ([8 x i8], ptr @b, i64 0, i64 3), align 1
  store i16 1, ptr @b, align 2
  ret void
}

; Extracts each of the 16 bytes of %v and stores byte i to %ptr+i, in
; increasing order. Expected output: a single 16-byte vmovups of %xmm0.
; NOTE(review): attribute group #0 is referenced here but no definition of it
; is visible in this view -- confirm that is intentional.
define dso_local void @extract_vector_store_16_consecutive_bytes(<2 x i64> %v, ptr %ptr) #0 {
; CHECK-LABEL: extract_vector_store_16_consecutive_bytes:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: retq
  %bc = bitcast <2 x i64> %v to <16 x i8>
  ; One scalar per vector lane.
  %ext00 = extractelement <16 x i8> %bc, i32 0
  %ext01 = extractelement <16 x i8> %bc, i32 1
  %ext02 = extractelement <16 x i8> %bc, i32 2
  %ext03 = extractelement <16 x i8> %bc, i32 3
  %ext04 = extractelement <16 x i8> %bc, i32 4
  %ext05 = extractelement <16 x i8> %bc, i32 5
  %ext06 = extractelement <16 x i8> %bc, i32 6
  %ext07 = extractelement <16 x i8> %bc, i32 7
  %ext08 = extractelement <16 x i8> %bc, i32 8
  %ext09 = extractelement <16 x i8> %bc, i32 9
  %ext10 = extractelement <16 x i8> %bc, i32 10
  %ext11 = extractelement <16 x i8> %bc, i32 11
  %ext12 = extractelement <16 x i8> %bc, i32 12
  %ext13 = extractelement <16 x i8> %bc, i32 13
  %ext14 = extractelement <16 x i8> %bc, i32 14
  %ext15 = extractelement <16 x i8> %bc, i32 15
  ; Destination address for lane i is %ptr + i (lane 0 uses %ptr directly).
  %gep01 = getelementptr inbounds i8, ptr %ptr, i64 1
  %gep02 = getelementptr inbounds i8, ptr %ptr, i64 2
  %gep03 = getelementptr inbounds i8, ptr %ptr, i64 3
  %gep04 = getelementptr inbounds i8, ptr %ptr, i64 4
  %gep05 = getelementptr inbounds i8, ptr %ptr, i64 5
  %gep06 = getelementptr inbounds i8, ptr %ptr, i64 6
  %gep07 = getelementptr inbounds i8, ptr %ptr, i64 7
  %gep08 = getelementptr inbounds i8, ptr %ptr, i64 8
  %gep09 = getelementptr inbounds i8, ptr %ptr, i64 9
  %gep10 = getelementptr inbounds i8, ptr %ptr, i64 10
  %gep11 = getelementptr inbounds i8, ptr %ptr, i64 11
  %gep12 = getelementptr inbounds i8, ptr %ptr, i64 12
  %gep13 = getelementptr inbounds i8, ptr %ptr, i64 13
  %gep14 = getelementptr inbounds i8, ptr %ptr, i64 14
  %gep15 = getelementptr inbounds i8, ptr %ptr, i64 15
  store i8 %ext00, ptr %ptr, align 1
  store i8 %ext01, ptr %gep01, align 1
  store i8 %ext02, ptr %gep02, align 1
  store i8 %ext03, ptr %gep03, align 1
  store i8 %ext04, ptr %gep04, align 1
  store i8 %ext05, ptr %gep05, align 1
  store i8 %ext06, ptr %gep06, align 1
  store i8 %ext07, ptr %gep07, align 1
  store i8 %ext08, ptr %gep08, align 1
  store i8 %ext09, ptr %gep09, align 1
  store i8 %ext10, ptr %gep10, align 1
  store i8 %ext11, ptr %gep11, align 1
  store i8 %ext12, ptr %gep12, align 1
  store i8 %ext13, ptr %gep13, align 1
  store i8 %ext14, ptr %gep14, align 1
  store i8 %ext15, ptr %gep15, align 1
  ret void
}

; PR34217 - https://bugs.llvm.org/show_bug.cgi?id=34217

; Extracts each of the 32 bytes of %v and stores byte i to %ptr+i, in
; increasing order. Expected output: a single 32-byte vmovups of %ymm0,
; followed by vzeroupper.
; NOTE(review): attribute group #0 is referenced here but no definition of it
; is visible in this view -- confirm that is intentional.
define dso_local void @extract_vector_store_32_consecutive_bytes(<4 x i64> %v, ptr %ptr) #0 {
; CHECK-LABEL: extract_vector_store_32_consecutive_bytes:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %bc = bitcast <4 x i64> %v to <32 x i8>
  ; One scalar per vector lane.
  %ext00 = extractelement <32 x i8> %bc, i32 0
  %ext01 = extractelement <32 x i8> %bc, i32 1
  %ext02 = extractelement <32 x i8> %bc, i32 2
  %ext03 = extractelement <32 x i8> %bc, i32 3
  %ext04 = extractelement <32 x i8> %bc, i32 4
  %ext05 = extractelement <32 x i8> %bc, i32 5
  %ext06 = extractelement <32 x i8> %bc, i32 6
  %ext07 = extractelement <32 x i8> %bc, i32 7
  %ext08 = extractelement <32 x i8> %bc, i32 8
  %ext09 = extractelement <32 x i8> %bc, i32 9
  %ext10 = extractelement <32 x i8> %bc, i32 10
  %ext11 = extractelement <32 x i8> %bc, i32 11
  %ext12 = extractelement <32 x i8> %bc, i32 12
  %ext13 = extractelement <32 x i8> %bc, i32 13
  %ext14 = extractelement <32 x i8> %bc, i32 14
  %ext15 = extractelement <32 x i8> %bc, i32 15
  %ext16 = extractelement <32 x i8> %bc, i32 16
  %ext17 = extractelement <32 x i8> %bc, i32 17
  %ext18 = extractelement <32 x i8> %bc, i32 18
  %ext19 = extractelement <32 x i8> %bc, i32 19
  %ext20 = extractelement <32 x i8> %bc, i32 20
  %ext21 = extractelement <32 x i8> %bc, i32 21
  %ext22 = extractelement <32 x i8> %bc, i32 22
  %ext23 = extractelement <32 x i8> %bc, i32 23
  %ext24 = extractelement <32 x i8> %bc, i32 24
  %ext25 = extractelement <32 x i8> %bc, i32 25
  %ext26 = extractelement <32 x i8> %bc, i32 26
  %ext27 = extractelement <32 x i8> %bc, i32 27
  %ext28 = extractelement <32 x i8> %bc, i32 28
  %ext29 = extractelement <32 x i8> %bc, i32 29
  %ext30 = extractelement <32 x i8> %bc, i32 30
  %ext31 = extractelement <32 x i8> %bc, i32 31
  ; Destination address for lane i is %ptr + i (lane 0 uses %ptr directly).
  %gep01 = getelementptr inbounds i8, ptr %ptr, i64 1
  %gep02 = getelementptr inbounds i8, ptr %ptr, i64 2
  %gep03 = getelementptr inbounds i8, ptr %ptr, i64 3
  %gep04 = getelementptr inbounds i8, ptr %ptr, i64 4
  %gep05 = getelementptr inbounds i8, ptr %ptr, i64 5
  %gep06 = getelementptr inbounds i8, ptr %ptr, i64 6
  %gep07 = getelementptr inbounds i8, ptr %ptr, i64 7
  %gep08 = getelementptr inbounds i8, ptr %ptr, i64 8
  %gep09 = getelementptr inbounds i8, ptr %ptr, i64 9
  %gep10 = getelementptr inbounds i8, ptr %ptr, i64 10
  %gep11 = getelementptr inbounds i8, ptr %ptr, i64 11
  %gep12 = getelementptr inbounds i8, ptr %ptr, i64 12
  %gep13 = getelementptr inbounds i8, ptr %ptr, i64 13
  %gep14 = getelementptr inbounds i8, ptr %ptr, i64 14
  %gep15 = getelementptr inbounds i8, ptr %ptr, i64 15
  %gep16 = getelementptr inbounds i8, ptr %ptr, i64 16
  %gep17 = getelementptr inbounds i8, ptr %ptr, i64 17
  %gep18 = getelementptr inbounds i8, ptr %ptr, i64 18
  %gep19 = getelementptr inbounds i8, ptr %ptr, i64 19
  %gep20 = getelementptr inbounds i8, ptr %ptr, i64 20
  %gep21 = getelementptr inbounds i8, ptr %ptr, i64 21
  %gep22 = getelementptr inbounds i8, ptr %ptr, i64 22
  %gep23 = getelementptr inbounds i8, ptr %ptr, i64 23
  %gep24 = getelementptr inbounds i8, ptr %ptr, i64 24
  %gep25 = getelementptr inbounds i8, ptr %ptr, i64 25
  %gep26 = getelementptr inbounds i8, ptr %ptr, i64 26
  %gep27 = getelementptr inbounds i8, ptr %ptr, i64 27
  %gep28 = getelementptr inbounds i8, ptr %ptr, i64 28
  %gep29 = getelementptr inbounds i8, ptr %ptr, i64 29
  %gep30 = getelementptr inbounds i8, ptr %ptr, i64 30
  %gep31 = getelementptr inbounds i8, ptr %ptr, i64 31
  store i8 %ext00, ptr %ptr, align 1
  store i8 %ext01, ptr %gep01, align 1
  store i8 %ext02, ptr %gep02, align 1
  store i8 %ext03, ptr %gep03, align 1
  store i8 %ext04, ptr %gep04, align 1
  store i8 %ext05, ptr %gep05, align 1
  store i8 %ext06, ptr %gep06, align 1
  store i8 %ext07, ptr %gep07, align 1
  store i8 %ext08, ptr %gep08, align 1
  store i8 %ext09, ptr %gep09, align 1
  store i8 %ext10, ptr %gep10, align 1
  store i8 %ext11, ptr %gep11, align 1
  store i8 %ext12, ptr %gep12, align 1
  store i8 %ext13, ptr %gep13, align 1
  store i8 %ext14, ptr %gep14, align 1
  store i8 %ext15, ptr %gep15, align 1
  store i8 %ext16, ptr %gep16, align 1
  store i8 %ext17, ptr %gep17, align 1
  store i8 %ext18, ptr %gep18, align 1
  store i8 %ext19, ptr %gep19, align 1
  store i8 %ext20, ptr %gep20, align 1
  store i8 %ext21, ptr %gep21, align 1
  store i8 %ext22, ptr %gep22, align 1
  store i8 %ext23, ptr %gep23, align 1
  store i8 %ext24, ptr %gep24, align 1
  store i8 %ext25, ptr %gep25, align 1
  store i8 %ext26, ptr %gep26, align 1
  store i8 %ext27, ptr %gep27, align 1
  store i8 %ext28, ptr %gep28, align 1
  store i8 %ext29, ptr %gep29, align 1
  store i8 %ext30, ptr %gep30, align 1
  store i8 %ext31, ptr %gep31, align 1
  ret void
}

; https://bugs.llvm.org/show_bug.cgi?id=43446
; Stores i8 -2 and then i1 true through two separate inttoptr casts of the same
; integer %x. Expected output: a single byte store of the value 1.
define dso_local void @pr43446_0(i64 %x) {
; CHECK-LABEL: pr43446_0:
; CHECK: # %bb.0:
; CHECK-NEXT: movb $1, (%rdi)
; CHECK-NEXT: retq
  %a = inttoptr i64 %x to ptr
  store i8 -2, ptr %a, align 1
  %b = inttoptr i64 %x to ptr
  store i1 true, ptr %b, align 1
  ret void
}
; Same pair of stores (i8 -2, then i1 true), but through a pointer argument.
; Expected output: a single byte store of the value 1.
define dso_local void @pr43446_1(ptr %a) {
; CHECK-LABEL: pr43446_1:
; CHECK: # %bb.0:
; CHECK-NEXT: movb $1, (%rdi)
; CHECK-NEXT: retq
  store i8 -2, ptr %a, align 1
  store i1 true, ptr %a, align 1
  ret void
}

; Swaps the two adjacent bytes at %p and %p+1 in place.
; Expected output: a single 16-bit rotate-by-8 applied directly to memory.
define dso_local void @rotate16_in_place(ptr %p) {
; CHECK-LABEL: rotate16_in_place:
; CHECK: # %bb.0:
; CHECK-NEXT: rolw $8, (%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i8, ptr %p, i64 1
  %i0 = load i8, ptr %p, align 1
  %i1 = load i8, ptr %p1, align 1
  store i8 %i1, ptr %p, align 1
  store i8 %i0, ptr %p1, align 1
  ret void
}

; Loads the two bytes at %p and %p+1 and stores them swapped to %q and %q+1.
; Expected output: one 16-bit load, a rotate by 8, and one 16-bit store.
define dso_local void @rotate16(ptr %p, ptr %q) {
; CHECK-LABEL: rotate16:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: rolw $8, %ax
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq
  %p1 = getelementptr i8, ptr %p, i64 1
  %q1 = getelementptr i8, ptr %q, i64 1
  %i0 = load i8, ptr %p, align 1
  %i1 = load i8, ptr %p1, align 1
  store i8 %i1, ptr %q, align 1
  store i8 %i0, ptr %q1, align 1
  ret void
}

; Swaps the two adjacent i16 values at %p in place.
; Expected output: a single 32-bit rotate-by-16 applied directly to memory.
define dso_local void @rotate32_in_place(ptr %p) {
; CHECK-LABEL: rotate32_in_place:
; CHECK: # %bb.0:
; CHECK-NEXT: roll $16, (%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i16, ptr %p, i64 1
  %i0 = load i16, ptr %p, align 2
  %i1 = load i16, ptr %p1, align 2
  store i16 %i1, ptr %p, align 2
  store i16 %i0, ptr %p1, align 2
  ret void
}

; Loads i16 elements 0 and 1 of %p and stores them swapped to elements 42 and
; 43 (byte offset 84 in the expected output).
; Expected output: one 32-bit load, a rotate by 16, and one 32-bit store.
define dso_local void @rotate32(ptr %p) {
; CHECK-LABEL: rotate32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: roll $16, %eax
; CHECK-NEXT: movl %eax, 84(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i16, ptr %p, i64 1
  %p42 = getelementptr i16, ptr %p, i64 42
  %p43 = getelementptr i16, ptr %p, i64 43
  %i0 = load i16, ptr %p, align 2
  %i1 = load i16, ptr %p1, align 2
  store i16 %i1, ptr %p42, align 2
  store i16 %i0, ptr %p43, align 2
  ret void
}

; Swaps the two adjacent i32 values at %p in place.
; Expected output: a single 64-bit rotate-by-32 applied directly to memory.
define dso_local void @rotate64_in_place(ptr %p) {
; CHECK-LABEL: rotate64_in_place:
; CHECK: # %bb.0:
; CHECK-NEXT: rolq $32, (%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p, i64 1
  %i0 = load i32, ptr %p, align 4
  %i1 = load i32, ptr %p1, align 4
  store i32 %i1, ptr %p, align 4
  store i32 %i0, ptr %p1, align 4
  ret void
}

; Loads i32 elements 0 and 1 of %p and stores them swapped to elements 2 and 3
; (byte offset 8 in the expected output).
; Expected output: one 64-bit load, a rotate by 32, and one 64-bit store.
define dso_local void @rotate64(ptr %p) {
; CHECK-LABEL: rotate64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: rolq $32, %rax
; CHECK-NEXT: movq %rax, 8(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p, i64 1
  %p2 = getelementptr i32, ptr %p, i64 2
  %p3 = getelementptr i32, ptr %p, i64 3
  %i0 = load i32, ptr %p, align 4
  %i1 = load i32, ptr %p1, align 4
  store i32 %i1, ptr %p2, align 4
  store i32 %i0, ptr %p3, align 4
  ret void
}

; Loads i16 elements 0..3 of %p and stores them to elements 42..45 in the order
; i2, i3, i0, i1 (the two 32-bit halves exchanged, each half kept intact).
; Expected output: one 64-bit load, a rotate by 32, and one 64-bit store at
; byte offset 84.
define dso_local void @rotate64_iterate(ptr %p) {
; CHECK-LABEL: rotate64_iterate:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: rolq $32, %rax
; CHECK-NEXT: movq %rax, 84(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3
  %p42 = getelementptr i16, ptr %p, i64 42
  %p43 = getelementptr i16, ptr %p, i64 43
  %p44 = getelementptr i16, ptr %p, i64 44
  %p45 = getelementptr i16, ptr %p, i64 45
  %i0 = load i16, ptr %p, align 2
  %i1 = load i16, ptr %p1, align 2
  %i2 = load i16, ptr %p2, align 2
  %i3 = load i16, ptr %p3, align 2
  store i16 %i2, ptr %p42, align 2
  store i16 %i3, ptr %p43, align 2
  store i16 %i0, ptr %p44, align 2
  store i16 %i1, ptr %p45, align 2
  ret void
}

; TODO: recognize this as 2 rotates?

; Loads i16 elements 0..3 of %p and stores them to the four consecutive
; elements 42..45 in the order i1, i0, i3, i2 (each adjacent pair swapped).
; Expected output as currently recorded: four 16-bit loads and four 16-bit
; stores, with no merging.
define dso_local void @rotate32_consecutive(ptr %p) {
; CHECK-LABEL: rotate32_consecutive:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movzwl 2(%rdi), %ecx
; CHECK-NEXT: movzwl 4(%rdi), %edx
; CHECK-NEXT: movzwl 6(%rdi), %esi
; CHECK-NEXT: movw %cx, 84(%rdi)
; CHECK-NEXT: movw %ax, 86(%rdi)
; CHECK-NEXT: movw %si, 88(%rdi)
; CHECK-NEXT: movw %dx, 90(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3
  %p42 = getelementptr i16, ptr %p, i64 42
  %p43 = getelementptr i16, ptr %p, i64 43
  %p44 = getelementptr i16, ptr %p, i64 44
  %p45 = getelementptr i16, ptr %p, i64 45
  %i0 = load i16, ptr %p, align 2
  %i1 = load i16, ptr %p1, align 2
  %i2 = load i16, ptr %p2, align 2
  %i3 = load i16, ptr %p3, align 2
  store i16 %i1, ptr %p42, align 2
  store i16 %i0, ptr %p43, align 2
  store i16 %i3, ptr %p44, align 2
  store i16 %i2, ptr %p45, align 2
  ret void
}

; Same as above, but now the stores are not all consecutive.
; Loads i16 elements 0..3 of %p; stores i1, i0 to elements 42, 43 and i3, i2 to
; elements 54, 55 (two separate destination pairs).
; Expected output: two 32-bit loads, two rotates by 16, and two 32-bit stores
; at byte offsets 84 and 108.
define dso_local void @rotate32_twice(ptr %p) {
; CHECK-LABEL: rotate32_twice:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl 4(%rdi), %ecx
; CHECK-NEXT: roll $16, %eax
; CHECK-NEXT: roll $16, %ecx
; CHECK-NEXT: movl %eax, 84(%rdi)
; CHECK-NEXT: movl %ecx, 108(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3
  %p42 = getelementptr i16, ptr %p, i64 42
  %p43 = getelementptr i16, ptr %p, i64 43
  %p54 = getelementptr i16, ptr %p, i64 54
  %p55 = getelementptr i16, ptr %p, i64 55
  %i0 = load i16, ptr %p, align 2
  %i1 = load i16, ptr %p1, align 2
  %i2 = load i16, ptr %p2, align 2
  %i3 = load i16, ptr %p3, align 2
  store i16 %i1, ptr %p42, align 2
  store i16 %i0, ptr %p43, align 2
  store i16 %i3, ptr %p54, align 2
  store i16 %i2, ptr %p55, align 2
  ret void
}

; Stores bits 0..7 of %x to %p and bits 8..15 to %p+1.
; Expected output: a single 16-bit store of %x.
define dso_local void @trunc_i16_to_i8(i16 %x, ptr %p) {
; CHECK-LABEL: trunc_i16_to_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movw %di, (%rsi)
; CHECK-NEXT: retq
  %t1 = trunc i16 %x to i8
  %sh = lshr i16 %x, 8
  %t2 = trunc i16 %sh to i8
  store i8 %t1, ptr %p, align 1
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  store i8 %t2, ptr %p1, align 1
  ret void
}

; Stores the four bytes of %x to %p+0..%p+3, least significant byte first.
; Expected output: a single 32-bit store of %x.
define dso_local void @trunc_i32_to_i8(i32 %x, ptr %p) {
; CHECK-LABEL: trunc_i32_to_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
  %t1 = trunc i32 %x to i8
  %sh1 = lshr i32 %x, 8
  %t2 = trunc i32 %sh1 to i8
  %sh2 = lshr i32 %x, 16
  %t3 = trunc i32 %sh2 to i8
  %sh3 = lshr i32 %x, 24
  %t4 = trunc i32 %sh3 to i8
  store i8 %t1, ptr %p, align 1
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  store i8 %t2, ptr %p1, align 1
  %p2 = getelementptr inbounds i8, ptr %p, i64 2
  store i8 %t3, ptr %p2, align 1
  %p3 = getelementptr inbounds i8, ptr %p, i64 3
  store i8 %t4, ptr %p3, align 1
  ret void
}

; Stores bits 0..15 of %x to i16 element 0 and bits 16..31 to element 1.
; Expected output: a single 32-bit store of %x.
define dso_local void @trunc_i32_to_i16(i32 %x, ptr %p) {
; CHECK-LABEL: trunc_i32_to_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
  %t1 = trunc i32 %x to i16
  %sh = lshr i32 %x, 16
  %t2 = trunc i32 %sh to i16
  store i16 %t1, ptr %p, align 2
  %p1 = getelementptr inbounds i16, ptr %p, i64 1
  store i16 %t2, ptr %p1, align 2
  ret void
}

; Stores the halves of %x in swapped positions: bits 0..15 go to i16 element 1
; and bits 16..31 go to element 0. The store to element 1 is issued first.
; Expected output: a 32-bit rotate by 16 followed by a single 32-bit store.
define dso_local void @be_i32_to_i16(i32 %x, ptr %p0) {
; CHECK-LABEL: be_i32_to_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: rorl $16, %edi
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
  %sh1 = lshr i32 %x, 16
  %t0 = trunc i32 %x to i16
  %t1 = trunc i32 %sh1 to i16
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  store i16 %t0, ptr %p1, align 2
  store i16 %t1, ptr %p0, align 2
  ret void
}

; Same placement as @be_i32_to_i16, but the store to element 0 is issued first.
; Expected output is identical: a rotate by 16 and a single 32-bit store.
define dso_local void @be_i32_to_i16_order(i32 %x, ptr %p0) {
; CHECK-LABEL: be_i32_to_i16_order:
; CHECK: # %bb.0:
; CHECK-NEXT: rorl $16, %edi
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
  %sh1 = lshr i32 %x, 16
  %t0 = trunc i32 %x to i16
  %t1 = trunc i32 %sh1 to i16
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  store i16 %t1, ptr %p0, align 2
  store i16 %t0, ptr %p1, align 2
  ret void
}

; Stores the eight bytes of %x to %p+0..%p+7, least significant byte first.
; Expected output: a single 64-bit store of %x.
define dso_local void @trunc_i64_to_i8(i64 %x, ptr %p) {
; CHECK-LABEL: trunc_i64_to_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: retq
  %t1 = trunc i64 %x to i8
  %sh1 = lshr i64 %x, 8
  %t2 = trunc i64 %sh1 to i8
  %sh2 = lshr i64 %x, 16
  %t3 = trunc i64 %sh2 to i8
  %sh3 = lshr i64 %x, 24
  %t4 = trunc i64 %sh3 to i8
  %sh4 = lshr i64 %x, 32
  %t5 = trunc i64 %sh4 to i8
  %sh5 = lshr i64 %x, 40
  %t6 = trunc i64 %sh5 to i8
  %sh6 = lshr i64 %x, 48
  %t7 = trunc i64 %sh6 to i8
  %sh7 = lshr i64 %x, 56
  %t8 = trunc i64 %sh7 to i8
  store i8 %t1, ptr %p, align 1
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  store i8 %t2, ptr %p1, align 1
  %p2 = getelementptr inbounds i8, ptr %p, i64 2
  store i8 %t3, ptr %p2, align 1
  %p3 = getelementptr inbounds i8, ptr %p, i64 3
  store i8 %t4, ptr %p3, align 1
  %p4 = getelementptr inbounds i8, ptr %p, i64 4
  store i8 %t5, ptr %p4, align 1
  %p5 = getelementptr inbounds i8, ptr %p, i64 5
  store i8 %t6, ptr %p5, align 1
  %p6 = getelementptr inbounds i8, ptr %p, i64 6
  store i8 %t7, ptr %p6, align 1
  %p7 = getelementptr inbounds i8, ptr %p, i64 7
  store i8 %t8, ptr %p7, align 1
  ret void
}

; Stores the four 16-bit pieces of %x to i16 elements 0..3, least significant
; piece first. Expected output: a single 64-bit store of %x.
define dso_local void @trunc_i64_to_i16(i64 %x, ptr %p) {
; CHECK-LABEL: trunc_i64_to_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: retq
  %t1 = trunc i64 %x to i16
  %sh1 = lshr i64 %x, 16
  %t2 = trunc i64 %sh1 to i16
  %sh2 = lshr i64 %x, 32
  %t3 = trunc i64 %sh2 to i16
  %sh3 = lshr i64 %x, 48
  %t4 = trunc i64 %sh3 to i16
  store i16 %t1, ptr %p, align 2
  %p1 = getelementptr inbounds i16, ptr %p, i64 1
  store i16 %t2, ptr %p1, align 2
  %p2 = getelementptr inbounds i16, ptr %p, i64 2
  store i16 %t3, ptr %p2, align 2
  %p3 = getelementptr inbounds i16, ptr %p, i64 3
  store i16 %t4, ptr %p3, align 2
  ret void
}

; Stores bits 0..31 of %x to i32 element 0 and bits 32..63 to element 1.
; Expected output: a single 64-bit store of %x.
define dso_local void @trunc_i64_to_i32(i64 %x, ptr %p) {
; CHECK-LABEL: trunc_i64_to_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: retq
  %t1 = trunc i64 %x to i32
  %sh = lshr i64 %x, 32
  %t2 = trunc i64 %sh to i32
  store i32 %t1, ptr %p, align 4
  %p1 = getelementptr inbounds i32, ptr %p, i64 1
  store i32 %t2, ptr %p1, align 4
  ret void
}

; Stores the halves of %x in swapped positions: bits 0..31 go to i32 element 1
; and bits 32..63 go to element 0. The store to element 1 is issued first.
; Expected output: a 64-bit rotate by 32 followed by a single 64-bit store.
define dso_local void @be_i64_to_i32(i64 %x, ptr %p0) {
; CHECK-LABEL: be_i64_to_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: rorq $32, %rdi
; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: retq
  %sh1 = lshr i64 %x, 32
  %t0 = trunc i64 %x to i32
  %t1 = trunc i64 %sh1 to i32
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  store i32 %t0, ptr %p1, align 4
  store i32 %t1, ptr %p0, align 4
  ret void
}

; Same placement as @be_i64_to_i32, but the store to element 0 is issued first.
; Expected output is identical: a rotate by 32 and a single 64-bit store.
define dso_local void @be_i64_to_i32_order(i64 %x, ptr %p0) {
; CHECK-LABEL: be_i64_to_i32_order:
; CHECK: # %bb.0:
; CHECK-NEXT: rorq $32, %rdi
; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: retq
  %sh1 = lshr i64 %x, 32
  %t0 = trunc i64 %x to i32
  %t1 = trunc i64 %sh1 to i32
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  store i32 %t1, ptr %p0, align 4
  store i32 %t0, ptr %p1, align 4
  ret void
}

; https://llvm.org/PR50623
; It is a miscompile to merge the stores if we are not
; writing all of the bytes from the source value.

; Bits 0..7 of %x are stored at byte offset 0 and bits 16..31 at byte offset 2;
; bits 8..15 are never stored and byte offset 1 is never written.
; Expected output: two separate stores (one byte, one 16-bit).
define void @merge_hole(i32 %x, ptr %p) {
; CHECK-LABEL: merge_hole:
; CHECK: # %bb.0:
; CHECK-NEXT: movb %dil, (%rsi)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, 2(%rsi)
; CHECK-NEXT: retq
  %p2 = getelementptr inbounds i16, ptr %p, i64 1
  %x3 = trunc i32 %x to i8
  store i8 %x3, ptr %p, align 1
  %sh = lshr i32 %x, 16
  %x01 = trunc i32 %sh to i16
  store i16 %x01, ptr %p2, align 1
  ret void
}

; Change the order of the stores.
; It is a miscompile to merge the stores if we are not
; writing all of the bytes from the source value.

; Same two stores as @merge_hole, with the 16-bit store at byte offset 2 issued
; before the byte store at offset 0. Expected output: two separate stores, in
; that same order.
define void @merge_hole2(i32 %x, ptr %p) {
; CHECK-LABEL: merge_hole2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shrl $16, %eax
; CHECK-NEXT: movw %ax, 2(%rsi)
; CHECK-NEXT: movb %dil, (%rsi)
; CHECK-NEXT: retq
  %p2 = getelementptr inbounds i16, ptr %p, i64 1
  %sh = lshr i32 %x, 16
  %x01 = trunc i32 %sh to i16
  store i16 %x01, ptr %p2, align 1
  %x3 = trunc i32 %x to i8
  store i8 %x3, ptr %p, align 1
  ret void
}

; Change offset.
; It is a miscompile to merge the stores if we are not
; writing all of the bytes from the source value.
; Bits 0..7 of %x are stored at byte offset 1 and bits 16..31 at byte offset 2;
; bits 8..15 of %x are never stored.
; Expected output: two separate stores (one byte at offset 1, one 16-bit at
; offset 2).
define void @merge_hole3(i32 %x, ptr %p) {
; CHECK-LABEL: merge_hole3:
; CHECK: # %bb.0:
; CHECK-NEXT: movb %dil, 1(%rsi)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, 2(%rsi)
; CHECK-NEXT: retq
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  %p2 = getelementptr inbounds i16, ptr %p, i64 1
  %x3 = trunc i32 %x to i8
  store i8 %x3, ptr %p1, align 1
  %sh = lshr i32 %x, 16
  %x01 = trunc i32 %sh to i16
  store i16 %x01, ptr %p2, align 1
  ret void
}

; Change offset.
; It is a miscompile to merge the stores if we are not
; writing all of the bytes from the source value.

; Bits 0..7 of %x are stored at byte offset 2 and bits 16..31 at byte offset 0;
; bits 8..15 of %x are never stored.
; Expected output: two separate stores (one byte at offset 2, one 16-bit at
; offset 0).
define void @merge_hole4(i32 %x, ptr %p) {
; CHECK-LABEL: merge_hole4:
; CHECK: # %bb.0:
; CHECK-NEXT: movb %dil, 2(%rsi)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, (%rsi)
; CHECK-NEXT: retq
  %p2 = getelementptr inbounds i8, ptr %p, i64 2
  %x3 = trunc i32 %x to i8
  store i8 %x3, ptr %p2, align 1
  %sh = lshr i32 %x, 16
  %x01 = trunc i32 %sh to i16
  store i16 %x01, ptr %p, align 1
  ret void
}