; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O0 %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O3 %s

define i8 @load_i8(ptr %ptr) {
; CHECK-O0-LABEL: load_i8:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i8:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i8, ptr %ptr unordered, align 1
  ret i8 %v
}

define void @store_i8(ptr %ptr, i8 %v) {
; CHECK-O0-LABEL: store_i8:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb %sil, %al
; CHECK-O0-NEXT: movb %al, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i8:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movb %sil, (%rdi)
; CHECK-O3-NEXT: retq
  store atomic i8 %v, ptr %ptr unordered, align 1
  ret void
}

define i16 @load_i16(ptr %ptr) {
; CHECK-O0-LABEL: load_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %ax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i16:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i16, ptr %ptr unordered, align 2
  ret i16 %v
}


define void @store_i16(ptr %ptr, i16 %v) {
; CHECK-O0-LABEL: store_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw %si, %ax
; CHECK-O0-NEXT: movw %ax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i16:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movw %si, (%rdi)
; CHECK-O3-NEXT: retq
  store atomic i16 %v, ptr %ptr unordered, align 2
  ret void
}

define i32 @load_i32(ptr %ptr) {
; CHECK-LABEL: load_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
  %v = load atomic i32, ptr %ptr unordered, align 4
  ret i32 %v
}

define void @store_i32(ptr %ptr, i32 %v) {
; CHECK-LABEL: store_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: retq
  store atomic i32 %v, ptr %ptr unordered, align 4
  ret void
}

define i64 @load_i64(ptr %ptr) {
; CHECK-LABEL: load_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  ret i64 %v
}

define void @store_i64(ptr %ptr, i64 %v) {
; CHECK-LABEL: store_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: retq
  store atomic i64 %v, ptr %ptr unordered, align 8
  ret void
}

;; The tests in the rest of this file are intended to show transforms which we
;; either *can't* do for legality, or don't currently implement. The latter
;; are noted carefully where relevant.

;; Start w/some clearly illegal ones.
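;; E.g. rewriting @narrow_writeback_or below to use a byte-wide
;; "orb $7, (%rdi)" would be a miscompile: even though only the low byte
;; changes, the store must remain a single full-width atomic store.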

; Must use a full width op, not a byte op
define void @narrow_writeback_or(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq $7, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: narrow_writeback_or:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq $7, (%rdi)
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  %v.new = or i64 %v, 7
  store atomic i64 %v.new, ptr %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_and(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $-256, %eax
; CHECK-O0-NEXT: # kill: def $rax killed $eax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: narrow_writeback_and:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00
; CHECK-O3-NEXT: andq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00
  store atomic i64 %v.new, ptr %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_xor(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_xor:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq $7, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: narrow_writeback_xor:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq $7, (%rdi)
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  %v.new = xor i64 %v, 7
  store atomic i64 %v.new, ptr %ptr unordered, align 8
  ret void
}

;; The next batch of tests are exercising cases where store widening would
;; improve code generation. Note that widening is only legal if the
;; resulting type would be atomic. Each test has a well-aligned and an
;; unaligned variant to ensure we get correct codegen here.
;; Note: It's not a legality issue, but there's a gotcha here to be aware
;; of. Once we widen a pair of atomic stores, we lose the information
;; that the original atomicity requirement was half the width. Given that,
;; we can't then split the store again. This challenges our usual iterative
;; approach to incremental improvement.
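;; As an illustrative sketch (not an actual test), the aligned pair in
;; @widen_store below could legally become a single 8-byte atomic store:
;;   %lo = zext i32 %v1 to i64
;;   %hi = zext i32 %v2 to i64
;;   %hi.shifted = shl i64 %hi, 32
;;   %merged = or i64 %lo, %hi.shifted
;;   store atomic i64 %merged, ptr %p0 unordered, align 8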

; Legal if wider type is also atomic (TODO)
define void @widen_store(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_store:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 8
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}

; This one is *NOT* legal to widen. With weaker alignment,
; the wider type might cross a cache line and violate the
; atomicity requirement.
define void @widen_store_unaligned(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_store_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 4
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @widen_broadcast(ptr %p0, i32 %v) {
; CHECK-LABEL: widen_broadcast:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %esi, 4(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v, ptr %p0 unordered, align 8
  store atomic i32 %v, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_broadcast_unaligned(ptr %p0, i32 %v) {
; CHECK-LABEL: widen_broadcast_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %esi, 4(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v, ptr %p0 unordered, align 4
  store atomic i32 %v, ptr %p1 unordered, align 4
  ret void
}

define i128 @load_i128(ptr %ptr) {
; CHECK-LABEL: load_i128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: vpextrq $1, %xmm0, %rdx
; CHECK-NEXT: retq
  %v = load atomic i128, ptr %ptr unordered, align 16
  ret i128 %v
}

define void @store_i128(ptr %ptr, i128 %v) {
; CHECK-O0-LABEL: store_i128:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: vmovq %rsi, %xmm0
; CHECK-O0-NEXT: vmovq %rdx, %xmm1
; CHECK-O0-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-O0-NEXT: vmovdqa %xmm0, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i128:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: vmovq %rdx, %xmm0
; CHECK-O3-NEXT: vmovq %rsi, %xmm1
; CHECK-O3-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-O3-NEXT: vmovdqa %xmm0, (%rdi)
; CHECK-O3-NEXT: retq
  store atomic i128 %v, ptr %ptr unordered, align 16
  ret void
}

define i256 @load_i256(ptr %ptr) {
; CHECK-O0-LABEL: load_i256:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: subq $56, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 64
; CHECK-O0-NEXT: movq %rdi, %rax
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill
; CHECK-O0-NEXT: movl $32, %edi
; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: callq __atomic_load@PLT
; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %r8
; CHECK-O0-NEXT: movq %r8, 24(%rdi)
; CHECK-O0-NEXT: movq %rsi, 16(%rdi)
; CHECK-O0-NEXT: movq %rdx, 8(%rdi)
; CHECK-O0-NEXT: movq %rcx, (%rdi)
; CHECK-O0-NEXT: addq $56, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i256:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: pushq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
; CHECK-O3-NEXT: subq $32, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 48
; CHECK-O3-NEXT: .cfi_offset %rbx, -16
; CHECK-O3-NEXT: movq %rdi, %rbx
; CHECK-O3-NEXT: movq %rsp, %rdx
; CHECK-O3-NEXT: movl $32, %edi
; CHECK-O3-NEXT: xorl %ecx, %ecx
; CHECK-O3-NEXT: callq __atomic_load@PLT
; CHECK-O3-NEXT: vmovups (%rsp), %ymm0
; CHECK-O3-NEXT: vmovups %ymm0, (%rbx)
; CHECK-O3-NEXT: movq %rbx, %rax
; CHECK-O3-NEXT: addq $32, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
; CHECK-O3-NEXT: popq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: vzeroupper
; CHECK-O3-NEXT: retq
  %v = load atomic i256, ptr %ptr unordered, align 16
  ret i256 %v
}

define void @store_i256(ptr %ptr, i256 %v) {
; CHECK-O0-LABEL: store_i256:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: subq $40, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 48
; CHECK-O0-NEXT: movq %rsi, %rax
; CHECK-O0-NEXT: movq %rdi, %rsi
; CHECK-O0-NEXT: movq %rax, (%rsp)
; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movl $32, %edi
; CHECK-O0-NEXT: movq %rsp, %rdx
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: callq __atomic_store@PLT
; CHECK-O0-NEXT: addq $40, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i256:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: subq $40, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 48
; CHECK-O3-NEXT: movq %rdi, %rax
; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rsi, (%rsp)
; CHECK-O3-NEXT: movq %rsp, %rdx
; CHECK-O3-NEXT: movl $32, %edi
; CHECK-O3-NEXT: movq %rax, %rsi
; CHECK-O3-NEXT: xorl %ecx, %ecx
; CHECK-O3-NEXT: callq __atomic_store@PLT
; CHECK-O3-NEXT: addq $40, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: retq
  store atomic i256 %v, ptr %ptr unordered, align 16
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @vec_store(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: vmovd %xmm0, %ecx
; CHECK-O0-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-O0-NEXT: movl %ecx, (%rdi)
; CHECK-O0-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: vec_store:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: vmovd %xmm0, %eax
; CHECK-O3-NEXT: vpextrd $1, %xmm0, %ecx
; CHECK-O3-NEXT: movl %eax, (%rdi)
; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
; CHECK-O3-NEXT: retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %v2 = extractelement <2 x i32> %vec, i32 1
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 8
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @vec_store_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store_unaligned:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: vmovd %xmm0, %ecx
; CHECK-O0-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-O0-NEXT: movl %ecx, (%rdi)
; CHECK-O0-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: vec_store_unaligned:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: vmovd %xmm0, %eax
; CHECK-O3-NEXT: vpextrd $1, %xmm0, %ecx
; CHECK-O3-NEXT: movl %eax, (%rdi)
; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
; CHECK-O3-NEXT: retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %v2 = extractelement <2 x i32> %vec, i32 1
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 4
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}



; Legal if wider type is also atomic (TODO)
; Also, can avoid register move from xmm to eax (TODO)
define void @widen_broadcast2(ptr %p0, <2 x i32> %vec) {
; CHECK-LABEL: widen_broadcast2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: movl %eax, 4(%rdi)
; CHECK-NEXT: retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 8
  store atomic i32 %v1, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_broadcast2_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-LABEL: widen_broadcast2_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: movl %eax, 4(%rdi)
; CHECK-NEXT: retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 4
  store atomic i32 %v1, ptr %p1 unordered, align 4
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @widen_zero_init(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_zero_init:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 0, ptr %p0 unordered, align 8
  store atomic i32 0, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_zero_init_unaligned(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_zero_init_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 0, ptr %p0 unordered, align 4
  store atomic i32 0, ptr %p1 unordered, align 4
  ret void
}

;; The next batch of tests are stressing load folding. Folding is legal
;; on x86, so these are simply checking optimization quality.
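;; "Folding" here means using the atomic load directly as a memory operand
;; of the consuming instruction, e.g. for @load_fold_add2 below:
;;   movq %rsi, %rax
;;   addq (%rdi), %rax
;; rather than loading into a register first and then adding. An aligned
;; 8-byte load on x86-64 is a single atomic access either way, so this is
;; purely a question of code quality.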

; Legal, as expected
define i64 @load_fold_add1(ptr %p) {
; CHECK-LABEL: load_fold_add1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_add2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_add2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: addq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = add i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_add3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_add3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: addq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_add3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: addq (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = add i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_sub1(ptr %p) {
; CHECK-O0-LABEL: load_fold_sub1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: subq $15, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_sub1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addq $-15, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sub i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_sub2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_sub2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sub i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_sub3(ptr %p1, ptr %p2) {
; CHECK-LABEL: load_fold_sub3:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: subq (%rsi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = sub i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_mul1(ptr %p) {
; CHECK-O0-LABEL: load_fold_mul1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: imulq $15, (%rdi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_mul1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = mul i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_mul2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_mul2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: imulq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = mul i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_mul3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_mul3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: imulq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_mul3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: imulq (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = mul i64 %v, %v2
  ret i64 %ret
}
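
; Note on the O3 expansions below: division by the constant 15 is
; strength-reduced to a multiply by the magic constant 0x8888888888888889
; plus shifts (standard magic-number division). The interesting question
; for these tests is only whether the atomic load feeding the division
; gets folded, not the division sequence itself.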

; Legal to fold (TODO)
define i64 @load_fold_sdiv1(ptr %p) {
; CHECK-O0-LABEL: load_fold_sdiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movl $15, %ecx
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rcx
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_sdiv1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: imulq %rdx
; CHECK-O3-NEXT: addq %rdx, %rcx
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: shrq $63, %rax
; CHECK-O3-NEXT: sarq $3, %rcx
; CHECK-O3-NEXT: addq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sdiv i64 %v, 15
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_sdiv2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_sdiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rsi
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_sdiv2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB35_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB35_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sdiv i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_sdiv3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_sdiv3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq (%rsi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_sdiv3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB36_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB36_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = sdiv i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_udiv1(ptr %p) {
; CHECK-O0-LABEL: load_fold_udiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movl $15, %ecx
; CHECK-O0-NEXT: xorl %edx, %edx
; CHECK-O0-NEXT: # kill: def $rdx killed $edx
; CHECK-O0-NEXT: divq %rcx
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_udiv1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rdx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-NEXT: mulxq %rax, %rax, %rax
; CHECK-O3-NEXT: shrq $3, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = udiv i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_udiv2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_udiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_udiv2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB38_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB38_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = udiv i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_udiv3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_udiv3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq (%rsi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_udiv3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB39_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB39_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = udiv i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_srem1(ptr %p) {
; CHECK-O0-LABEL: load_fold_srem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movl $15, %ecx
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rcx
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_srem1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: imulq %rdx
; CHECK-O3-NEXT: addq %rcx, %rdx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: shrq $63, %rax
; CHECK-O3-NEXT: sarq $3, %rdx
; CHECK-O3-NEXT: addq %rax, %rdx
; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: subq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = srem i64 %v, 15
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_srem2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_srem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rsi
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_srem2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB41_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB41_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = srem i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_srem3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_srem3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq (%rsi)
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_srem3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB42_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB42_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = srem i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_urem1(ptr %p) {
; CHECK-O0-LABEL: load_fold_urem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movl $15, %ecx
; CHECK-O0-NEXT: xorl %edx, %edx
; CHECK-O0-NEXT: # kill: def $rdx killed $edx
; CHECK-O0-NEXT: divq %rcx
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_urem1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rcx
; CHECK-O3-NEXT: shrq $3, %rcx
; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rcx
; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O3-NEXT: subq %rcx, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = urem i64 %v, 15
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_urem2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_urem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_urem2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB44_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB44_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = urem i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_urem3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_urem3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq (%rsi)
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_urem3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB45_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB45_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = urem i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_shl1(ptr %p) {
; CHECK-LABEL: load_fold_shl1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shlq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = shl i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_shl2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_shl2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: shlq %cl, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_shl2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = shl i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_shl3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_shl3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq (%rsi), %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: shlq %cl, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_shl3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = shl i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_lshr1(ptr %p) {
; CHECK-LABEL: load_fold_lshr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shrq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = lshr i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_lshr2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_lshr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: shrq %cl, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_lshr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = lshr i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_lshr3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_lshr3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq (%rsi), %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: shrq %cl, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_lshr3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = lshr i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_ashr1(ptr %p) {
; CHECK-LABEL: load_fold_ashr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: sarq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = ashr i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_ashr2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_ashr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: sarq %cl, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_ashr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = ashr i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_ashr3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_ashr3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq (%rsi), %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $rcx
; CHECK-O0-NEXT: sarq %cl, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_ashr3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = ashr i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_and1(ptr %p) {
; CHECK-O0-LABEL: load_fold_and1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: andq $15, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_and1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andl $15, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = and i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_and2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_and2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: andq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = and i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_and3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_and3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: andq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_and3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: andq (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = and i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_or1(ptr %p) {
; CHECK-LABEL: load_fold_or1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: orq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = or i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_or2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_or2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: orq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_or3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_or3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_or3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: orq (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_xor1(ptr %p) {
; CHECK-LABEL: load_fold_xor1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: xorq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = xor i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_xor2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_xor2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: xorq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = xor i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_xor3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_xor3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_xor3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: xorq (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = xor i64 %v, %v2
  ret i64 %ret
}

define i1 @load_fold_icmp1(ptr %p) {
; CHECK-O0-LABEL: load_fold_icmp1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: subq $15, %rax
; CHECK-O0-NEXT: sete %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_icmp1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: cmpq $15, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = icmp eq i64 %v, 15
  ret i1 %ret
}

define i1 @load_fold_icmp2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_icmp2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: subq %rsi, %rax
; CHECK-O0-NEXT: sete %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_icmp2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: cmpq %rsi, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = icmp eq i64 %v, %v2
  ret i1 %ret
}

define i1 @load_fold_icmp3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_icmp3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq (%rsi), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
; CHECK-O0-NEXT: sete %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_fold_icmp3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: cmpq %rax, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = icmp eq i64 %v, %v2
  ret i1 %ret
}


;; The next batch of tests check for read-modify-write patterns.
;; Legally, it's okay to use a memory operand here as long as the operand
;; is well aligned (i.e. doesn't cross a cache line boundary). We are
;; required not to narrow the store though!
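;; E.g. the ideal form for @rmw_fold_add1 below is the single
;; memory-operand instruction its O3 checks expect:
;;   addq $15, (%rdi)
;; while something like "addb $15, (%rdi)" would narrow the store and
;; be a miscompile.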

; Legal, as expected
define void @rmw_fold_add1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_add1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: addq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_add1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: addq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = add i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_add2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_add2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_add2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: addq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = add i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_sub1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sub1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: addq $-15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sub1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: addq $-15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = sub i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_sub2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sub2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: subq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sub2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: subq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = sub i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_mul1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_mul1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: leaq (%rax,%rax,4), %rax
; CHECK-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = mul i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_mul2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_mul2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: imulq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_mul2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: imulq (%rdi), %rsi
; CHECK-O3-NEXT: movq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = mul i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_sdiv1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sdiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rcx
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O0-NEXT: movq %rcx, %rax
; CHECK-O0-NEXT: imulq %rdx
; CHECK-O0-NEXT: movq %rdx, %rax
; CHECK-O0-NEXT: addq %rcx, %rax
; CHECK-O0-NEXT: movq %rax, %rcx
; CHECK-O0-NEXT: shrq $63, %rcx
; CHECK-O0-NEXT: sarq $3, %rax
; CHECK-O0-NEXT: addq %rcx, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sdiv1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: imulq %rdx
; CHECK-O3-NEXT: addq %rcx, %rdx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: shrq $63, %rax
; CHECK-O3-NEXT: sarq $3, %rdx
; CHECK-O3-NEXT: addq %rax, %rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = sdiv i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_sdiv2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sdiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rsi
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sdiv2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB74_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB74_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = sdiv i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_udiv1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_udiv1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rdx
; CHECK-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-NEXT: mulxq %rax, %rax, %rax
; CHECK-NEXT: shrq $3, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = udiv i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_udiv2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_udiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_udiv2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB76_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB76_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = udiv i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_srem1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT: imulq %rcx
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: movq %rdx, %rcx
; CHECK-O0-NEXT: addq %rax, %rcx
; CHECK-O0-NEXT: movq %rcx, %rdx
; CHECK-O0-NEXT: shrq $63, %rdx
; CHECK-O0-NEXT: sarq $3, %rcx
; CHECK-O0-NEXT: addq %rdx, %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_srem1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: imulq %rdx
; CHECK-O3-NEXT: addq %rcx, %rdx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: shrq $63, %rax
; CHECK-O3-NEXT: sarq $3, %rdx
; CHECK-O3-NEXT: addq %rax, %rdx
; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: subq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = srem i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_srem2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rsi
; CHECK-O0-NEXT: movq %rdx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_srem2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB78_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB78_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = srem i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT: movq %rax, %rdx
; CHECK-O0-NEXT: mulxq %rcx, %rcx, %rcx
; CHECK-O0-NEXT: shrq $3, %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_urem1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rdx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-NEXT: mulxq %rax, %rax, %rax
; CHECK-O3-NEXT: shrq $3, %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: subq %rax, %rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = urem i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: movq %rdx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_urem2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB80_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB80_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = urem i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_shl1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_shl1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shlq $15, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = shl i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_shl2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movb %sil, %dl
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %dl, %cl
; CHECK-O0-NEXT: shlxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_shl2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = shl i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_lshr1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_lshr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shrq $15, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = lshr i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_lshr2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movb %sil, %dl
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %dl, %cl
; CHECK-O0-NEXT: shrxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_lshr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = lshr i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_ashr1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_ashr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: sarq $15, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = ashr i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_ashr2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movb %sil, %dl
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %dl, %cl
; CHECK-O0-NEXT: sarxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_ashr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = ashr i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $15, %eax
; CHECK-O0-NEXT: # kill: def $rax killed $eax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;

; Legal, as expected
define void @rmw_fold_and1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $15, %eax
; CHECK-O0-NEXT: # kill: def $rax killed $eax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = and i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: andq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = and i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_or1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = or i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_or2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = or i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_xor1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = xor i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_xor2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = xor i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

;; The next batch tests truncations, in combination with operations which
;; could be folded against the memory operation.
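;; For example, after narrowing, fold_trunc below could lower to just
;;   movl (%rdi), %eax
;;   retq
;; (a sketch of the desired codegen, not current output). On little-endian
;; x86-64 the low 32 bits of the 8-byte-aligned i64 are themselves 4-byte
;; aligned, so the narrowed load is still atomic.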

; Legal to reduce the load width (TODO)
define i32 @fold_trunc(ptr %p) {
; CHECK-LABEL: fold_trunc:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = trunc i64 %v to i32
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_add(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_add:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: addl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_add:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = add i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_and(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_and:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = and i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_or(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: orl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_or:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = or i32 %trunc, %v2
  ret i32 %ret
}

; It's tempting to split the wide load into two smaller byte loads
; to reduce memory traffic, but this would be illegal for an atomic load
define i32 @split_load(ptr %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rcx
; CHECK-O0-NEXT: movb %cl, %al
; CHECK-O0-NEXT: shrq $32, %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O0-NEXT: orb %cl, %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: split_load:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: orl %eax, %ecx
; CHECK-O3-NEXT: movzbl %cl, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %b1 = trunc i64 %v to i8
  %v.shift = lshr i64 %v, 32
  %b2 = trunc i64 %v.shift to i8
  %or = or i8 %b1, %b2
  %ret = zext i8 %or to i32
  ret i32 %ret
}
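
;; For reference, the (illegal) split form of split_load would look like
;; this sketch:
;;   %b1 = load atomic i8, ptr %p unordered, align 8
;;   %p.hi = getelementptr i8, ptr %p, i64 4
;;   %b2 = load atomic i8, ptr %p.hi unordered, align 4
;; Each narrow load is individually atomic, but the pair no longer reads the
;; i64 as one indivisible value: a concurrent store could land between the
;; two loads.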

;; A collection of simple memory forwarding tests. Nothing particularly
;; interesting semantics-wise; these just demonstrate obvious missed
;; transforms.

@Zero = constant i64 0

; TODO: should return constant
define i64 @constant_folding(ptr %p) {
; CHECK-LABEL: constant_folding:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  ret i64 %v
}

; Legal to forward and fold (TODO)
define i64 @load_forwarding(ptr %p) {
; CHECK-LABEL: load_forwarding:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: orq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  %v2 = load atomic i64, ptr %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal to forward (TODO)
define i64 @store_forward(ptr %p, i64 %v) {
; CHECK-LABEL: store_forward:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
  store atomic i64 %v, ptr %p unordered, align 8
  %ret = load atomic i64, ptr %p unordered, align 8
  ret i64 %ret
}

; Legal to kill (TODO)
define void @dead_writeback(ptr %p) {
; CHECK-LABEL: dead_writeback:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  store atomic i64 %v, ptr %p unordered, align 8
  ret void
}

; Legal to kill (TODO)
define void @dead_store(ptr %p, i64 %v) {
; CHECK-LABEL: dead_store:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $0, (%rdi)
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: retq
  store atomic i64 0, ptr %p unordered, align 8
  store atomic i64 %v, ptr %p unordered, align 8
  ret void
}

;; The next batch of tests ensures that we don't try to fold a load into a
;; use where the code motion implied for the load is prevented by a fence.
;; Note: We're checking that the load doesn't get moved below the fence as
;; part of folding, but it is technically legal to hoist the add above the
;; fence. If that ever happens, please rewrite the tests so they still
;; verify that the load is not moved.

define i64 @nofold_fence(ptr %p) {
; CHECK-LABEL: nofold_fence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: mfence
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  fence seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_fence_acquire(ptr %p) {
; CHECK-LABEL: nofold_fence_acquire:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  fence acquire
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_stfence(ptr %p) {
; CHECK-LABEL: nofold_stfence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8
  fence syncscope("singlethread") seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}
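
;; In each of the tests above, folding the load into the add would in effect
;; sink the load below the fence, e.g. for nofold_fence (illegal sketch):
;;   mfence
;;   movq (%rdi), %rax
;;   addq $15, %rax
;; which is exactly the reordering the fence must prevent.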

;; Next, test how well we can fold invariant loads.

@Constant = external dso_local constant i64

define i64 @fold_constant(i64 %arg) {
; CHECK-O0-LABEL: fold_constant:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rdi, %rax
; CHECK-O0-NEXT: addq Constant, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_constant:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq %rdi, %rax
; CHECK-O3-NEXT: addq Constant(%rip), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, ptr @Constant unordered, align 8
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_clobber(ptr %p, i64 %arg) {
; CHECK-LABEL: fold_constant_clobber:
; CHECK: # %bb.0:
; CHECK-NEXT: movq Constant(%rip), %rax
; CHECK-NEXT: movq $5, (%rdi)
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr @Constant unordered, align 8
  store i64 5, ptr %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_fence(i64 %arg) {
; CHECK-LABEL: fold_constant_fence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq Constant(%rip), %rax
; CHECK-NEXT: mfence
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr @Constant unordered, align 8
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_clobber(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-LABEL: fold_invariant_clobber:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq $5, (%rdi)
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
  store i64 5, ptr %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_fence(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-LABEL: fold_invariant_fence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: mfence
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}

; Exercise a few cases involving anyext idioms
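;; In the tests below, inserting the loaded value into lane 0 of an undef
;; vector and bitcasting to a wider integer defines only the low bits; the
;; remaining bits are undef. That is, a pattern such as
;;   %vec = insertelement <2 x i8> undef, i8 %v, i32 0
;;   %res = bitcast <2 x i8> %vec to i16
;; is an any-extend of %v, so only the narrow load itself needs to be atomic.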

define i16 @load_i8_anyext_i16(ptr %ptr) {
; CHECK-O0-LABEL: load_i8_anyext_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i8_anyext_i16:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
  %v = load atomic i8, ptr %ptr unordered, align 2
  %vec = insertelement <2 x i8> undef, i8 %v, i32 0
  %res = bitcast <2 x i8> %vec to i16
  ret i16 %res
}

define i32 @load_i8_anyext_i32(ptr %ptr) {
; CHECK-O0-LABEL: load_i8_anyext_i32:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i8_anyext_i32:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i8, ptr %ptr unordered, align 4
  %vec = insertelement <4 x i8> undef, i8 %v, i32 0
  %res = bitcast <4 x i8> %vec to i32
  ret i32 %res
}

define i32 @load_i16_anyext_i32(ptr %ptr) {
; CHECK-O0-LABEL: load_i16_anyext_i32:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i16_anyext_i32:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i16, ptr %ptr unordered, align 4
  %vec = insertelement <2 x i16> undef, i16 %v, i64 0
  %res = bitcast <2 x i16> %vec to i32
  ret i32 %res
}

define i64 @load_i16_anyext_i64(ptr %ptr) {
; CHECK-O0-LABEL: load_i16_anyext_i64:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
; CHECK-O0-NEXT: vmovd %eax, %xmm0
; CHECK-O0-NEXT: vmovq %xmm0, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i16_anyext_i64:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: vmovd %eax, %xmm0
; CHECK-O3-NEXT: vmovq %xmm0, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i16, ptr %ptr unordered, align 8
  %vec = insertelement <4 x i16> undef, i16 %v, i64 0
  %res = bitcast <4 x i16> %vec to i64
  ret i64 %res
}

; TODO: Would be legal to combine these into a single wider load when the
; wider type is a legal atomic type
define i16 @load_combine(ptr %p) {
; CHECK-O0-LABEL: load_combine:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movb 1(%rdi), %cl
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-NEXT: movzbl %cl, %ecx
; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx
; CHECK-O0-NEXT: shlw $8, %cx
; CHECK-O0-NEXT: orw %cx, %ax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_combine:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %ecx
; CHECK-O3-NEXT: movzbl 1(%rdi), %eax
; CHECK-O3-NEXT: shll $8, %eax
; CHECK-O3-NEXT: orl %ecx, %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
  %v1 = load atomic i8, ptr %p unordered, align 2
  %p2 = getelementptr i8, ptr %p, i64 1
  %v2 = load atomic i8, ptr %p2 unordered, align 1
  %v1.ext = zext i8 %v1 to i16
  %v2.ext = zext i8 %v2 to i16
  %v2.sht = shl i16 %v2.ext, 8
  %res = or i16 %v1.ext, %v2.sht
  ret i16 %res
}

define i1 @fold_cmp_over_fence(ptr %p, i32 %v1) {
; CHECK-O0-LABEL: fold_cmp_over_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl (%rdi), %eax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: cmpl %eax, %esi
; CHECK-O0-NEXT: jne .LBB116_2
; CHECK-O0-NEXT: # %bb.1: # %taken
; CHECK-O0-NEXT: movb $1, %al
; CHECK-O0-NEXT: retq
; CHECK-O0-NEXT: .LBB116_2: # %untaken
; CHECK-O0-NEXT: xorl %eax, %eax
; CHECK-O0-NEXT: # kill: def $al killed $al killed $eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_cmp_over_fence:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movl (%rdi), %eax
; CHECK-O3-NEXT: mfence
; CHECK-O3-NEXT: cmpl %eax, %esi
; CHECK-O3-NEXT: jne .LBB116_2
; CHECK-O3-NEXT: # %bb.1: # %taken
; CHECK-O3-NEXT: movb $1, %al
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB116_2: # %untaken
; CHECK-O3-NEXT: xorl %eax, %eax
; CHECK-O3-NEXT: retq
  %v2 = load atomic i32, ptr %p unordered, align 4
  fence seq_cst
  %cmp = icmp eq i32 %v1, %v2
  br i1 %cmp, label %taken, label %untaken
taken:
  ret i1 true
untaken:
  ret i1 false
}
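
;; Note on fold_cmp_over_fence: folding the load into the compare (e.g.
;; cmpl (%rdi), %esi, an illustrative sketch) would move the atomic load
;; below the mfence, so, as in the nofold_fence tests, the load is correctly
;; materialized before the fence.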