; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 < %s | FileCheck %s

; This file checks the reassociation of the ADD instruction.
; The two ADD instructions add v0, v1 and t2 together. t2 has a long
; dependence chain, while v0 and v1 have short dependence chains; to get the
; shortest latency, v0 and v1 should be added first, and their result added
; to t2 later.

; i8 case: the IR adds t2 (long chain: sub/or/mul) first, but the generated
; code adds the two short-chain values first (addb %dil, %sil) and folds the
; imull result in afterwards — i.e. the adds were reassociated.
define void @add8(i8 %x0, i8 %x1, i8 %x2, ptr %p) {
; CHECK-LABEL: add8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orb $16, %dil
; CHECK-NEXT:    orb $32, %sil
; CHECK-NEXT:    addb %dil, %sil
; CHECK-NEXT:    addb $-8, %dl
; CHECK-NEXT:    orb $7, %dl
; CHECK-NEXT:    movzbl %dl, %eax
; CHECK-NEXT:    imull $100, %eax, %eax
; CHECK-NEXT:    addb %sil, %al
; CHECK-NEXT:    movb %al, (%rcx)
; CHECK-NEXT:    retq
  %v0 = or i8 %x0, 16
  %v1 = or i8 %x1, 32
  %t0 = sub i8 %x2, 8
  %t1 = or i8 %t0, 7
  %t2 = mul i8 %t1, 100
  %t3 = add i8 %t2, %v1
  %t4 = add i8 %t3, %v0
  store i8 %t4, ptr %p, align 4
  ret void
}

; i16 case: same pattern as add8; the short-chain values are added first
; (addl %edi, %esi) before the imull result.
define void @add16(i16 %x0, i16 %x1, i16 %x2, ptr %p) {
; CHECK-LABEL: add16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orl $16, %edi
; CHECK-NEXT:    orl $32, %esi
; CHECK-NEXT:    addl %edi, %esi
; CHECK-NEXT:    addl $-8, %edx
; CHECK-NEXT:    orl $7, %edx
; CHECK-NEXT:    imull $100, %edx, %eax
; CHECK-NEXT:    addl %esi, %eax
; CHECK-NEXT:    movw %ax, (%rcx)
; CHECK-NEXT:    retq
  %v0 = or i16 %x0, 16
  %v1 = or i16 %x1, 32
  %t0 = sub i16 %x2, 8
  %t1 = or i16 %t0, 7
  %t2 = mul i16 %t1, 100
  %t3 = add i16 %t2, %v1
  %t4 = add i16 %t3, %v0
  store i16 %t4, ptr %p, align 4
  ret void
}

; i32 case: same reassociation as above.
define void @add32(i32 %x0, i32 %x1, i32 %x2, ptr %p) {
; CHECK-LABEL: add32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orl $16, %edi
; CHECK-NEXT:    orl $32, %esi
; CHECK-NEXT:    addl %edi, %esi
; CHECK-NEXT:    addl $-8, %edx
; CHECK-NEXT:    orl $7, %edx
; CHECK-NEXT:    imull $100, %edx, %eax
; CHECK-NEXT:    addl %esi, %eax
; CHECK-NEXT:    movl %eax, (%rcx)
; CHECK-NEXT:    retq
  %v0 = or i32 %x0, 16
  %v1 = or i32 %x1, 32
  %t0 = sub i32 %x2, 8
  %t1 = or i32 %t0, 7
  %t2 = mul i32 %t1, 100
  %t3 = add i32 %t2, %v1
  %t4 = add i32 %t3, %v0
  store i32 %t4, ptr %p, align 4
  ret void
}

; i64 case: same reassociation as above.
define void @add64(i64 %x0, i64 %x1, i64 %x2, ptr %p) {
; CHECK-LABEL: add64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orq $16, %rdi
; CHECK-NEXT:    orq $32, %rsi
; CHECK-NEXT:    addq %rdi, %rsi
; CHECK-NEXT:    addq $-8, %rdx
; CHECK-NEXT:    orq $7, %rdx
; CHECK-NEXT:    imulq $100, %rdx, %rax
; CHECK-NEXT:    addq %rsi, %rax
; CHECK-NEXT:    movq %rax, (%rcx)
; CHECK-NEXT:    retq
  %v0 = or i64 %x0, 16
  %v1 = or i64 %x1, 32
  %t0 = sub i64 %x2, 8
  %t1 = or i64 %t0, 7
  %t2 = mul i64 %t1, 100
  %t3 = add i64 %t2, %v1
  %t4 = add i64 %t3, %v0
  store i64 %t4, ptr %p, align 4
  ret void
}

; Negative test. Original sequence has shorter latency, don't transform it.
; Here the IR already adds the two short-chain values (v0 + v1) first, so the
; generated code matches the IR order and no reassociation is expected.
define void @add64_negative(i64 %x0, i64 %x1, i64 %x2, ptr %p) {
; CHECK-LABEL: add64_negative:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orq $16, %rdi
; CHECK-NEXT:    orq $32, %rsi
; CHECK-NEXT:    addq %rdi, %rsi
; CHECK-NEXT:    addq $-8, %rdx
; CHECK-NEXT:    orq $7, %rdx
; CHECK-NEXT:    imulq $100, %rdx, %rax
; CHECK-NEXT:    addq %rsi, %rax
; CHECK-NEXT:    movq %rax, (%rcx)
; CHECK-NEXT:    retq
  %v0 = or i64 %x0, 16
  %v1 = or i64 %x1, 32
  %t0 = sub i64 %x2, 8
  %t1 = or i64 %t0, 7
  %t2 = mul i64 %t1, 100
  %t3 = add i64 %v0, %v1
  %t4 = add i64 %t3, %t2
  store i64 %t4, ptr %p, align 4
  ret void
}