# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -bottleneck-analysis < %s | FileCheck %s

# Test input: a single loop body (.LBB0_4) simulated for Skylake with the
# bottleneck analysis view enabled (see the tool invocation above).
#
# According to the expected report further down, the critical sequence is
# loop carried: the addq that advances %rax feeds the first load of the next
# iteration, and the remaining links are register dependencies that run
# through the vfmadd chain down to the final vmovaps.
#
# The expectations after the loop are marked as autogenerated by
# utils/update_mca_test_checks.py; presumably they should be regenerated with
# that script rather than edited by hand whenever the loop body changes.

.LBB0_4:
  vmovups (%rsi,%rax,2), %xmm0
  vpermilps $255, %xmm0, %xmm7
  vmulps -24(%rsp), %xmm7, %xmm8
  vpermilps $170, %xmm0, %xmm6
  vpermilps $85, %xmm0, %xmm5
  vbroadcastss %xmm0, %xmm0
  vfmadd231ps %xmm9, %xmm6, %xmm8
  vfmadd213ps %xmm8, %xmm10, %xmm5
  vfmadd213ps %xmm5, %xmm11, %xmm0
  vfmadd213ps %xmm0, %xmm12, %xmm4
  vfmadd213ps %xmm4, %xmm13, %xmm1
  vmovaps %xmm7, %xmm4
  vfmadd213ps %xmm1, %xmm14, %xmm2
  vmovaps %xmm6, %xmm1
  vfmadd213ps %xmm2, %xmm15, %xmm3
  vpermilps $170, %xmm3, %xmm0
  vmovups %xmm3, (%rdx,%rax)
  vpermilps $255, %xmm3, %xmm2
  addq $16, %rax
  decl %ecx
  vmovaps %xmm0, %xmm3
  jne .LBB0_4

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 2200
# CHECK-NEXT: Total Cycles: 1039
# CHECK-NEXT: Total uOps: 2400

# CHECK: Dispatch Width: 6
# CHECK-NEXT: uOps Per Cycle: 2.31
# CHECK-NEXT: IPC: 2.12
# CHECK-NEXT: Block RThroughput: 6.0

# CHECK: Cycles with backend pressure increase [ 92.69% ]
# CHECK-NEXT: Throughput Bottlenecks:
# CHECK-NEXT: Resource Pressure [ 46.78% ]
# CHECK-NEXT: - SKLPort0 [ 14.24% ]
# CHECK-NEXT: - SKLPort1 [ 14.24% ]
# CHECK-NEXT: - SKLPort5 [ 46.49% ]
# CHECK-NEXT: - SKLPort6 [ 8.66% ]
# CHECK-NEXT: Data Dependencies: [ 64.97% ]
# CHECK-NEXT: - Register Dependencies [ 64.97% ]
# CHECK-NEXT: - Memory Dependencies [ 0.00% ]

# CHECK: Critical sequence based on the simulation:

# CHECK: Instruction Dependency Information
# CHECK-NEXT: +----< 18. addq $16, %rax
# CHECK-NEXT: |
# CHECK-NEXT: | < loop carried >
# CHECK-NEXT: |
# CHECK-NEXT: +----> 0. vmovups (%rsi,%rax,2), %xmm0 ## REGISTER dependency: %rax
# CHECK-NEXT: | 1. vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: | 2. vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: +----> 3. vpermilps $170, %xmm0, %xmm6 ## REGISTER dependency: %xmm0
# CHECK-NEXT: | 4. vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: | 5. vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: +----> 6. vfmadd231ps %xmm9, %xmm6, %xmm8 ## REGISTER dependency: %xmm6
# CHECK-NEXT: +----> 7. vfmadd213ps %xmm8, %xmm10, %xmm5 ## REGISTER dependency: %xmm8
# CHECK-NEXT: +----> 8. vfmadd213ps %xmm5, %xmm11, %xmm0 ## REGISTER dependency: %xmm5
# CHECK-NEXT: +----> 9. vfmadd213ps %xmm0, %xmm12, %xmm4 ## REGISTER dependency: %xmm0
# CHECK-NEXT: +----> 10. vfmadd213ps %xmm4, %xmm13, %xmm1 ## REGISTER dependency: %xmm4
# CHECK-NEXT: | 11. vmovaps %xmm7, %xmm4
# CHECK-NEXT: +----> 12. vfmadd213ps %xmm1, %xmm14, %xmm2 ## REGISTER dependency: %xmm1
# CHECK-NEXT: | 13. vmovaps %xmm6, %xmm1
# CHECK-NEXT: +----> 14. vfmadd213ps %xmm2, %xmm15, %xmm3 ## REGISTER dependency: %xmm2
# CHECK-NEXT: +----> 15. vpermilps $170, %xmm3, %xmm0 ## REGISTER dependency: %xmm3
# CHECK-NEXT: | 16. vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: | 17. vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: | 18. addq $16, %rax
# CHECK-NEXT: | 19. decl %ecx
# CHECK-NEXT: +----> 20. vmovaps %xmm0, %xmm3 ## REGISTER dependency: %xmm0
# CHECK-NEXT: 21. jne .LBB0_4

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 0.50 * vmovups (%rsi,%rax,2), %xmm0
# CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: 2 10 0.50 * vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm0, %xmm6
# CHECK-NEXT: 1 1 1.00 vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: 1 1 1.00 vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm9, %xmm6, %xmm8
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm8, %xmm10, %xmm5
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm5, %xmm11, %xmm0
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm12, %xmm4
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm4, %xmm13, %xmm1
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm7, %xmm4
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm1, %xmm14, %xmm2
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm6, %xmm1
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm2, %xmm15, %xmm3
# CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm3, %xmm0
# CHECK-NEXT: 2 1 1.00 * vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: 1 1 0.25 addq $16, %rax
# CHECK-NEXT: 1 1 0.25 decl %ecx
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 jne .LBB0_4

# CHECK: Resources:
# CHECK-NEXT: [0] - SKLDivider
# CHECK-NEXT: [1] - SKLFPDivider
# CHECK-NEXT: [2] - SKLPort0
# CHECK-NEXT: [3] - SKLPort1
# CHECK-NEXT: [4] - SKLPort2
# CHECK-NEXT: [5] - SKLPort3
# CHECK-NEXT: [6] - SKLPort4
# CHECK-NEXT: [7] - SKLPort5
# CHECK-NEXT: [8] - SKLPort6
# CHECK-NEXT: [9] - SKLPort7

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 5.52 5.53 1.01 1.03 1.00 6.02 2.93 0.96

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - - - 0.04 0.96 - - - - vmovups (%rsi,%rax,2), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: - - 0.03 0.97 0.96 0.04 - - - - vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm0, %xmm6
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: - - - - - - - 1.00 - - vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd231ps %xmm9, %xmm6, %xmm8
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm8, %xmm10, %xmm5
# CHECK-NEXT: - - 0.92 0.08 - - - - - - vfmadd213ps %xmm5, %xmm11, %xmm0
# CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd213ps %xmm0, %xmm12, %xmm4
# CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm4, %xmm13, %xmm1
# CHECK-NEXT: - - 0.52 0.48 - - - - - - vmovaps %xmm7, %xmm4
# CHECK-NEXT: - - 0.49 0.51 - - - - - - vfmadd213ps %xmm1, %xmm14, %xmm2
# CHECK-NEXT: - - 0.04 0.95 - - - 0.01 - - vmovaps %xmm6, %xmm1
# CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm2, %xmm15, %xmm3
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm3, %xmm0
# CHECK-NEXT: - - - - 0.01 0.03 1.00 - - 0.96 vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 - addq $16, %rax
# CHECK-NEXT: - - 0.04 0.01 - - - 0.01 0.94 - decl %ecx
# CHECK-NEXT: - - 0.05 0.95 - - - - - - vmovaps %xmm0, %xmm3
# CHECK-NEXT: - - 0.01 - - - - - 0.99 - jne .LBB0_4