xref: /llvm-project/llvm/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s (revision e0900f285bb532790ed494df901f87c5c8b904da)
1# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake  -bottleneck-analysis < %s | FileCheck %s
3
# Loop kernel used as input for llvm-mca's bottleneck analysis: one vector
# load, four lane splats of the loaded value, a serial multiply/FMA chain that
# ends in %xmm3, a store of %xmm3, and the index/counter updates.
# %xmm9-%xmm15 are only read inside the loop; %xmm1-%xmm4 are read before
# being rewritten, so they carry values over from the previous iteration.
4.LBB0_4:
5  vmovups	(%rsi,%rax,2), %xmm0        # xmm0 = 16 bytes loaded from rsi + 2*rax
6  vpermilps	$255, %xmm0, %xmm7        # xmm7 = element 3 of xmm0 copied to all four lanes
7  vmulps	-24(%rsp), %xmm7, %xmm8        # xmm8 = xmm7 * (16 bytes at rsp - 24); starts the chain
8  vpermilps	$170, %xmm0, %xmm6        # xmm6 = element 2 of xmm0 copied to all four lanes
9  vpermilps	$85, %xmm0, %xmm5        # xmm5 = element 1 of xmm0 copied to all four lanes
10  vbroadcastss	%xmm0, %xmm0        # xmm0 = element 0 of xmm0 copied to all four lanes
11  vfmadd231ps	%xmm9, %xmm6, %xmm8        # xmm8 = xmm6 * xmm9  + xmm8
12  vfmadd213ps	%xmm8, %xmm10, %xmm5        # xmm5 = xmm10 * xmm5 + xmm8
13  vfmadd213ps	%xmm5, %xmm11, %xmm0        # xmm0 = xmm11 * xmm0 + xmm5
14  vfmadd213ps	%xmm0, %xmm12, %xmm4        # xmm4 = xmm12 * xmm4 + xmm0 (xmm4 from previous iteration)
15  vfmadd213ps	%xmm4, %xmm13, %xmm1        # xmm1 = xmm13 * xmm1 + xmm4 (xmm1 from previous iteration)
16  vmovaps	%xmm7, %xmm4        # keep this iteration's xmm7 in xmm4 for the next iteration
17  vfmadd213ps	%xmm1, %xmm14, %xmm2        # xmm2 = xmm14 * xmm2 + xmm1 (xmm2 from previous iteration)
18  vmovaps	%xmm6, %xmm1        # keep this iteration's xmm6 in xmm1 for the next iteration
19  vfmadd213ps	%xmm2, %xmm15, %xmm3        # xmm3 = xmm15 * xmm3 + xmm2 (xmm3 from previous iteration)
20  vpermilps	$170, %xmm3, %xmm0        # xmm0 = element 2 of the result copied to all four lanes
21  vmovups	%xmm3, (%rdx,%rax)        # store the 16-byte result to rdx + rax
22  vpermilps	$255, %xmm3, %xmm2        # xmm2 = element 3 of the result, consumed by the next iteration
23  addq	$16, %rax        # advance the shared index; the next iteration's load address depends on it
24  decl	%ecx        # decrement the trip counter; sets ZF for the branch below
25  vmovaps	%xmm0, %xmm3        # xmm3 = splat of result element 2, consumed by the next iteration
26  jne	.LBB0_4        # repeat while ecx != 0
27
28# CHECK:      Iterations:        100
29# CHECK-NEXT: Instructions:      2200
30# CHECK-NEXT: Total Cycles:      1039
31# CHECK-NEXT: Total uOps:        2400
32
33# CHECK:      Dispatch Width:    6
34# CHECK-NEXT: uOps Per Cycle:    2.31
35# CHECK-NEXT: IPC:               2.12
36# CHECK-NEXT: Block RThroughput: 6.0
37
38# CHECK:      Cycles with backend pressure increase [ 92.69% ]
39# CHECK-NEXT: Throughput Bottlenecks:
40# CHECK-NEXT:   Resource Pressure       [ 46.78% ]
41# CHECK-NEXT:   - SKLPort0  [ 14.24% ]
42# CHECK-NEXT:   - SKLPort1  [ 14.24% ]
43# CHECK-NEXT:   - SKLPort5  [ 46.49% ]
44# CHECK-NEXT:   - SKLPort6  [ 8.66% ]
45# CHECK-NEXT:   Data Dependencies:      [ 64.97% ]
46# CHECK-NEXT:   - Register Dependencies [ 64.97% ]
47# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
48
49# CHECK:      Critical sequence based on the simulation:
50
51# CHECK:                    Instruction                                 Dependency Information
52# CHECK-NEXT:  +----< 18.   addq	$16, %rax
53# CHECK-NEXT:  |
54# CHECK-NEXT:  |    < loop carried >
55# CHECK-NEXT:  |
56# CHECK-NEXT:  +----> 0.    vmovups	(%rsi,%rax,2), %xmm0              ## REGISTER dependency:  %rax
57# CHECK-NEXT:  |      1.    vpermilps	$255, %xmm0, %xmm7
58# CHECK-NEXT:  |      2.    vmulps	-24(%rsp), %xmm7, %xmm8
59# CHECK-NEXT:  +----> 3.    vpermilps	$170, %xmm0, %xmm6                ## REGISTER dependency:  %xmm0
60# CHECK-NEXT:  |      4.    vpermilps	$85, %xmm0, %xmm5
61# CHECK-NEXT:  |      5.    vbroadcastss	%xmm0, %xmm0
62# CHECK-NEXT:  +----> 6.    vfmadd231ps	%xmm9, %xmm6, %xmm8       ## REGISTER dependency:  %xmm6
63# CHECK-NEXT:  +----> 7.    vfmadd213ps	%xmm8, %xmm10, %xmm5      ## REGISTER dependency:  %xmm8
64# CHECK-NEXT:  +----> 8.    vfmadd213ps	%xmm5, %xmm11, %xmm0      ## REGISTER dependency:  %xmm5
65# CHECK-NEXT:  +----> 9.    vfmadd213ps	%xmm0, %xmm12, %xmm4      ## REGISTER dependency:  %xmm0
66# CHECK-NEXT:  +----> 10.   vfmadd213ps	%xmm4, %xmm13, %xmm1      ## REGISTER dependency:  %xmm4
67# CHECK-NEXT:  |      11.   vmovaps	%xmm7, %xmm4
68# CHECK-NEXT:  +----> 12.   vfmadd213ps	%xmm1, %xmm14, %xmm2      ## REGISTER dependency:  %xmm1
69# CHECK-NEXT:  |      13.   vmovaps	%xmm6, %xmm1
70# CHECK-NEXT:  +----> 14.   vfmadd213ps	%xmm2, %xmm15, %xmm3      ## REGISTER dependency:  %xmm2
71# CHECK-NEXT:  +----> 15.   vpermilps	$170, %xmm3, %xmm0                ## REGISTER dependency:  %xmm3
72# CHECK-NEXT:  |      16.   vmovups	%xmm3, (%rdx,%rax)
73# CHECK-NEXT:  |      17.   vpermilps	$255, %xmm3, %xmm2
74# CHECK-NEXT:  |      18.   addq	$16, %rax
75# CHECK-NEXT:  |      19.   decl	%ecx
76# CHECK-NEXT:  +----> 20.   vmovaps	%xmm0, %xmm3                      ## REGISTER dependency:  %xmm0
77# CHECK-NEXT:         21.   jne	.LBB0_4
78
79# CHECK:      Instruction Info:
80# CHECK-NEXT: [1]: #uOps
81# CHECK-NEXT: [2]: Latency
82# CHECK-NEXT: [3]: RThroughput
83# CHECK-NEXT: [4]: MayLoad
84# CHECK-NEXT: [5]: MayStore
85# CHECK-NEXT: [6]: HasSideEffects (U)
86
87# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
88# CHECK-NEXT:  1      6     0.50    *                   vmovups	(%rsi,%rax,2), %xmm0
89# CHECK-NEXT:  1      1     1.00                        vpermilps	$255, %xmm0, %xmm7
90# CHECK-NEXT:  2      10    0.50    *                   vmulps	-24(%rsp), %xmm7, %xmm8
91# CHECK-NEXT:  1      1     1.00                        vpermilps	$170, %xmm0, %xmm6
92# CHECK-NEXT:  1      1     1.00                        vpermilps	$85, %xmm0, %xmm5
93# CHECK-NEXT:  1      1     1.00                        vbroadcastss	%xmm0, %xmm0
94# CHECK-NEXT:  1      4     0.50                        vfmadd231ps	%xmm9, %xmm6, %xmm8
95# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm8, %xmm10, %xmm5
96# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm5, %xmm11, %xmm0
97# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm0, %xmm12, %xmm4
98# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm4, %xmm13, %xmm1
99# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm7, %xmm4
100# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm1, %xmm14, %xmm2
101# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm6, %xmm1
102# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm2, %xmm15, %xmm3
103# CHECK-NEXT:  1      1     1.00                        vpermilps	$170, %xmm3, %xmm0
104# CHECK-NEXT:  2      1     1.00           *            vmovups	%xmm3, (%rdx,%rax)
105# CHECK-NEXT:  1      1     1.00                        vpermilps	$255, %xmm3, %xmm2
106# CHECK-NEXT:  1      1     0.25                        addq	$16, %rax
107# CHECK-NEXT:  1      1     0.25                        decl	%ecx
108# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm0, %xmm3
109# CHECK-NEXT:  1      1     0.50                        jne	.LBB0_4
110
111# CHECK:      Resources:
112# CHECK-NEXT: [0]   - SKLDivider
113# CHECK-NEXT: [1]   - SKLFPDivider
114# CHECK-NEXT: [2]   - SKLPort0
115# CHECK-NEXT: [3]   - SKLPort1
116# CHECK-NEXT: [4]   - SKLPort2
117# CHECK-NEXT: [5]   - SKLPort3
118# CHECK-NEXT: [6]   - SKLPort4
119# CHECK-NEXT: [7]   - SKLPort5
120# CHECK-NEXT: [8]   - SKLPort6
121# CHECK-NEXT: [9]   - SKLPort7
122
123# CHECK:      Resource pressure per iteration:
124# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
125# CHECK-NEXT:  -      -     5.52   5.53   1.01   1.03   1.00   6.02   2.93   0.96
126
127# CHECK:      Resource pressure by instruction:
128# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
129# CHECK-NEXT:  -      -      -      -     0.04   0.96    -      -      -      -     vmovups	(%rsi,%rax,2), %xmm0
130# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$255, %xmm0, %xmm7
131# CHECK-NEXT:  -      -     0.03   0.97   0.96   0.04    -      -      -      -     vmulps	-24(%rsp), %xmm7, %xmm8
132# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$170, %xmm0, %xmm6
133# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$85, %xmm0, %xmm5
134# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastss	%xmm0, %xmm0
135# CHECK-NEXT:  -      -     0.95   0.05    -      -      -      -      -      -     vfmadd231ps	%xmm9, %xmm6, %xmm8
136# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vfmadd213ps	%xmm8, %xmm10, %xmm5
137# CHECK-NEXT:  -      -     0.92   0.08    -      -      -      -      -      -     vfmadd213ps	%xmm5, %xmm11, %xmm0
138# CHECK-NEXT:  -      -     0.95   0.05    -      -      -      -      -      -     vfmadd213ps	%xmm0, %xmm12, %xmm4
139# CHECK-NEXT:  -      -     0.51   0.49    -      -      -      -      -      -     vfmadd213ps	%xmm4, %xmm13, %xmm1
140# CHECK-NEXT:  -      -     0.52   0.48    -      -      -      -      -      -     vmovaps	%xmm7, %xmm4
141# CHECK-NEXT:  -      -     0.49   0.51    -      -      -      -      -      -     vfmadd213ps	%xmm1, %xmm14, %xmm2
142# CHECK-NEXT:  -      -     0.04   0.95    -      -      -     0.01    -      -     vmovaps	%xmm6, %xmm1
143# CHECK-NEXT:  -      -     0.51   0.49    -      -      -      -      -      -     vfmadd213ps	%xmm2, %xmm15, %xmm3
144# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$170, %xmm3, %xmm0
145# CHECK-NEXT:  -      -      -      -     0.01   0.03   1.00    -      -     0.96   vmovups	%xmm3, (%rdx,%rax)
146# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$255, %xmm3, %xmm2
147# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     addq	$16, %rax
148# CHECK-NEXT:  -      -     0.04   0.01    -      -      -     0.01   0.94    -     decl	%ecx
149# CHECK-NEXT:  -      -     0.05   0.95    -      -      -      -      -      -     vmovaps	%xmm0, %xmm3
150# CHECK-NEXT:  -      -     0.01    -      -      -      -      -     0.99    -     jne	.LBB0_4
151