xref: /llvm-project/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s (revision 7785bd34e744a9da515b7e0b5dda10b423ba03fe)
1# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
3
4# In this test, the VDIVPS takes 9 cycles to write to register YMM3.  The first
5# VADDPS does not depend on the VDIVPS (the WAW dependency is eliminated at
6# register renaming stage). So the first VADDPS can be executed in parallel to
7# the VDIVPS. That VADDPS also writes to register XMM3, and the upper half of
8# YMM3 is implicitly cleared. As a consequence, the definition of YMM3 from the
9# VDIVPS is killed, and the subsequent VADDPS instructions don't need to wait
10# for the VDIVPS to complete.
11# On this model the VDIVPS reciprocal throughput (9 cycles) is below the
12# reported block reciprocal throughput (24.5), so the VDIVPS alone is not the
13# limiter; the VADDPS sequence is still executing after the VDIVPS has retired.
14
# Instruction sequence under test (AT&T operand order: destination is last).
# Producer that writes the full YMM3 register.
15vdivps %ymm0, %ymm1, %ymm3
# Writes XMM3 only. Per the header comment, this implicitly clears the upper
# half of YMM3 and so kills the YMM3 definition coming from the VDIVPS.
16vaddps %xmm0, %xmm1, %xmm3
# Fifteen identical VADDPS: each reads YMM3 and YMM1 and writes YMM4.
17vaddps %ymm3, %ymm1, %ymm4
18vaddps %ymm3, %ymm1, %ymm4
19vaddps %ymm3, %ymm1, %ymm4
20vaddps %ymm3, %ymm1, %ymm4
21vaddps %ymm3, %ymm1, %ymm4
22vaddps %ymm3, %ymm1, %ymm4
23vaddps %ymm3, %ymm1, %ymm4
24vaddps %ymm3, %ymm1, %ymm4
25vaddps %ymm3, %ymm1, %ymm4
26vaddps %ymm3, %ymm1, %ymm4
27vaddps %ymm3, %ymm1, %ymm4
28vaddps %ymm3, %ymm1, %ymm4
29vaddps %ymm3, %ymm1, %ymm4
30vaddps %ymm3, %ymm1, %ymm4
31vaddps %ymm3, %ymm1, %ymm4
# Reads XMM4 (low half of the YMM4 written above) and writes XMM0, which the
# VDIVPS and the first VADDPS read as a source when the block is repeated.
32vandps %xmm4, %xmm1, %xmm0
33
34# CHECK:      Iterations:        100
35# CHECK-NEXT: Instructions:      1800
36# CHECK-NEXT: Total Cycles:      3203
37# CHECK-NEXT: Total uOps:        3400
38
39# CHECK:      Dispatch Width:    4
40# CHECK-NEXT: uOps Per Cycle:    1.06
41# CHECK-NEXT: IPC:               0.56
42# CHECK-NEXT: Block RThroughput: 24.5
43
44# CHECK:      Instruction Info:
45# CHECK-NEXT: [1]: #uOps
46# CHECK-NEXT: [2]: Latency
47# CHECK-NEXT: [3]: RThroughput
48# CHECK-NEXT: [4]: MayLoad
49# CHECK-NEXT: [5]: MayStore
50# CHECK-NEXT: [6]: HasSideEffects (U)
51
52# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
53# CHECK-NEXT:  2      9     9.00                        vdivps	%ymm0, %ymm1, %ymm3
54# CHECK-NEXT:  1      5     1.00                        vaddps	%xmm0, %xmm1, %xmm3
55# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
56# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
57# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
58# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
59# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
60# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
61# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
62# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
63# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
64# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
65# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
66# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
67# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
68# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
69# CHECK-NEXT:  2      5     1.00                        vaddps	%ymm3, %ymm1, %ymm4
70# CHECK-NEXT:  1      2     0.50                        vandps	%xmm4, %xmm1, %xmm0
71
72# CHECK:      Timeline view:
73# CHECK-NEXT:                     0123456789          0123456789          0123456789
74# CHECK-NEXT: Index     0123456789          0123456789          0123456789          0123456
75
76# CHECK:      [0,0]     DeeeeeeeeeER   .    .    .    .    .    .    .    .    .    .    ..   vdivps	%ymm0, %ymm1, %ymm3
77# CHECK-NEXT: [0,1]     DeeeeeE----R   .    .    .    .    .    .    .    .    .    .    ..   vaddps	%xmm0, %xmm1, %xmm3
78# CHECK-NEXT: [0,2]     .D====eeeeeER  .    .    .    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
79# CHECK-NEXT: [0,3]     .D======eeeeeER.    .    .    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
80# CHECK-NEXT: [0,4]     . D=======eeeeeER   .    .    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
81# CHECK-NEXT: [0,5]     . D=========eeeeeER .    .    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
82# CHECK-NEXT: [0,6]     .  D==========eeeeeER    .    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
83# CHECK-NEXT: [0,7]     .  D============eeeeeER  .    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
84# CHECK-NEXT: [0,8]     .   D=============eeeeeER.    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
85# CHECK-NEXT: [0,9]     .   D==============eeeeeER    .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
86# CHECK-NEXT: [0,10]    .    D==============eeeeeER   .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
87# CHECK-NEXT: [0,11]    .    D===============eeeeeER  .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
88# CHECK-NEXT: [0,12]    .    .D===============eeeeeER .    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
89# CHECK-NEXT: [0,13]    .    .D================eeeeeER.    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
90# CHECK-NEXT: [0,14]    .    . D================eeeeeER    .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
91# CHECK-NEXT: [0,15]    .    . D=================eeeeeER   .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
92# CHECK-NEXT: [0,16]    .    .  D=================eeeeeER  .    .    .    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
93# CHECK-NEXT: [0,17]    .    .  D======================eeER.    .    .    .    .    .    ..   vandps	%xmm4, %xmm1, %xmm0
94# CHECK-NEXT: [1,0]     .    .   D=======================eeeeeeeeeER .    .    .    .    ..   vdivps	%ymm0, %ymm1, %ymm3
95# CHECK-NEXT: [1,1]     .    .   D=======================eeeeeE----R .    .    .    .    ..   vaddps	%xmm0, %xmm1, %xmm3
96# CHECK-NEXT: [1,2]     .    .    D===========================eeeeeER.    .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
97# CHECK-NEXT: [1,3]     .    .    D=============================eeeeeER   .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
98# CHECK-NEXT: [1,4]     .    .    .D==============================eeeeeER .    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
99# CHECK-NEXT: [1,5]     .    .    .D================================eeeeeER    .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
100# CHECK-NEXT: [1,6]     .    .    . D=================================eeeeeER  .    .    ..   vaddps	%ymm3, %ymm1, %ymm4
101# CHECK-NEXT: [1,7]     .    .    . D===================================eeeeeER.    .    ..   vaddps	%ymm3, %ymm1, %ymm4
102# CHECK-NEXT: [1,8]     .    .    .  D====================================eeeeeER   .    ..   vaddps	%ymm3, %ymm1, %ymm4
103# CHECK-NEXT: [1,9]     .    .    .  D=====================================eeeeeER  .    ..   vaddps	%ymm3, %ymm1, %ymm4
104# CHECK-NEXT: [1,10]    .    .    .   D=====================================eeeeeER .    ..   vaddps	%ymm3, %ymm1, %ymm4
105# CHECK-NEXT: [1,11]    .    .    .   D======================================eeeeeER.    ..   vaddps	%ymm3, %ymm1, %ymm4
106# CHECK-NEXT: [1,12]    .    .    .    D======================================eeeeeER    ..   vaddps	%ymm3, %ymm1, %ymm4
107# CHECK-NEXT: [1,13]    .    .    .    D=======================================eeeeeER   ..   vaddps	%ymm3, %ymm1, %ymm4
108# CHECK-NEXT: [1,14]    .    .    .    .D=======================================eeeeeER  ..   vaddps	%ymm3, %ymm1, %ymm4
109# CHECK-NEXT: [1,15]    .    .    .    .D========================================eeeeeER ..   vaddps	%ymm3, %ymm1, %ymm4
110# CHECK-NEXT: [1,16]    .    .    .    . D========================================eeeeeER..   vaddps	%ymm3, %ymm1, %ymm4
111# CHECK-NEXT: [1,17]    .    .    .    . D=============================================eeER   vandps	%xmm4, %xmm1, %xmm0
112
113# CHECK:      Average Wait times (based on the timeline view):
114# CHECK-NEXT: [0]: Executions
115# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
116# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
117# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
118
119# CHECK:            [0]    [1]    [2]    [3]
120# CHECK-NEXT: 0.     2     12.5   0.5    0.0       vdivps	%ymm0, %ymm1, %ymm3
121# CHECK-NEXT: 1.     2     12.5   0.5    4.0       vaddps	%xmm0, %xmm1, %xmm3
122# CHECK-NEXT: 2.     2     16.5   0.0    0.0       vaddps	%ymm3, %ymm1, %ymm4
123# CHECK-NEXT: 3.     2     18.5   2.0    0.0       vaddps	%ymm3, %ymm1, %ymm4
124# CHECK-NEXT: 4.     2     19.5   4.0    0.0       vaddps	%ymm3, %ymm1, %ymm4
125# CHECK-NEXT: 5.     2     21.5   6.0    0.0       vaddps	%ymm3, %ymm1, %ymm4
126# CHECK-NEXT: 6.     2     22.5   8.0    0.0       vaddps	%ymm3, %ymm1, %ymm4
127# CHECK-NEXT: 7.     2     24.5   10.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
128# CHECK-NEXT: 8.     2     25.5   12.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
129# CHECK-NEXT: 9.     2     26.5   13.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
130# CHECK-NEXT: 10.    2     26.5   14.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
131# CHECK-NEXT: 11.    2     27.5   15.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
132# CHECK-NEXT: 12.    2     27.5   16.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
133# CHECK-NEXT: 13.    2     28.5   17.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
134# CHECK-NEXT: 14.    2     28.5   17.5   0.0       vaddps	%ymm3, %ymm1, %ymm4
135# CHECK-NEXT: 15.    2     29.5   18.5   0.0       vaddps	%ymm3, %ymm1, %ymm4
136# CHECK-NEXT: 16.    2     29.5   19.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
137# CHECK-NEXT: 17.    2     34.5   0.0    0.0       vandps	%xmm4, %xmm1, %xmm0
138# CHECK-NEXT:        2     24.0   9.6    0.2       <total>
139