xref: /llvm-project/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s (revision c60461e3f8154ade8e542e64d1711f975adac8d0)
1# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
3
4# In this test, the VDIVPS takes 38 cycles to write to register YMM3.  The first
5# VADDPS does not depend on the VDIVPS (the WAW dependency is eliminated at
6# the register renaming stage). So the first VADDPS can run in parallel with
7# the VDIVPS. That VADDPS also writes to register XMM3, and the upper half of
8# YMM3 is implicitly cleared. As a consequence, the definition of YMM3 from the
9# VDIVPS is killed, and the subsequent VADDPS instructions don't need to wait
10# for the VDIVPS to complete.
11# The block reciprocal throughput is limited by the VDIVPS reciprocal throughput
12# (which is 38 cycles). The VADDPS instructions can be executed in parallel on
13# the FPA unit; their latency is "hidden" by the long latency of the VDIVPS.
14
# Long-latency producer: divides using YMM0 and YMM1 as sources and writes the
# full YMM3 register (38 cycles of latency on this CPU model, per the
# description at the top of this test).
15vdivps %ymm0, %ymm1, %ymm3
# XMM-width write to XMM3. As described at the top of this test, this write
# implicitly clears the upper half of YMM3, which kills the YMM3 definition
# produced by the VDIVPS above.
16vaddps %xmm0, %xmm1, %xmm3
# Fifteen identical readers of YMM3, each writing YMM4. Because the VDIVPS
# definition of YMM3 has been killed, these are expected to wait only on the
# XMM3-writing VADDPS above and not on the VDIVPS.
17vaddps %ymm3, %ymm1, %ymm4
18vaddps %ymm3, %ymm1, %ymm4
19vaddps %ymm3, %ymm1, %ymm4
20vaddps %ymm3, %ymm1, %ymm4
21vaddps %ymm3, %ymm1, %ymm4
22vaddps %ymm3, %ymm1, %ymm4
23vaddps %ymm3, %ymm1, %ymm4
24vaddps %ymm3, %ymm1, %ymm4
25vaddps %ymm3, %ymm1, %ymm4
26vaddps %ymm3, %ymm1, %ymm4
27vaddps %ymm3, %ymm1, %ymm4
28vaddps %ymm3, %ymm1, %ymm4
29vaddps %ymm3, %ymm1, %ymm4
30vaddps %ymm3, %ymm1, %ymm4
31vaddps %ymm3, %ymm1, %ymm4
# Reads XMM4 (the low half of the register written by the VADDPS sequence)
# and writes XMM0, the low half of the register that the VDIVPS and the first
# VADDPS read as a source operand.
32vandps %xmm4, %xmm1, %xmm0
33
34# CHECK:      Iterations:        100
35# CHECK-NEXT: Instructions:      1800
36# CHECK-NEXT: Total Cycles:      3811
37# CHECK-NEXT: Total uOps:        3400
38
39# CHECK:      Dispatch Width:    2
40# CHECK-NEXT: uOps Per Cycle:    0.89
41# CHECK-NEXT: IPC:               0.47
42# CHECK-NEXT: Block RThroughput: 38.0
43
44# CHECK:      Instruction Info:
45# CHECK-NEXT: [1]: #uOps
46# CHECK-NEXT: [2]: Latency
47# CHECK-NEXT: [3]: RThroughput
48# CHECK-NEXT: [4]: MayLoad
49# CHECK-NEXT: [5]: MayStore
50# CHECK-NEXT: [6]: HasSideEffects (U)
51
52# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
53# CHECK-NEXT:  2      38    38.00                       vdivps	%ymm0, %ymm1, %ymm3
54# CHECK-NEXT:  1      3     1.00                        vaddps	%xmm0, %xmm1, %xmm3
55# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
56# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
57# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
58# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
59# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
60# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
61# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
62# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
63# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
64# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
65# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
66# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
67# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
68# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
69# CHECK-NEXT:  2      3     2.00                        vaddps	%ymm3, %ymm1, %ymm4
70# CHECK-NEXT:  1      1     0.50                        vandps	%xmm4, %xmm1, %xmm0
71
72# CHECK:      Timeline view:
73# CHECK-NEXT:                     0123456789          0123456789          0123456789          0123456789
74# CHECK-NEXT: Index     0123456789          0123456789          0123456789          0123456789
75
76# CHECK:      [0,0]     DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER    .    .    .    .    .    .    .   .   vdivps	%ymm0, %ymm1, %ymm3
77# CHECK-NEXT: [0,1]     .DeeeE----------------------------------R    .    .    .    .    .    .    .   .   vaddps	%xmm0, %xmm1, %xmm3
78# CHECK-NEXT: [0,2]     . D==eeeE--------------------------------R   .    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
79# CHECK-NEXT: [0,3]     .  D===eeeE------------------------------R   .    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
80# CHECK-NEXT: [0,4]     .   D====eeeE-----------------------------R  .    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
81# CHECK-NEXT: [0,5]     .    D=====eeeE---------------------------R  .    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
82# CHECK-NEXT: [0,6]     .    .D======eeeE--------------------------R .    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
83# CHECK-NEXT: [0,7]     .    . D=======eeeE------------------------R .    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
84# CHECK-NEXT: [0,8]     .    .  D========eeeE-----------------------R.    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
85# CHECK-NEXT: [0,9]     .    .   D=========eeeE---------------------R.    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
86# CHECK-NEXT: [0,10]    .    .    D==========eeeE--------------------R    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
87# CHECK-NEXT: [0,11]    .    .    .D===========eeeE------------------R    .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
88# CHECK-NEXT: [0,12]    .    .    . D============eeeE-----------------R   .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
89# CHECK-NEXT: [0,13]    .    .    .  D=============eeeE---------------R   .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
90# CHECK-NEXT: [0,14]    .    .    .   D==============eeeE--------------R  .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
91# CHECK-NEXT: [0,15]    .    .    .    D===============eeeE------------R  .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
92# CHECK-NEXT: [0,16]    .    .    .    .D================eeeE-----------R .    .    .    .    .    .   .   vaddps	%ymm3, %ymm1, %ymm4
93# CHECK-NEXT: [0,17]    .    .    .    . D==================eE----------R .    .    .    .    .    .   .   vandps	%xmm4, %xmm1, %xmm0
94# CHECK-NEXT: [1,0]     .    .    .    .  D====================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER.   vdivps	%ymm0, %ymm1, %ymm3
95# CHECK-NEXT: [1,1]     .    .    .    .   D=================eeeE-------------------------------------R.   vaddps	%xmm0, %xmm1, %xmm3
96# CHECK-NEXT: [1,2]     .    .    .    .    D===================eeeE-----------------------------------R   vaddps	%ymm3, %ymm1, %ymm4
97# CHECK-NEXT: [1,3]     .    .    .    .    .D====================eeeE---------------------------------R   vaddps	%ymm3, %ymm1, %ymm4
98# CHECK-NEXT: Truncated display due to cycle limit
99
100# CHECK:      Average Wait times (based on the timeline view):
101# CHECK-NEXT: [0]: Executions
102# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
103# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
104# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
105
106# CHECK:            [0]    [1]    [2]    [3]
107# CHECK-NEXT: 0.     2     11.0   1.5    0.0       vdivps	%ymm0, %ymm1, %ymm3
108# CHECK-NEXT: 1.     2     9.5    0.5    35.5      vaddps	%xmm0, %xmm1, %xmm3
109# CHECK-NEXT: 2.     2     11.5   0.0    33.5      vaddps	%ymm3, %ymm1, %ymm4
110# CHECK-NEXT: 3.     2     12.5   2.0    31.5      vaddps	%ymm3, %ymm1, %ymm4
111# CHECK-NEXT: 4.     2     13.5   4.0    30.5      vaddps	%ymm3, %ymm1, %ymm4
112# CHECK-NEXT: 5.     2     14.5   6.0    28.5      vaddps	%ymm3, %ymm1, %ymm4
113# CHECK-NEXT: 6.     2     15.5   7.5    27.5      vaddps	%ymm3, %ymm1, %ymm4
114# CHECK-NEXT: 7.     2     16.5   9.0    25.5      vaddps	%ymm3, %ymm1, %ymm4
115# CHECK-NEXT: 8.     2     17.5   10.5   24.5      vaddps	%ymm3, %ymm1, %ymm4
116# CHECK-NEXT: 9.     2     18.5   12.0   22.5      vaddps	%ymm3, %ymm1, %ymm4
117# CHECK-NEXT: 10.    2     19.5   13.5   21.5      vaddps	%ymm3, %ymm1, %ymm4
118# CHECK-NEXT: 11.    2     20.5   15.0   19.5      vaddps	%ymm3, %ymm1, %ymm4
119# CHECK-NEXT: 12.    2     21.5   16.5   18.5      vaddps	%ymm3, %ymm1, %ymm4
120# CHECK-NEXT: 13.    2     22.5   18.0   16.5      vaddps	%ymm3, %ymm1, %ymm4
121# CHECK-NEXT: 14.    2     23.5   19.5   15.5      vaddps	%ymm3, %ymm1, %ymm4
122# CHECK-NEXT: 15.    2     21.0   21.0   13.5      vaddps	%ymm3, %ymm1, %ymm4
123# CHECK-NEXT: 16.    2     22.0   22.0   12.5      vaddps	%ymm3, %ymm1, %ymm4
124# CHECK-NEXT: 17.    2     24.0   0.0    11.5      vandps	%xmm4, %xmm1, %xmm0
125# CHECK-NEXT:        2     17.5   9.9    21.6      <total>
126