xref: /llvm-project/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s (revision 5fd9babbfcd02bae431d5b280da59adddc2824d3)
1# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -timeline -timeline-max-iterations=2 -register-file-stats -iterations=1000 < %s | FileCheck %s
3
# Region 0: pcmpeqb of %xmm0 with itself, followed by paddb of %xmm0 with itself.
# NOTE(review): the expected timeline for this region shows the second-iteration
# pcmpeqb starting execution in the same cycle as the first one, i.e. it does not
# appear to wait for the preceding paddb to write %xmm0, while each paddb spends
# one cycle waiting after dispatch. Confirm against the znver3 scheduling model
# that this is the intended dependency-breaking behaviour being tested.
4# LLVM-MCA-BEGIN
5pcmpeqb %xmm0, %xmm0
6paddb %xmm0, %xmm0
7# LLVM-MCA-END
8
# Region 1: pcmpeqw of %xmm0 with itself, followed by paddw of %xmm0 with itself.
# NOTE(review): the expected timeline for this region shows the second-iteration
# pcmpeqw starting execution in the same cycle as the first one, i.e. it does not
# appear to wait for the preceding paddw to write %xmm0. Confirm against the
# znver3 scheduling model.
9# LLVM-MCA-BEGIN
10pcmpeqw %xmm0, %xmm0
11paddw %xmm0, %xmm0
12# LLVM-MCA-END
13
# Region 2: pcmpeqd of %xmm0 with itself, followed by paddd of %xmm0 with itself.
# NOTE(review): the expected timeline for this region shows the second-iteration
# pcmpeqd starting execution in the same cycle as the first one, i.e. it does not
# appear to wait for the preceding paddd to write %xmm0. Confirm against the
# znver3 scheduling model.
14# LLVM-MCA-BEGIN
15pcmpeqd %xmm0, %xmm0
16paddd %xmm0, %xmm0
17# LLVM-MCA-END
18
# Region 3: pcmpeqq of %xmm0 with itself, followed by paddq of %xmm0 with itself.
# NOTE(review): the expected timeline for this region shows the second-iteration
# pcmpeqq starting execution in the same cycle as the first one, i.e. it does not
# appear to wait for the preceding paddq to write %xmm0. Confirm against the
# znver3 scheduling model.
19# LLVM-MCA-BEGIN
20pcmpeqq %xmm0, %xmm0
21paddq %xmm0, %xmm0
22# LLVM-MCA-END
23
24# CHECK:      [0] Code Region
25
26# CHECK:      Iterations:        1000
27# CHECK-NEXT: Instructions:      2000
28# CHECK-NEXT: Total Cycles:      504
29# CHECK-NEXT: Total uOps:        2000
30
31# CHECK:      Dispatch Width:    6
32# CHECK-NEXT: uOps Per Cycle:    3.97
33# CHECK-NEXT: IPC:               3.97
34# CHECK-NEXT: Block RThroughput: 0.5
35
36# CHECK:      Instruction Info:
37# CHECK-NEXT: [1]: #uOps
38# CHECK-NEXT: [2]: Latency
39# CHECK-NEXT: [3]: RThroughput
40# CHECK-NEXT: [4]: MayLoad
41# CHECK-NEXT: [5]: MayStore
42# CHECK-NEXT: [6]: HasSideEffects (U)
43
44# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
45# CHECK-NEXT:  1      1     0.25                        pcmpeqb	%xmm0, %xmm0
46# CHECK-NEXT:  1      1     0.25                        paddb	%xmm0, %xmm0
47
48# CHECK:      Register File statistics:
49# CHECK-NEXT: Total number of mappings created:    2000
50# CHECK-NEXT: Max number of mappings used:         74
51
52# CHECK:      *  Register File #1 -- Zn3FpPRF:
53# CHECK-NEXT:    Number of physical registers:     160
54# CHECK-NEXT:    Total number of mappings created: 2000
55# CHECK-NEXT:    Max number of mappings used:      74
56
57# CHECK:      *  Register File #2 -- Zn3IntegerPRF:
58# CHECK-NEXT:    Number of physical registers:     192
59# CHECK-NEXT:    Total number of mappings created: 0
60# CHECK-NEXT:    Max number of mappings used:      0
61
62# CHECK:      Resources:
63# CHECK-NEXT: [0]   - Zn3AGU0
64# CHECK-NEXT: [1]   - Zn3AGU1
65# CHECK-NEXT: [2]   - Zn3AGU2
66# CHECK-NEXT: [3]   - Zn3ALU0
67# CHECK-NEXT: [4]   - Zn3ALU1
68# CHECK-NEXT: [5]   - Zn3ALU2
69# CHECK-NEXT: [6]   - Zn3ALU3
70# CHECK-NEXT: [7]   - Zn3BRU1
71# CHECK-NEXT: [8]   - Zn3FP0
72# CHECK-NEXT: [9]   - Zn3FP1
73# CHECK-NEXT: [10]  - Zn3FP2
74# CHECK-NEXT: [11]  - Zn3FP3
75# CHECK-NEXT: [12.0] - Zn3FP45
76# CHECK-NEXT: [12.1] - Zn3FP45
77# CHECK-NEXT: [13]  - Zn3FPSt
78# CHECK-NEXT: [14.0] - Zn3LSU
79# CHECK-NEXT: [14.1] - Zn3LSU
80# CHECK-NEXT: [14.2] - Zn3LSU
81# CHECK-NEXT: [15.0] - Zn3Load
82# CHECK-NEXT: [15.1] - Zn3Load
83# CHECK-NEXT: [15.2] - Zn3Load
84# CHECK-NEXT: [16.0] - Zn3Store
85# CHECK-NEXT: [16.1] - Zn3Store
86
87# CHECK:      Resource pressure per iteration:
88# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
89# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -
90
91# CHECK:      Resource pressure by instruction:
92# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
93# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.50    -      -      -      -      -      -      -      -      -      -      -      -     pcmpeqb	%xmm0, %xmm0
94# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25    -     0.50    -      -      -      -      -      -      -      -      -      -      -     paddb	%xmm0, %xmm0
95
96# CHECK:      Timeline view:
97# CHECK-NEXT: Index     01234
98
99# CHECK:      [0,0]     DeER.   pcmpeqb	%xmm0, %xmm0
100# CHECK-NEXT: [0,1]     D=eER   paddb	%xmm0, %xmm0
101# CHECK-NEXT: [1,0]     DeE-R   pcmpeqb	%xmm0, %xmm0
102# CHECK-NEXT: [1,1]     D=eER   paddb	%xmm0, %xmm0
103
104# CHECK:      Average Wait times (based on the timeline view):
105# CHECK-NEXT: [0]: Executions
106# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
107# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
108# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
109
110# CHECK:            [0]    [1]    [2]    [3]
111# CHECK-NEXT: 0.     2     1.0    1.0    0.5       pcmpeqb	%xmm0, %xmm0
112# CHECK-NEXT: 1.     2     2.0    0.0    0.0       paddb	%xmm0, %xmm0
113# CHECK-NEXT:        2     1.5    0.5    0.3       <total>
114
115# CHECK:      [1] Code Region
116
117# CHECK:      Iterations:        1000
118# CHECK-NEXT: Instructions:      2000
119# CHECK-NEXT: Total Cycles:      504
120# CHECK-NEXT: Total uOps:        2000
121
122# CHECK:      Dispatch Width:    6
123# CHECK-NEXT: uOps Per Cycle:    3.97
124# CHECK-NEXT: IPC:               3.97
125# CHECK-NEXT: Block RThroughput: 0.5
126
127# CHECK:      Instruction Info:
128# CHECK-NEXT: [1]: #uOps
129# CHECK-NEXT: [2]: Latency
130# CHECK-NEXT: [3]: RThroughput
131# CHECK-NEXT: [4]: MayLoad
132# CHECK-NEXT: [5]: MayStore
133# CHECK-NEXT: [6]: HasSideEffects (U)
134
135# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
136# CHECK-NEXT:  1      1     0.25                        pcmpeqw	%xmm0, %xmm0
137# CHECK-NEXT:  1      1     0.25                        paddw	%xmm0, %xmm0
138
139# CHECK:      Register File statistics:
140# CHECK-NEXT: Total number of mappings created:    2000
141# CHECK-NEXT: Max number of mappings used:         74
142
143# CHECK:      *  Register File #1 -- Zn3FpPRF:
144# CHECK-NEXT:    Number of physical registers:     160
145# CHECK-NEXT:    Total number of mappings created: 2000
146# CHECK-NEXT:    Max number of mappings used:      74
147
148# CHECK:      *  Register File #2 -- Zn3IntegerPRF:
149# CHECK-NEXT:    Number of physical registers:     192
150# CHECK-NEXT:    Total number of mappings created: 0
151# CHECK-NEXT:    Max number of mappings used:      0
152
153# CHECK:      Resources:
154# CHECK-NEXT: [0]   - Zn3AGU0
155# CHECK-NEXT: [1]   - Zn3AGU1
156# CHECK-NEXT: [2]   - Zn3AGU2
157# CHECK-NEXT: [3]   - Zn3ALU0
158# CHECK-NEXT: [4]   - Zn3ALU1
159# CHECK-NEXT: [5]   - Zn3ALU2
160# CHECK-NEXT: [6]   - Zn3ALU3
161# CHECK-NEXT: [7]   - Zn3BRU1
162# CHECK-NEXT: [8]   - Zn3FP0
163# CHECK-NEXT: [9]   - Zn3FP1
164# CHECK-NEXT: [10]  - Zn3FP2
165# CHECK-NEXT: [11]  - Zn3FP3
166# CHECK-NEXT: [12.0] - Zn3FP45
167# CHECK-NEXT: [12.1] - Zn3FP45
168# CHECK-NEXT: [13]  - Zn3FPSt
169# CHECK-NEXT: [14.0] - Zn3LSU
170# CHECK-NEXT: [14.1] - Zn3LSU
171# CHECK-NEXT: [14.2] - Zn3LSU
172# CHECK-NEXT: [15.0] - Zn3Load
173# CHECK-NEXT: [15.1] - Zn3Load
174# CHECK-NEXT: [15.2] - Zn3Load
175# CHECK-NEXT: [16.0] - Zn3Store
176# CHECK-NEXT: [16.1] - Zn3Store
177
178# CHECK:      Resource pressure per iteration:
179# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
180# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -
181
182# CHECK:      Resource pressure by instruction:
183# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
184# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.50    -      -      -      -      -      -      -      -      -      -      -      -     pcmpeqw	%xmm0, %xmm0
185# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25    -     0.50    -      -      -      -      -      -      -      -      -      -      -     paddw	%xmm0, %xmm0
186
187# CHECK:      Timeline view:
188# CHECK-NEXT: Index     01234
189
190# CHECK:      [0,0]     DeER.   pcmpeqw	%xmm0, %xmm0
191# CHECK-NEXT: [0,1]     D=eER   paddw	%xmm0, %xmm0
192# CHECK-NEXT: [1,0]     DeE-R   pcmpeqw	%xmm0, %xmm0
193# CHECK-NEXT: [1,1]     D=eER   paddw	%xmm0, %xmm0
194
195# CHECK:      Average Wait times (based on the timeline view):
196# CHECK-NEXT: [0]: Executions
197# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
198# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
199# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
200
201# CHECK:            [0]    [1]    [2]    [3]
202# CHECK-NEXT: 0.     2     1.0    1.0    0.5       pcmpeqw	%xmm0, %xmm0
203# CHECK-NEXT: 1.     2     2.0    0.0    0.0       paddw	%xmm0, %xmm0
204# CHECK-NEXT:        2     1.5    0.5    0.3       <total>
205
206# CHECK:      [2] Code Region
207
208# CHECK:      Iterations:        1000
209# CHECK-NEXT: Instructions:      2000
210# CHECK-NEXT: Total Cycles:      504
211# CHECK-NEXT: Total uOps:        2000
212
213# CHECK:      Dispatch Width:    6
214# CHECK-NEXT: uOps Per Cycle:    3.97
215# CHECK-NEXT: IPC:               3.97
216# CHECK-NEXT: Block RThroughput: 0.5
217
218# CHECK:      Instruction Info:
219# CHECK-NEXT: [1]: #uOps
220# CHECK-NEXT: [2]: Latency
221# CHECK-NEXT: [3]: RThroughput
222# CHECK-NEXT: [4]: MayLoad
223# CHECK-NEXT: [5]: MayStore
224# CHECK-NEXT: [6]: HasSideEffects (U)
225
226# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
227# CHECK-NEXT:  1      1     0.25                        pcmpeqd	%xmm0, %xmm0
228# CHECK-NEXT:  1      1     0.25                        paddd	%xmm0, %xmm0
229
230# CHECK:      Register File statistics:
231# CHECK-NEXT: Total number of mappings created:    2000
232# CHECK-NEXT: Max number of mappings used:         74
233
234# CHECK:      *  Register File #1 -- Zn3FpPRF:
235# CHECK-NEXT:    Number of physical registers:     160
236# CHECK-NEXT:    Total number of mappings created: 2000
237# CHECK-NEXT:    Max number of mappings used:      74
238
239# CHECK:      *  Register File #2 -- Zn3IntegerPRF:
240# CHECK-NEXT:    Number of physical registers:     192
241# CHECK-NEXT:    Total number of mappings created: 0
242# CHECK-NEXT:    Max number of mappings used:      0
243
244# CHECK:      Resources:
245# CHECK-NEXT: [0]   - Zn3AGU0
246# CHECK-NEXT: [1]   - Zn3AGU1
247# CHECK-NEXT: [2]   - Zn3AGU2
248# CHECK-NEXT: [3]   - Zn3ALU0
249# CHECK-NEXT: [4]   - Zn3ALU1
250# CHECK-NEXT: [5]   - Zn3ALU2
251# CHECK-NEXT: [6]   - Zn3ALU3
252# CHECK-NEXT: [7]   - Zn3BRU1
253# CHECK-NEXT: [8]   - Zn3FP0
254# CHECK-NEXT: [9]   - Zn3FP1
255# CHECK-NEXT: [10]  - Zn3FP2
256# CHECK-NEXT: [11]  - Zn3FP3
257# CHECK-NEXT: [12.0] - Zn3FP45
258# CHECK-NEXT: [12.1] - Zn3FP45
259# CHECK-NEXT: [13]  - Zn3FPSt
260# CHECK-NEXT: [14.0] - Zn3LSU
261# CHECK-NEXT: [14.1] - Zn3LSU
262# CHECK-NEXT: [14.2] - Zn3LSU
263# CHECK-NEXT: [15.0] - Zn3Load
264# CHECK-NEXT: [15.1] - Zn3Load
265# CHECK-NEXT: [15.2] - Zn3Load
266# CHECK-NEXT: [16.0] - Zn3Store
267# CHECK-NEXT: [16.1] - Zn3Store
268
269# CHECK:      Resource pressure per iteration:
270# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
271# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -
272
273# CHECK:      Resource pressure by instruction:
274# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
275# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.50    -      -      -      -      -      -      -      -      -      -      -      -     pcmpeqd	%xmm0, %xmm0
276# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25    -     0.50    -      -      -      -      -      -      -      -      -      -      -     paddd	%xmm0, %xmm0
277
278# CHECK:      Timeline view:
279# CHECK-NEXT: Index     01234
280
281# CHECK:      [0,0]     DeER.   pcmpeqd	%xmm0, %xmm0
282# CHECK-NEXT: [0,1]     D=eER   paddd	%xmm0, %xmm0
283# CHECK-NEXT: [1,0]     DeE-R   pcmpeqd	%xmm0, %xmm0
284# CHECK-NEXT: [1,1]     D=eER   paddd	%xmm0, %xmm0
285
286# CHECK:      Average Wait times (based on the timeline view):
287# CHECK-NEXT: [0]: Executions
288# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
289# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
290# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
291
292# CHECK:            [0]    [1]    [2]    [3]
293# CHECK-NEXT: 0.     2     1.0    1.0    0.5       pcmpeqd	%xmm0, %xmm0
294# CHECK-NEXT: 1.     2     2.0    0.0    0.0       paddd	%xmm0, %xmm0
295# CHECK-NEXT:        2     1.5    0.5    0.3       <total>
296
297# CHECK:      [3] Code Region
298
299# CHECK:      Iterations:        1000
300# CHECK-NEXT: Instructions:      2000
301# CHECK-NEXT: Total Cycles:      504
302# CHECK-NEXT: Total uOps:        2000
303
304# CHECK:      Dispatch Width:    6
305# CHECK-NEXT: uOps Per Cycle:    3.97
306# CHECK-NEXT: IPC:               3.97
307# CHECK-NEXT: Block RThroughput: 0.5
308
309# CHECK:      Instruction Info:
310# CHECK-NEXT: [1]: #uOps
311# CHECK-NEXT: [2]: Latency
312# CHECK-NEXT: [3]: RThroughput
313# CHECK-NEXT: [4]: MayLoad
314# CHECK-NEXT: [5]: MayStore
315# CHECK-NEXT: [6]: HasSideEffects (U)
316
317# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
318# CHECK-NEXT:  1      1     0.25                        pcmpeqq	%xmm0, %xmm0
319# CHECK-NEXT:  1      1     0.25                        paddq	%xmm0, %xmm0
320
321# CHECK:      Register File statistics:
322# CHECK-NEXT: Total number of mappings created:    2000
323# CHECK-NEXT: Max number of mappings used:         74
324
325# CHECK:      *  Register File #1 -- Zn3FpPRF:
326# CHECK-NEXT:    Number of physical registers:     160
327# CHECK-NEXT:    Total number of mappings created: 2000
328# CHECK-NEXT:    Max number of mappings used:      74
329
330# CHECK:      *  Register File #2 -- Zn3IntegerPRF:
331# CHECK-NEXT:    Number of physical registers:     192
332# CHECK-NEXT:    Total number of mappings created: 0
333# CHECK-NEXT:    Max number of mappings used:      0
334
335# CHECK:      Resources:
336# CHECK-NEXT: [0]   - Zn3AGU0
337# CHECK-NEXT: [1]   - Zn3AGU1
338# CHECK-NEXT: [2]   - Zn3AGU2
339# CHECK-NEXT: [3]   - Zn3ALU0
340# CHECK-NEXT: [4]   - Zn3ALU1
341# CHECK-NEXT: [5]   - Zn3ALU2
342# CHECK-NEXT: [6]   - Zn3ALU3
343# CHECK-NEXT: [7]   - Zn3BRU1
344# CHECK-NEXT: [8]   - Zn3FP0
345# CHECK-NEXT: [9]   - Zn3FP1
346# CHECK-NEXT: [10]  - Zn3FP2
347# CHECK-NEXT: [11]  - Zn3FP3
348# CHECK-NEXT: [12.0] - Zn3FP45
349# CHECK-NEXT: [12.1] - Zn3FP45
350# CHECK-NEXT: [13]  - Zn3FPSt
351# CHECK-NEXT: [14.0] - Zn3LSU
352# CHECK-NEXT: [14.1] - Zn3LSU
353# CHECK-NEXT: [14.2] - Zn3LSU
354# CHECK-NEXT: [15.0] - Zn3Load
355# CHECK-NEXT: [15.1] - Zn3Load
356# CHECK-NEXT: [15.2] - Zn3Load
357# CHECK-NEXT: [16.0] - Zn3Store
358# CHECK-NEXT: [16.1] - Zn3Store
359
360# CHECK:      Resource pressure per iteration:
361# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
362# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -
363
364# CHECK:      Resource pressure by instruction:
365# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
366# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.50    -      -      -      -      -      -      -      -      -      -      -      -     pcmpeqq	%xmm0, %xmm0
367# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25    -     0.50    -      -      -      -      -      -      -      -      -      -      -     paddq	%xmm0, %xmm0
368
369# CHECK:      Timeline view:
370# CHECK-NEXT: Index     01234
371
372# CHECK:      [0,0]     DeER.   pcmpeqq	%xmm0, %xmm0
373# CHECK-NEXT: [0,1]     D=eER   paddq	%xmm0, %xmm0
374# CHECK-NEXT: [1,0]     DeE-R   pcmpeqq	%xmm0, %xmm0
375# CHECK-NEXT: [1,1]     D=eER   paddq	%xmm0, %xmm0
376
377# CHECK:      Average Wait times (based on the timeline view):
378# CHECK-NEXT: [0]: Executions
379# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
380# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
381# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
382
383# CHECK:            [0]    [1]    [2]    [3]
384# CHECK-NEXT: 0.     2     1.0    1.0    0.5       pcmpeqq	%xmm0, %xmm0
385# CHECK-NEXT: 1.     2     2.0    0.0    0.0       paddq	%xmm0, %xmm0
386# CHECK-NEXT:        2     1.5    0.5    0.3       <total>
387