xref: /llvm-project/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-forwarding.s (revision 887362ddb565f7392632e75b343c5a25f321bb4d)
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v2 -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=2 < %s | FileCheck %s
3
# Scalar integer multiply-accumulate chain through x0.
# mul produces x0; the next two madd use x0 only as the addend, with
# independent multiplicands; the last madd reads x0 in all three sources.
# LLVM-MCA-BEGIN madd
mul  x0, x0, x0
madd x0, x1, x2, x0
madd x0, x1, x2, x0
madd x0, x0, x0, x0
# LLVM-MCA-END
10
# Widening signed multiply-accumulate chain through x0.
# mul produces x0; two smaddl take x0 as the addend with w1/w2 as
# multiplicands; the final smaddl also multiplies w0 (low half of x0).
# LLVM-MCA-BEGIN smaddl
mul    x0, x0, x0
smaddl x0, w1, w2, x0
smaddl x0, w1, w2, x0
smaddl x0, w0, w0, x0
# LLVM-MCA-END
17
# Scalar FP fused multiply-add chain through d0, with two kinds of producer.
# fadd -> fmadd (d0 as addend), then fmul -> fmadd -> fmadd (d0 as addend),
# and a last fmadd where d0 is a multiplicand and d2 is the addend.
# LLVM-MCA-BEGIN fmadd
fadd  d0, d0, d0
fmadd d0, d1, d2, d0
fmul  d0, d0, d0
fmadd d0, d1, d2, d0
fmadd d0, d1, d2, d0
fmadd d0, d0, d1, d2
# LLVM-MCA-END
26
# NEON absolute-difference-accumulate chain through v0.
# mul produces v0; two saba accumulate into v0 from v1/v2; the final saba
# additionally reads v0 as its first source operand.
# LLVM-MCA-BEGIN saba
mul  v0.4s, v0.4s, v0.4s
saba v0.4s, v1.4s, v2.4s
saba v0.4s, v1.4s, v2.4s
saba v0.4s, v0.4s, v1.4s
# LLVM-MCA-END
33
# NEON signed dot-product accumulate chain through v0.
# mul produces v0; two sdot accumulate into v0 from v1/v2; the final sdot
# additionally reads v0 as its first source operand.
# LLVM-MCA-BEGIN sdot
mul  v0.4s, v0.4s,  v0.4s
sdot v0.4s, v1.16b, v2.16b
sdot v0.4s, v1.16b, v2.16b
sdot v0.4s, v0.16b, v1.16b
# LLVM-MCA-END
40
# NEON signed matrix multiply-accumulate chain through v0.
# mul produces v0; two smmla accumulate into v0 from v1/v2; the final smmla
# additionally reads v0 as its first source operand.
# LLVM-MCA-BEGIN smmla
mul   v0.4s, v0.4s,  v0.4s
smmla v0.4s, v1.16b, v2.16b
smmla v0.4s, v1.16b, v2.16b
smmla v0.4s, v0.16b, v1.16b
# LLVM-MCA-END
47
# NEON integer multiply-accumulate chain through v0.
# mul produces v0; two mla accumulate into v0 from v1/v2; the final mla
# additionally reads v0 as a multiplicand.
# LLVM-MCA-BEGIN mla
mul v0.4s, v0.4s, v0.4s
mla v0.4s, v1.4s, v2.4s
mla v0.4s, v1.4s, v2.4s
mla v0.4s, v0.4s, v1.4s
# LLVM-MCA-END
54
# NEON saturating rounding doubling multiply-accumulate chain through v0.
# mul produces v0; two sqrdmlah accumulate into v0 from v1/v2; the final
# sqrdmlah additionally reads v0 as a multiplicand.
# LLVM-MCA-BEGIN sqrdmlah
mul      v0.4s, v0.4s, v0.4s
sqrdmlah v0.4s, v1.4s, v2.4s
sqrdmlah v0.4s, v1.4s, v2.4s
sqrdmlah v0.4s, v0.4s, v1.4s
# LLVM-MCA-END
61
# NEON widening multiply-accumulate (upper half) chain through v0.
# mul produces v0; two smlal2 accumulate into v0 from v1/v2; the final
# smlal2 additionally reads v0 as a multiplicand.
# LLVM-MCA-BEGIN smlal2
mul    v0.4s, v0.4s, v0.4s
smlal2 v0.4s, v1.8h, v2.8h
smlal2 v0.4s, v1.8h, v2.8h
smlal2 v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
68
# NEON pairwise add-and-accumulate chain through v0.
# mul produces v0; two sadalp accumulate into v0 from v1; the final sadalp
# uses v0 as both accumulator and source.
# LLVM-MCA-BEGIN sadalp
mul    v0.4s, v0.4s, v0.4s
sadalp v0.2d, v1.4s
sadalp v0.2d, v1.4s
sadalp v0.2d, v0.4s
# LLVM-MCA-END
75
# NEON shift-right-and-accumulate chain through v0.
# mul produces v0; two ssra accumulate into v0 from v1; the final ssra
# uses v0 as both accumulator and shifted source.
# LLVM-MCA-BEGIN ssra
mul  v0.4s, v0.4s, v0.4s
ssra v0.2d, v1.2d, #1
ssra v0.2d, v1.2d, #1
ssra v0.2d, v0.2d, #1
# LLVM-MCA-END
82
# NEON FP complex multiply-accumulate chain through v0.
# fmul produces v0; two fcmla accumulate into v0 from v1/v2; the final
# fcmla additionally reads v0 as its first source operand.
# LLVM-MCA-BEGIN fcmla
fmul  v0.4s, v0.4s, v0.4s
fcmla v0.2d, v1.2d, v2.2d, #90
fcmla v0.2d, v1.2d, v2.2d, #90
fcmla v0.2d, v0.2d, v1.2d, #90
# LLVM-MCA-END
89
# NEON FP fused multiply-accumulate chain through v0, with two producers.
# fmul -> fmla (accumulate into v0), then fadd -> fmla -> fmla (accumulate
# into v0), and a last fmla that also reads v0 as a multiplicand.
# LLVM-MCA-BEGIN fmla
fmul v0.2d, v0.2d, v0.2d
fmla v0.2d, v1.2d, v2.2d
fadd v0.2d, v0.2d, v0.2d
fmla v0.2d, v1.2d, v2.2d
fmla v0.2d, v1.2d, v2.2d
fmla v0.2d, v0.2d, v1.2d
# LLVM-MCA-END
98
# NEON FP widening multiply-accumulate chain through v0, with two producers.
# fmul -> fmlal (accumulate into v0), then fadd -> fmlal -> fmlal
# (accumulate into v0), and a last fmlal that also reads v0 as a multiplicand.
# LLVM-MCA-BEGIN fmlal
fmul  v0.2d, v0.2d, v0.2d
fmlal v0.4s, v1.4h, v2.4h
fadd  v0.2d, v0.2d, v0.2d
fmlal v0.4s, v1.4h, v2.4h
fmlal v0.4s, v1.4h, v2.4h
fmlal v0.4s, v0.4h, v1.4h
# LLVM-MCA-END
107
# NEON BFloat16 dot-product accumulate chain through v0.
# fmul produces v0; two bfdot accumulate into v0 from v1/v2; the final
# bfdot additionally reads v0 as its first source operand.
# LLVM-MCA-BEGIN bfdot
fmul  v0.2d, v0.2d, v0.2d
bfdot v0.4s, v1.8h, v2.8h
bfdot v0.4s, v1.8h, v2.8h
bfdot v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
114
# NEON BFloat16 matrix multiply-accumulate chain through v0.
# fmul produces v0; two bfmmla accumulate into v0 from v1/v2; the final
# bfmmla additionally reads v0 as its first source operand.
# LLVM-MCA-BEGIN bfmmla
fmul   v0.2d, v0.2d, v0.2d
bfmmla v0.4s, v1.8h, v2.8h
bfmmla v0.4s, v1.8h, v2.8h
bfmmla v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
121
# NEON BFloat16 widening multiply-accumulate (bottom) chain through v0.
# fmul produces v0; two bfmlalb accumulate into v0 from v1/v2; the final
# bfmlalb additionally reads v0 as its first source operand.
# LLVM-MCA-BEGIN bfmlalb
fmul    v0.2d, v0.2d, v0.2d
bfmlalb v0.4s, v1.8h, v2.8h
bfmlalb v0.4s, v1.8h, v2.8h
bfmlalb v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
128
# CRC32 chain through w0.
# mul produces w0; two crc32b take w0 as the running value with w1 as data;
# the final crc32b uses w0 for both the running value and the data operand.
# LLVM-MCA-BEGIN crc32b
mul    w0, w0, w0
crc32b w0, w0, w1
crc32b w0, w0, w1
crc32b w0, w0, w0
# LLVM-MCA-END
135
# SVE absolute-difference-accumulate chain through z0.
# mul produces z0; two saba accumulate into z0 from z1/z2; the final saba
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z saba
mul  z0.d, z0.d, z0.d
saba z0.d, z1.d, z2.d
saba z0.d, z1.d, z2.d
saba z0.d, z0.d, z1.d
# LLVM-MCA-END
142
# SVE predicated pairwise add-and-accumulate chain through z0.
# mul produces z0; two sadalp accumulate into z0 from z1 under p0; the
# final sadalp uses z0 as both accumulator and source.
# LLVM-MCA-BEGIN Z sadalp
mul    z0.d, z0.d, z0.d
sadalp z0.d, p0/m, z1.s
sadalp z0.d, p0/m, z1.s
sadalp z0.d, p0/m, z0.s
# LLVM-MCA-END
149
# SVE shift-right-and-accumulate chain through z0.
# mul produces z0; two ssra accumulate into z0 from z1; the final ssra
# uses z0 as both accumulator and shifted source.
# LLVM-MCA-BEGIN Z ssra
mul  z0.d, z0.d, z0.d
ssra z0.d, z1.d, #1
ssra z0.d, z1.d, #1
ssra z0.d, z0.d, #1
# LLVM-MCA-END
156
# SVE complex dot-product chain through z0, 32-bit accumulator lanes.
# mul produces z0; two cdot accumulate into z0 from z1/z2; the final cdot
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z cdot.s
mul  z0.d, z0.d, z0.d
cdot z0.s, z1.b, z2.b, #90
cdot z0.s, z1.b, z2.b, #90
cdot z0.s, z0.b, z1.b, #90
# LLVM-MCA-END
163
# SVE complex dot-product chain through z0, 64-bit accumulator lanes.
# mul produces z0; two cdot accumulate into z0 from z1/z2; the final cdot
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z cdot.d
mul  z0.d, z0.d, z0.d
cdot z0.d, z1.h, z2.h, #90
cdot z0.d, z1.h, z2.h, #90
cdot z0.d, z0.h, z1.h, #90
# LLVM-MCA-END
170
# SVE complex integer multiply-accumulate chain through z0, byte lanes.
# mul produces z0; two cmla accumulate into z0 from z1/z2; the final cmla
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z cmla.b
mul  z0.d, z0.d, z0.d
cmla z0.b, z1.b, z2.b, #90
cmla z0.b, z1.b, z2.b, #90
cmla z0.b, z0.b, z1.b, #90
# LLVM-MCA-END
177
# SVE complex integer multiply-accumulate chain through z0, doubleword lanes.
# mul produces z0; two cmla accumulate into z0 from z1/z2; the final cmla
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z cmla.d
mul  z0.d, z0.d, z0.d
cmla z0.d, z1.d, z2.d, #90
cmla z0.d, z1.d, z2.d, #90
cmla z0.d, z0.d, z1.d, #90
# LLVM-MCA-END
184
# SVE signed dot-product chain through z0, 32-bit accumulator lanes.
# mul produces z0; two sdot accumulate into z0 from z1/z2; the final sdot
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z sdot.s
mul  z0.d, z0.d, z0.d
sdot z0.s, z1.b, z2.b
sdot z0.s, z1.b, z2.b
sdot z0.s, z0.b, z1.b
# LLVM-MCA-END
191
# SVE indexed dot-product chain through z0.
# mul produces z0; two indexed sdot accumulate into z0 from z1/z2[1]; the
# final one additionally reads z0 as its first source operand.
# NOTE(review): the region is labelled "sudot" but the body uses indexed
# sdot -- confirm which was intended. Changing either the label or the
# mnemonic requires regenerating the assertions for this region.
# LLVM-MCA-BEGIN Z sudot
mul  z0.d, z0.d, z0.d
sdot z0.s, z1.b, z2.b[1]
sdot z0.s, z1.b, z2.b[1]
sdot z0.s, z0.b, z1.b[1]
# LLVM-MCA-END
198
# SVE signed dot-product chain through z0, 64-bit accumulator lanes.
# mul produces z0; two sdot accumulate into z0 from z1/z2; the final sdot
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z sdot.d
mul  z0.d, z0.d, z0.d
sdot z0.d, z1.h, z2.h
sdot z0.d, z1.h, z2.h
sdot z0.d, z0.h, z1.h
# LLVM-MCA-END
205
# SVE signed matrix multiply-accumulate chain through z0.
# mul produces z0; two smmla accumulate into z0 from z1/z2; the final smmla
# additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z smmla
mul   z0.s, z0.s, z0.s
smmla z0.s, z1.b, z2.b
smmla z0.s, z1.b, z2.b
smmla z0.s, z0.b, z1.b
# LLVM-MCA-END
212
# SVE predicated multiply-accumulate chain through z0, byte lanes.
# mul produces z0; two mla accumulate into z0 from z1/z2 under p0; the
# final mla additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z mla.b
mul z0.d, z0.d, z0.d
mla z0.b, p0/m, z1.b, z2.b
mla z0.b, p0/m, z1.b, z2.b
mla z0.b, p0/m, z0.b, z1.b
# LLVM-MCA-END
219
# SVE predicated multiply-accumulate chain through z0, doubleword lanes.
# mul produces z0; two mla accumulate into z0 from z1/z2 under p0; the
# final mla additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z mla.d
mul z0.d, z0.d, z0.d
mla z0.d, p0/m, z1.d, z2.d
mla z0.d, p0/m, z1.d, z2.d
mla z0.d, p0/m, z0.d, z1.d
# LLVM-MCA-END
226
# SVE widening multiply-accumulate (bottom) chain through z0.
# mul produces z0; two smlalb accumulate into z0 from z1/z2; the final
# smlalb additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z smlalb
mul    z0.d, z0.d, z0.d
smlalb z0.d, z1.s, z2.s
smlalb z0.d, z1.s, z2.s
smlalb z0.d, z0.s, z1.s
# LLVM-MCA-END
233
# SVE saturating doubling widening multiply-accumulate chain through z0.
# mul produces z0; two sqdmlalb accumulate into z0 from z1/z2; the final
# sqdmlalb additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z sqdmlalb
mul      z0.d, z0.d, z0.d
sqdmlalb z0.d, z1.s, z2.s
sqdmlalb z0.d, z1.s, z2.s
sqdmlalb z0.d, z0.s, z1.s
# LLVM-MCA-END
240
# SVE saturating rounding doubling multiply-accumulate chain, byte lanes.
# mul produces z0; two sqrdmlah accumulate into z0 from z1/z2; the final
# sqrdmlah additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z sqrdmlah.b
mul      z0.d, z0.d, z0.d
sqrdmlah z0.b, z1.b, z2.b
sqrdmlah z0.b, z1.b, z2.b
sqrdmlah z0.b, z0.b, z1.b
# LLVM-MCA-END
247
# SVE saturating rounding doubling multiply-accumulate chain, doubleword lanes.
# mul produces z0; two sqrdmlah accumulate into z0 from z1/z2; the final
# sqrdmlah additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z sqrdmlah.d
mul      z0.d, z0.d, z0.d
sqrdmlah z0.d, z1.d, z2.d
sqrdmlah z0.d, z1.d, z2.d
sqrdmlah z0.d, z0.d, z1.d
# LLVM-MCA-END
254
# SVE predicated FP complex multiply-accumulate chain through z0.
# fmul produces z0; two fcmla accumulate into z0 from z1/z2 under p0; the
# final fcmla additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z fcmla ZPmZZ
fmul  z0.d, z0.d, z0.d
fcmla z0.d, p0/m, z1.d, z2.d, 90
fcmla z0.d, p0/m, z1.d, z2.d, 90
fcmla z0.d, p0/m, z0.d, z1.d, 90
# LLVM-MCA-END
261
# SVE indexed FP complex multiply-accumulate chain through z0.
# fmul produces z0; two fcmla accumulate into z0 from z1/z2[1]; the final
# fcmla additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z fcmla ZZZI
fmul  z0.d, z0.d, z0.d
fcmla z0.s, z1.s, z2.s[1], 90
fcmla z0.s, z1.s, z2.s[1], 90
fcmla z0.s, z0.s, z1.s[1], 90
# LLVM-MCA-END
268
# SVE predicated FP fused multiply-accumulate chain through z0.
# fmul produces z0; two fmla accumulate into z0 from z1/z2 under p0; the
# final fmla additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z fmla ZPmZZ
fmul z0.d, z0.d, z0.d
fmla z0.d, p0/m, z1.d, z2.d
fmla z0.d, p0/m, z1.d, z2.d
fmla z0.d, p0/m, z0.d, z1.d
# LLVM-MCA-END
275
# SVE indexed FP fused multiply-accumulate chain through z0.
# fmul produces z0; two fmla accumulate into z0 from z1/z2[1]; the final
# fmla additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z fmla ZZZI
fmul z0.d, z0.d, z0.d
fmla z0.d, z1.d, z2.d[1]
fmla z0.d, z1.d, z2.d[1]
fmla z0.d, z0.d, z1.d[1]
# LLVM-MCA-END
282
# SVE FP widening multiply-accumulate (bottom) chain through z0.
# fmul produces z0; two fmlalb accumulate into z0 from z1/z2; the final
# fmlalb additionally reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z fmlalb ZZZ
fmul   z0.d, z0.d, z0.d
fmlalb z0.s, z1.h, z2.h
fmlalb z0.s, z1.h, z2.h
fmlalb z0.s, z0.h, z1.h
# LLVM-MCA-END
289
# SVE BFloat16 dot-product accumulate chain through z0.
# fmul produces z0; two bfdot accumulate into z0 from z1/z2; the final
# bfdot additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z bfdot
fmul  z0.d, z0.d, z0.d
bfdot z0.s, z1.h, z2.h
bfdot z0.s, z1.h, z2.h
bfdot z0.s, z0.h, z1.h
# LLVM-MCA-END
296
# SVE BFloat16 matrix multiply-accumulate chain through z0.
# fmul produces z0; two bfmmla accumulate into z0 from z1/z2; the final
# bfmmla additionally reads z0 as its first source operand.
# LLVM-MCA-BEGIN Z bfmmla
fmul   z0.d, z0.d, z0.d
bfmmla z0.s, z1.h, z2.h
bfmmla z0.s, z1.h, z2.h
bfmmla z0.s, z0.h, z1.h
# LLVM-MCA-END
303
# SVE BFloat16 widening multiply-accumulate (bottom) chain through z0.
# fmul produces z0; two bfmlalb accumulate into z0 from z1/z2; the final
# bfmlalb additionally reads z0 as its first source operand.
# NOTE(review): this SVE region reuses the name of the earlier NEON region
# and lacks the "Z " prefix the other SVE regions carry -- confirm. Renaming
# it requires regenerating the assertions for this region.
# LLVM-MCA-BEGIN bfmlalb
fmul    z0.d, z0.d, z0.d
bfmlalb z0.s, z1.h, z2.h
bfmlalb z0.s, z1.h, z2.h
bfmlalb z0.s, z0.h, z1.h
# LLVM-MCA-END
310
311# CHECK:      [0] Code Region - madd
312
313# CHECK:      Iterations:        100
314# CHECK-NEXT: Instructions:      400
315# CHECK-NEXT: Total Cycles:      703
316# CHECK-NEXT: Total uOps:        400
317
318# CHECK:      Dispatch Width:    16
319# CHECK-NEXT: uOps Per Cycle:    0.57
320# CHECK-NEXT: IPC:               0.57
321# CHECK-NEXT: Block RThroughput: 3.0
322
323# CHECK:      Timeline view:
324# CHECK-NEXT:                     0123456
325# CHECK-NEXT: Index     0123456789
326
327# CHECK:      [0,0]     DeeER.    .    ..   mul	x0, x0, x0
328# CHECK-NEXT: [0,1]     D==eeER   .    ..   madd	x0, x1, x2, x0
329# CHECK-NEXT: [0,2]     D===eeER  .    ..   madd	x0, x1, x2, x0
330# CHECK-NEXT: [0,3]     D=====eeER.    ..   madd	x0, x0, x0, x0
331# CHECK-NEXT: [1,0]     D=======eeER   ..   mul	x0, x0, x0
332# CHECK-NEXT: [1,1]     D=========eeER ..   madd	x0, x1, x2, x0
333# CHECK-NEXT: [1,2]     D==========eeER..   madd	x0, x1, x2, x0
334# CHECK-NEXT: [1,3]     D============eeER   madd	x0, x0, x0, x0
335
336# CHECK:      Average Wait times (based on the timeline view):
337# CHECK-NEXT: [0]: Executions
338# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
339# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
340# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
341
342# CHECK:            [0]    [1]    [2]    [3]
343# CHECK-NEXT: 0.     2     4.5    0.5    0.0       mul	x0, x0, x0
344# CHECK-NEXT: 1.     2     6.5    0.0    0.0       madd	x0, x1, x2, x0
345# CHECK-NEXT: 2.     2     7.5    0.0    0.0       madd	x0, x1, x2, x0
346# CHECK-NEXT: 3.     2     9.5    0.0    0.0       madd	x0, x0, x0, x0
347# CHECK-NEXT:        2     7.0    0.1    0.0       <total>
348
349# CHECK:      [1] Code Region - smaddl
350
351# CHECK:      Iterations:        100
352# CHECK-NEXT: Instructions:      400
353# CHECK-NEXT: Total Cycles:      703
354# CHECK-NEXT: Total uOps:        400
355
356# CHECK:      Dispatch Width:    16
357# CHECK-NEXT: uOps Per Cycle:    0.57
358# CHECK-NEXT: IPC:               0.57
359# CHECK-NEXT: Block RThroughput: 3.0
360
361# CHECK:      Timeline view:
362# CHECK-NEXT:                     0123456
363# CHECK-NEXT: Index     0123456789
364
365# CHECK:      [0,0]     DeeER.    .    ..   mul	x0, x0, x0
366# CHECK-NEXT: [0,1]     D==eeER   .    ..   smaddl	x0, w1, w2, x0
367# CHECK-NEXT: [0,2]     D===eeER  .    ..   smaddl	x0, w1, w2, x0
368# CHECK-NEXT: [0,3]     D=====eeER.    ..   smaddl	x0, w0, w0, x0
369# CHECK-NEXT: [1,0]     D=======eeER   ..   mul	x0, x0, x0
370# CHECK-NEXT: [1,1]     D=========eeER ..   smaddl	x0, w1, w2, x0
371# CHECK-NEXT: [1,2]     D==========eeER..   smaddl	x0, w1, w2, x0
372# CHECK-NEXT: [1,3]     D============eeER   smaddl	x0, w0, w0, x0
373
374# CHECK:      Average Wait times (based on the timeline view):
375# CHECK-NEXT: [0]: Executions
376# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
377# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
378# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
379
380# CHECK:            [0]    [1]    [2]    [3]
381# CHECK-NEXT: 0.     2     4.5    0.5    0.0       mul	x0, x0, x0
382# CHECK-NEXT: 1.     2     6.5    0.0    0.0       smaddl	x0, w1, w2, x0
383# CHECK-NEXT: 2.     2     7.5    0.0    0.0       smaddl	x0, w1, w2, x0
384# CHECK-NEXT: 3.     2     9.5    0.0    0.0       smaddl	x0, w0, w0, x0
385# CHECK-NEXT:        2     7.0    0.1    0.0       <total>
386
387# CHECK:      [2] Code Region - fmadd
388
389# CHECK:      Iterations:        100
390# CHECK-NEXT: Instructions:      600
391# CHECK-NEXT: Total Cycles:      1703
392# CHECK-NEXT: Total uOps:        600
393
394# CHECK:      Dispatch Width:    16
395# CHECK-NEXT: uOps Per Cycle:    0.35
396# CHECK-NEXT: IPC:               0.35
397# CHECK-NEXT: Block RThroughput: 1.5
398
399# CHECK:      Timeline view:
400# CHECK-NEXT:                     0123456789          0123456
401# CHECK-NEXT: Index     0123456789          0123456789
402
403# CHECK:      [0,0]     DeeER.    .    .    .    .    .    ..   fadd	d0, d0, d0
404# CHECK-NEXT: [0,1]     D==eeeeER .    .    .    .    .    ..   fmadd	d0, d1, d2, d0
405# CHECK-NEXT: [0,2]     D======eeeER   .    .    .    .    ..   fmul	d0, d0, d0
406# CHECK-NEXT: [0,3]     D=======eeeeER .    .    .    .    ..   fmadd	d0, d1, d2, d0
407# CHECK-NEXT: [0,4]     D=========eeeeER    .    .    .    ..   fmadd	d0, d1, d2, d0
408# CHECK-NEXT: [0,5]     D=============eeeeER.    .    .    ..   fmadd	d0, d0, d1, d2
409# CHECK-NEXT: [1,0]     D=================eeER   .    .    ..   fadd	d0, d0, d0
410# CHECK-NEXT: [1,1]     D===================eeeeER    .    ..   fmadd	d0, d1, d2, d0
411# CHECK-NEXT: [1,2]     D=======================eeeER .    ..   fmul	d0, d0, d0
412# CHECK-NEXT: [1,3]     D========================eeeeER    ..   fmadd	d0, d1, d2, d0
413# CHECK-NEXT: [1,4]     D==========================eeeeER  ..   fmadd	d0, d1, d2, d0
414# CHECK-NEXT: [1,5]     D==============================eeeeER   fmadd	d0, d0, d1, d2
415
416# CHECK:      Average Wait times (based on the timeline view):
417# CHECK-NEXT: [0]: Executions
418# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
419# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
420# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
421
422# CHECK:            [0]    [1]    [2]    [3]
423# CHECK-NEXT: 0.     2     9.5    0.5    0.0       fadd	d0, d0, d0
424# CHECK-NEXT: 1.     2     11.5   0.0    0.0       fmadd	d0, d1, d2, d0
425# CHECK-NEXT: 2.     2     15.5   0.0    0.0       fmul	d0, d0, d0
426# CHECK-NEXT: 3.     2     16.5   0.0    0.0       fmadd	d0, d1, d2, d0
427# CHECK-NEXT: 4.     2     18.5   0.0    0.0       fmadd	d0, d1, d2, d0
428# CHECK-NEXT: 5.     2     22.5   0.0    0.0       fmadd	d0, d0, d1, d2
429# CHECK-NEXT:        2     15.7   0.1    0.0       <total>
430
431# CHECK:      [3] Code Region - saba
432
433# CHECK:      Iterations:        100
434# CHECK-NEXT: Instructions:      400
435# CHECK-NEXT: Total Cycles:      1303
436# CHECK-NEXT: Total uOps:        400
437
438# CHECK:      Dispatch Width:    16
439# CHECK-NEXT: uOps Per Cycle:    0.31
440# CHECK-NEXT: IPC:               0.31
441# CHECK-NEXT: Block RThroughput: 1.5
442
443# CHECK:      Timeline view:
444# CHECK-NEXT:                     0123456789
445# CHECK-NEXT: Index     0123456789          012345678
446
447# CHECK:      [0,0]     DeeeeER   .    .    .    .  .   mul	v0.4s, v0.4s, v0.4s
448# CHECK-NEXT: [0,1]     D====eeeeER    .    .    .  .   saba	v0.4s, v1.4s, v2.4s
449# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   saba	v0.4s, v1.4s, v2.4s
450# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   saba	v0.4s, v0.4s, v1.4s
451# CHECK-NEXT: [1,0]     D=============eeeeER.    .  .   mul	v0.4s, v0.4s, v0.4s
452# CHECK-NEXT: [1,1]     D=================eeeeER .  .   saba	v0.4s, v1.4s, v2.4s
453# CHECK-NEXT: [1,2]     D==================eeeeER.  .   saba	v0.4s, v1.4s, v2.4s
454# CHECK-NEXT: [1,3]     D======================eeeeER   saba	v0.4s, v0.4s, v1.4s
455
456# CHECK:      Average Wait times (based on the timeline view):
457# CHECK-NEXT: [0]: Executions
458# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
459# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
460# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
461
462# CHECK:            [0]    [1]    [2]    [3]
463# CHECK-NEXT: 0.     2     7.5    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
464# CHECK-NEXT: 1.     2     11.5   0.0    0.0       saba	v0.4s, v1.4s, v2.4s
465# CHECK-NEXT: 2.     2     12.5   0.0    0.0       saba	v0.4s, v1.4s, v2.4s
466# CHECK-NEXT: 3.     2     16.5   0.0    0.0       saba	v0.4s, v0.4s, v1.4s
467# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
468
469# CHECK:      [4] Code Region - sdot
470
471# CHECK:      Iterations:        100
472# CHECK-NEXT: Instructions:      400
473# CHECK-NEXT: Total Cycles:      1103
474# CHECK-NEXT: Total uOps:        400
475
476# CHECK:      Dispatch Width:    16
477# CHECK-NEXT: uOps Per Cycle:    0.36
478# CHECK-NEXT: IPC:               0.36
479# CHECK-NEXT: Block RThroughput: 0.8
480
481# CHECK:      Timeline view:
482# CHECK-NEXT:                     0123456789
483# CHECK-NEXT: Index     0123456789          01234
484
485# CHECK:      [0,0]     DeeeeER   .    .    .   .   mul	v0.4s, v0.4s, v0.4s
486# CHECK-NEXT: [0,1]     D====eeeER.    .    .   .   sdot	v0.4s, v1.16b, v2.16b
487# CHECK-NEXT: [0,2]     D=====eeeER    .    .   .   sdot	v0.4s, v1.16b, v2.16b
488# CHECK-NEXT: [0,3]     D========eeeER .    .   .   sdot	v0.4s, v0.16b, v1.16b
489# CHECK-NEXT: [1,0]     D===========eeeeER  .   .   mul	v0.4s, v0.4s, v0.4s
490# CHECK-NEXT: [1,1]     D===============eeeER   .   sdot	v0.4s, v1.16b, v2.16b
491# CHECK-NEXT: [1,2]     D================eeeER  .   sdot	v0.4s, v1.16b, v2.16b
492# CHECK-NEXT: [1,3]     D===================eeeER   sdot	v0.4s, v0.16b, v1.16b
493
494# CHECK:      Average Wait times (based on the timeline view):
495# CHECK-NEXT: [0]: Executions
496# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
497# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
498# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
499
500# CHECK:            [0]    [1]    [2]    [3]
501# CHECK-NEXT: 0.     2     6.5    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
502# CHECK-NEXT: 1.     2     10.5   0.0    0.0       sdot	v0.4s, v1.16b, v2.16b
503# CHECK-NEXT: 2.     2     11.5   0.0    0.0       sdot	v0.4s, v1.16b, v2.16b
504# CHECK-NEXT: 3.     2     14.5   0.0    0.0       sdot	v0.4s, v0.16b, v1.16b
505# CHECK-NEXT:        2     10.8   0.1    0.0       <total>
506
507# CHECK:      [5] Code Region - smmla
508
509# CHECK:      Iterations:        100
510# CHECK-NEXT: Instructions:      400
511# CHECK-NEXT: Total Cycles:      1103
512# CHECK-NEXT: Total uOps:        400
513
514# CHECK:      Dispatch Width:    16
515# CHECK-NEXT: uOps Per Cycle:    0.36
516# CHECK-NEXT: IPC:               0.36
517# CHECK-NEXT: Block RThroughput: 0.8
518
519# CHECK:      Timeline view:
520# CHECK-NEXT:                     0123456789
521# CHECK-NEXT: Index     0123456789          01234
522
523# CHECK:      [0,0]     DeeeeER   .    .    .   .   mul	v0.4s, v0.4s, v0.4s
524# CHECK-NEXT: [0,1]     D====eeeER.    .    .   .   smmla	v0.4s, v1.16b, v2.16b
525# CHECK-NEXT: [0,2]     D=====eeeER    .    .   .   smmla	v0.4s, v1.16b, v2.16b
526# CHECK-NEXT: [0,3]     D========eeeER .    .   .   smmla	v0.4s, v0.16b, v1.16b
527# CHECK-NEXT: [1,0]     D===========eeeeER  .   .   mul	v0.4s, v0.4s, v0.4s
528# CHECK-NEXT: [1,1]     D===============eeeER   .   smmla	v0.4s, v1.16b, v2.16b
529# CHECK-NEXT: [1,2]     D================eeeER  .   smmla	v0.4s, v1.16b, v2.16b
530# CHECK-NEXT: [1,3]     D===================eeeER   smmla	v0.4s, v0.16b, v1.16b
531
532# CHECK:      Average Wait times (based on the timeline view):
533# CHECK-NEXT: [0]: Executions
534# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
535# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
536# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
537
538# CHECK:            [0]    [1]    [2]    [3]
539# CHECK-NEXT: 0.     2     6.5    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
540# CHECK-NEXT: 1.     2     10.5   0.0    0.0       smmla	v0.4s, v1.16b, v2.16b
541# CHECK-NEXT: 2.     2     11.5   0.0    0.0       smmla	v0.4s, v1.16b, v2.16b
542# CHECK-NEXT: 3.     2     14.5   0.0    0.0       smmla	v0.4s, v0.16b, v1.16b
543# CHECK-NEXT:        2     10.8   0.1    0.0       <total>
544
545# CHECK:      [6] Code Region - mla
546
547# CHECK:      Iterations:        100
548# CHECK-NEXT: Instructions:      400
549# CHECK-NEXT: Total Cycles:      1303
550# CHECK-NEXT: Total uOps:        400
551
552# CHECK:      Dispatch Width:    16
553# CHECK-NEXT: uOps Per Cycle:    0.31
554# CHECK-NEXT: IPC:               0.31
555# CHECK-NEXT: Block RThroughput: 2.0
556
557# CHECK:      Timeline view:
558# CHECK-NEXT:                     0123456789
559# CHECK-NEXT: Index     0123456789          012345678
560
561# CHECK:      [0,0]     DeeeeER   .    .    .    .  .   mul	v0.4s, v0.4s, v0.4s
562# CHECK-NEXT: [0,1]     D====eeeeER    .    .    .  .   mla	v0.4s, v1.4s, v2.4s
563# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   mla	v0.4s, v1.4s, v2.4s
564# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   mla	v0.4s, v0.4s, v1.4s
565# CHECK-NEXT: [1,0]     D=============eeeeER.    .  .   mul	v0.4s, v0.4s, v0.4s
566# CHECK-NEXT: [1,1]     D=================eeeeER .  .   mla	v0.4s, v1.4s, v2.4s
567# CHECK-NEXT: [1,2]     D==================eeeeER.  .   mla	v0.4s, v1.4s, v2.4s
568# CHECK-NEXT: [1,3]     D======================eeeeER   mla	v0.4s, v0.4s, v1.4s
569
570# CHECK:      Average Wait times (based on the timeline view):
571# CHECK-NEXT: [0]: Executions
572# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
573# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
574# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
575
576# CHECK:            [0]    [1]    [2]    [3]
577# CHECK-NEXT: 0.     2     7.5    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
578# CHECK-NEXT: 1.     2     11.5   0.0    0.0       mla	v0.4s, v1.4s, v2.4s
579# CHECK-NEXT: 2.     2     12.5   0.0    0.0       mla	v0.4s, v1.4s, v2.4s
580# CHECK-NEXT: 3.     2     16.5   0.0    0.0       mla	v0.4s, v0.4s, v1.4s
581# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
582
583# CHECK:      [7] Code Region - sqrdmlah
584
585# CHECK:      Iterations:        100
586# CHECK-NEXT: Instructions:      400
587# CHECK-NEXT: Total Cycles:      1403
588# CHECK-NEXT: Total uOps:        400
589
590# CHECK:      Dispatch Width:    16
591# CHECK-NEXT: uOps Per Cycle:    0.29
592# CHECK-NEXT: IPC:               0.29
593# CHECK-NEXT: Block RThroughput: 3.5
594
595# CHECK:      Timeline view:
596# CHECK-NEXT:                     0123456789          0
597# CHECK-NEXT: Index     0123456789          0123456789
598
599# CHECK:      [0,0]     DeeeeER   .    .    .    .    .   mul	v0.4s, v0.4s, v0.4s
600# CHECK-NEXT: [0,1]     D====eeeeER    .    .    .    .   sqrdmlah	v0.4s, v1.4s, v2.4s
601# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   sqrdmlah	v0.4s, v1.4s, v2.4s
602# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   sqrdmlah	v0.4s, v0.4s, v1.4s
603# CHECK-NEXT: [1,0]     D==============eeeeER    .    .   mul	v0.4s, v0.4s, v0.4s
604# CHECK-NEXT: [1,1]     D==================eeeeER.    .   sqrdmlah	v0.4s, v1.4s, v2.4s
605# CHECK-NEXT: [1,2]     D====================eeeeER   .   sqrdmlah	v0.4s, v1.4s, v2.4s
606# CHECK-NEXT: [1,3]     D========================eeeeER   sqrdmlah	v0.4s, v0.4s, v1.4s
607
608# CHECK:      Average Wait times (based on the timeline view):
609# CHECK-NEXT: [0]: Executions
610# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
611# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
612# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
613
614# CHECK:            [0]    [1]    [2]    [3]
615# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
616# CHECK-NEXT: 1.     2     12.0   0.0    0.0       sqrdmlah	v0.4s, v1.4s, v2.4s
617# CHECK-NEXT: 2.     2     14.0   0.0    0.0       sqrdmlah	v0.4s, v1.4s, v2.4s
618# CHECK-NEXT: 3.     2     18.0   0.0    0.0       sqrdmlah	v0.4s, v0.4s, v1.4s
619# CHECK-NEXT:        2     13.0   0.1    0.0       <total>
620
621# CHECK:      [8] Code Region - smlal2
622
623# CHECK:      Iterations:        100
624# CHECK-NEXT: Instructions:      400
625# CHECK-NEXT: Total Cycles:      1303
626# CHECK-NEXT: Total uOps:        400
627
628# CHECK:      Dispatch Width:    16
629# CHECK-NEXT: uOps Per Cycle:    0.31
630# CHECK-NEXT: IPC:               0.31
631# CHECK-NEXT: Block RThroughput: 2.0
632
633# CHECK:      Timeline view:
634# CHECK-NEXT:                     0123456789
635# CHECK-NEXT: Index     0123456789          012345678
636
637# CHECK:      [0,0]     DeeeeER   .    .    .    .  .   mul	v0.4s, v0.4s, v0.4s
638# CHECK-NEXT: [0,1]     D====eeeeER    .    .    .  .   smlal2	v0.4s, v1.8h, v2.8h
639# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   smlal2	v0.4s, v1.8h, v2.8h
640# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   smlal2	v0.4s, v0.8h, v1.8h
641# CHECK-NEXT: [1,0]     D=============eeeeER.    .  .   mul	v0.4s, v0.4s, v0.4s
642# CHECK-NEXT: [1,1]     D=================eeeeER .  .   smlal2	v0.4s, v1.8h, v2.8h
643# CHECK-NEXT: [1,2]     D==================eeeeER.  .   smlal2	v0.4s, v1.8h, v2.8h
644# CHECK-NEXT: [1,3]     D======================eeeeER   smlal2	v0.4s, v0.8h, v1.8h
645
646# CHECK:      Average Wait times (based on the timeline view):
647# CHECK-NEXT: [0]: Executions
648# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
649# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
650# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
651
652# CHECK:            [0]    [1]    [2]    [3]
653# CHECK-NEXT: 0.     2     7.5    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
654# CHECK-NEXT: 1.     2     11.5   0.0    0.0       smlal2	v0.4s, v1.8h, v2.8h
655# CHECK-NEXT: 2.     2     12.5   0.0    0.0       smlal2	v0.4s, v1.8h, v2.8h
656# CHECK-NEXT: 3.     2     16.5   0.0    0.0       smlal2	v0.4s, v0.8h, v1.8h
657# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
658
659# CHECK:      [9] Code Region - sadalp
660
661# CHECK:      Iterations:        100
662# CHECK-NEXT: Instructions:      400
663# CHECK-NEXT: Total Cycles:      1303
664# CHECK-NEXT: Total uOps:        400
665
666# CHECK:      Dispatch Width:    16
667# CHECK-NEXT: uOps Per Cycle:    0.31
668# CHECK-NEXT: IPC:               0.31
669# CHECK-NEXT: Block RThroughput: 1.5
670
671# CHECK:      Timeline view:
672# CHECK-NEXT:                     0123456789
673# CHECK-NEXT: Index     0123456789          012345678
674
675# CHECK:      [0,0]     DeeeeER   .    .    .    .  .   mul	v0.4s, v0.4s, v0.4s
676# CHECK-NEXT: [0,1]     D====eeeeER    .    .    .  .   sadalp	v0.2d, v1.4s
677# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   sadalp	v0.2d, v1.4s
678# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   sadalp	v0.2d, v0.4s
679# CHECK-NEXT: [1,0]     D=============eeeeER.    .  .   mul	v0.4s, v0.4s, v0.4s
680# CHECK-NEXT: [1,1]     D=================eeeeER .  .   sadalp	v0.2d, v1.4s
681# CHECK-NEXT: [1,2]     D==================eeeeER.  .   sadalp	v0.2d, v1.4s
682# CHECK-NEXT: [1,3]     D======================eeeeER   sadalp	v0.2d, v0.4s
683
684# CHECK:      Average Wait times (based on the timeline view):
685# CHECK-NEXT: [0]: Executions
686# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
687# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
688# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
689
690# CHECK:            [0]    [1]    [2]    [3]
691# CHECK-NEXT: 0.     2     7.5    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
692# CHECK-NEXT: 1.     2     11.5   0.0    0.0       sadalp	v0.2d, v1.4s
693# CHECK-NEXT: 2.     2     12.5   0.0    0.0       sadalp	v0.2d, v1.4s
694# CHECK-NEXT: 3.     2     16.5   0.0    0.0       sadalp	v0.2d, v0.4s
695# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
696
697# CHECK:      [10] Code Region - ssra
698
699# CHECK:      Iterations:        100
700# CHECK-NEXT: Instructions:      400
701# CHECK-NEXT: Total Cycles:      1303
702# CHECK-NEXT: Total uOps:        400
703
704# CHECK:      Dispatch Width:    16
705# CHECK-NEXT: uOps Per Cycle:    0.31
706# CHECK-NEXT: IPC:               0.31
707# CHECK-NEXT: Block RThroughput: 1.5
708
709# CHECK:      Timeline view:
710# CHECK-NEXT:                     0123456789
711# CHECK-NEXT: Index     0123456789          012345678
712
713# CHECK:      [0,0]     DeeeeER   .    .    .    .  .   mul	v0.4s, v0.4s, v0.4s
714# CHECK-NEXT: [0,1]     D====eeeeER    .    .    .  .   ssra	v0.2d, v1.2d, #1
715# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   ssra	v0.2d, v1.2d, #1
716# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   ssra	v0.2d, v0.2d, #1
717# CHECK-NEXT: [1,0]     D=============eeeeER.    .  .   mul	v0.4s, v0.4s, v0.4s
718# CHECK-NEXT: [1,1]     D=================eeeeER .  .   ssra	v0.2d, v1.2d, #1
719# CHECK-NEXT: [1,2]     D==================eeeeER.  .   ssra	v0.2d, v1.2d, #1
720# CHECK-NEXT: [1,3]     D======================eeeeER   ssra	v0.2d, v0.2d, #1
721
722# CHECK:      Average Wait times (based on the timeline view):
723# CHECK-NEXT: [0]: Executions
724# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
725# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
726# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
727
728# CHECK:            [0]    [1]    [2]    [3]
729# CHECK-NEXT: 0.     2     7.5    0.5    0.0       mul	v0.4s, v0.4s, v0.4s
730# CHECK-NEXT: 1.     2     11.5   0.0    0.0       ssra	v0.2d, v1.2d, #1
731# CHECK-NEXT: 2.     2     12.5   0.0    0.0       ssra	v0.2d, v1.2d, #1
732# CHECK-NEXT: 3.     2     16.5   0.0    0.0       ssra	v0.2d, v0.2d, #1
733# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
734
735# CHECK:      [11] Code Region - fcmla
736
737# CHECK:      Iterations:        100
738# CHECK-NEXT: Instructions:      400
739# CHECK-NEXT: Total Cycles:      1303
740# CHECK-NEXT: Total uOps:        400
741
742# CHECK:      Dispatch Width:    16
743# CHECK-NEXT: uOps Per Cycle:    0.31
744# CHECK-NEXT: IPC:               0.31
745# CHECK-NEXT: Block RThroughput: 1.0
746
747# CHECK:      Timeline view:
748# CHECK-NEXT:                     0123456789
749# CHECK-NEXT: Index     0123456789          012345678
750
751# CHECK:      [0,0]     DeeeER    .    .    .    .  .   fmul	v0.4s, v0.4s, v0.4s
752# CHECK-NEXT: [0,1]     D===eeeeER.    .    .    .  .   fcmla	v0.2d, v1.2d, v2.2d, #90
753# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   fcmla	v0.2d, v1.2d, v2.2d, #90
754# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   fcmla	v0.2d, v0.2d, v1.2d, #90
755# CHECK-NEXT: [1,0]     D=============eeeER .    .  .   fmul	v0.4s, v0.4s, v0.4s
756# CHECK-NEXT: [1,1]     D================eeeeER  .  .   fcmla	v0.2d, v1.2d, v2.2d, #90
757# CHECK-NEXT: [1,2]     D==================eeeeER.  .   fcmla	v0.2d, v1.2d, v2.2d, #90
758# CHECK-NEXT: [1,3]     D======================eeeeER   fcmla	v0.2d, v0.2d, v1.2d, #90
759
760# CHECK:      Average Wait times (based on the timeline view):
761# CHECK-NEXT: [0]: Executions
762# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
763# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
764# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
765
766# CHECK:            [0]    [1]    [2]    [3]
767# CHECK-NEXT: 0.     2     7.5    0.5    0.0       fmul	v0.4s, v0.4s, v0.4s
768# CHECK-NEXT: 1.     2     10.5   0.0    0.0       fcmla	v0.2d, v1.2d, v2.2d, #90
769# CHECK-NEXT: 2.     2     12.5   0.0    0.0       fcmla	v0.2d, v1.2d, v2.2d, #90
770# CHECK-NEXT: 3.     2     16.5   0.0    0.0       fcmla	v0.2d, v0.2d, v1.2d, #90
771# CHECK-NEXT:        2     11.8   0.1    0.0       <total>
772
773# CHECK:      [12] Code Region - fmla
774
775# CHECK:      Iterations:        100
776# CHECK-NEXT: Instructions:      600
777# CHECK-NEXT: Total Cycles:      1703
778# CHECK-NEXT: Total uOps:        600
779
780# CHECK:      Dispatch Width:    16
781# CHECK-NEXT: uOps Per Cycle:    0.35
782# CHECK-NEXT: IPC:               0.35
783# CHECK-NEXT: Block RThroughput: 1.5
784
785# CHECK:      Timeline view:
786# CHECK-NEXT:                     0123456789          0123456
787# CHECK-NEXT: Index     0123456789          0123456789
788
789# CHECK:      [0,0]     DeeeER    .    .    .    .    .    ..   fmul	v0.2d, v0.2d, v0.2d
790# CHECK-NEXT: [0,1]     D=eeeeER  .    .    .    .    .    ..   fmla	v0.2d, v1.2d, v2.2d
791# CHECK-NEXT: [0,2]     D=====eeER.    .    .    .    .    ..   fadd	v0.2d, v0.2d, v0.2d
792# CHECK-NEXT: [0,3]     D=======eeeeER .    .    .    .    ..   fmla	v0.2d, v1.2d, v2.2d
793# CHECK-NEXT: [0,4]     D=========eeeeER    .    .    .    ..   fmla	v0.2d, v1.2d, v2.2d
794# CHECK-NEXT: [0,5]     D=============eeeeER.    .    .    ..   fmla	v0.2d, v0.2d, v1.2d
795# CHECK-NEXT: [1,0]     D=================eeeER  .    .    ..   fmul	v0.2d, v0.2d, v0.2d
796# CHECK-NEXT: [1,1]     D==================eeeeER.    .    ..   fmla	v0.2d, v1.2d, v2.2d
797# CHECK-NEXT: [1,2]     D======================eeER   .    ..   fadd	v0.2d, v0.2d, v0.2d
798# CHECK-NEXT: [1,3]     D========================eeeeER    ..   fmla	v0.2d, v1.2d, v2.2d
799# CHECK-NEXT: [1,4]     D==========================eeeeER  ..   fmla	v0.2d, v1.2d, v2.2d
800# CHECK-NEXT: [1,5]     D==============================eeeeER   fmla	v0.2d, v0.2d, v1.2d
801
802# CHECK:      Average Wait times (based on the timeline view):
803# CHECK-NEXT: [0]: Executions
804# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
805# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
806# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
807
808# CHECK:            [0]    [1]    [2]    [3]
809# CHECK-NEXT: 0.     2     9.5    0.5    0.0       fmul	v0.2d, v0.2d, v0.2d
810# CHECK-NEXT: 1.     2     10.5   0.0    0.0       fmla	v0.2d, v1.2d, v2.2d
811# CHECK-NEXT: 2.     2     14.5   0.0    0.0       fadd	v0.2d, v0.2d, v0.2d
812# CHECK-NEXT: 3.     2     16.5   0.0    0.0       fmla	v0.2d, v1.2d, v2.2d
813# CHECK-NEXT: 4.     2     18.5   0.0    0.0       fmla	v0.2d, v1.2d, v2.2d
814# CHECK-NEXT: 5.     2     22.5   0.0    0.0       fmla	v0.2d, v0.2d, v1.2d
815# CHECK-NEXT:        2     15.3   0.1    0.0       <total>
816
817# CHECK:      [13] Code Region - fmlal
818
819# CHECK:      Iterations:        100
820# CHECK-NEXT: Instructions:      600
821# CHECK-NEXT: Total Cycles:      1903
822# CHECK-NEXT: Total uOps:        600
823
824# CHECK:      Dispatch Width:    16
825# CHECK-NEXT: uOps Per Cycle:    0.32
826# CHECK-NEXT: IPC:               0.32
827# CHECK-NEXT: Block RThroughput: 1.5
828
829# CHECK:      Timeline view:
830# CHECK-NEXT:                     0123456789          0123456789
831# CHECK-NEXT: Index     0123456789          0123456789          0
832
833# CHECK:      [0,0]     DeeeER    .    .    .    .    .    .    .   fmul	v0.2d, v0.2d, v0.2d
834# CHECK-NEXT: [0,1]     D===eeeeER.    .    .    .    .    .    .   fmlal	v0.4s, v1.4h, v2.4h
835# CHECK-NEXT: [0,2]     D=======eeER   .    .    .    .    .    .   fadd	v0.2d, v0.2d, v0.2d
836# CHECK-NEXT: [0,3]     D=========eeeeER    .    .    .    .    .   fmlal	v0.4s, v1.4h, v2.4h
837# CHECK-NEXT: [0,4]     D===========eeeeER  .    .    .    .    .   fmlal	v0.4s, v1.4h, v2.4h
838# CHECK-NEXT: [0,5]     D===============eeeeER   .    .    .    .   fmlal	v0.4s, v0.4h, v1.4h
839# CHECK-NEXT: [1,0]     D===================eeeER.    .    .    .   fmul	v0.2d, v0.2d, v0.2d
840# CHECK-NEXT: [1,1]     D======================eeeeER .    .    .   fmlal	v0.4s, v1.4h, v2.4h
841# CHECK-NEXT: [1,2]     D==========================eeER    .    .   fadd	v0.2d, v0.2d, v0.2d
842# CHECK-NEXT: [1,3]     D============================eeeeER.    .   fmlal	v0.4s, v1.4h, v2.4h
843# CHECK-NEXT: [1,4]     D==============================eeeeER   .   fmlal	v0.4s, v1.4h, v2.4h
844# CHECK-NEXT: [1,5]     D==================================eeeeER   fmlal	v0.4s, v0.4h, v1.4h
845
846# CHECK:      Average Wait times (based on the timeline view):
847# CHECK-NEXT: [0]: Executions
848# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
849# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
850# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
851
852# CHECK:            [0]    [1]    [2]    [3]
853# CHECK-NEXT: 0.     2     10.5   0.5    0.0       fmul	v0.2d, v0.2d, v0.2d
854# CHECK-NEXT: 1.     2     13.5   0.0    0.0       fmlal	v0.4s, v1.4h, v2.4h
855# CHECK-NEXT: 2.     2     17.5   0.0    0.0       fadd	v0.2d, v0.2d, v0.2d
856# CHECK-NEXT: 3.     2     19.5   0.0    0.0       fmlal	v0.4s, v1.4h, v2.4h
857# CHECK-NEXT: 4.     2     21.5   0.0    0.0       fmlal	v0.4s, v1.4h, v2.4h
858# CHECK-NEXT: 5.     2     25.5   0.0    0.0       fmlal	v0.4s, v0.4h, v1.4h
859# CHECK-NEXT:        2     18.0   0.1    0.0       <total>
860
861# CHECK:      [14] Code Region - bfdot
862
863# CHECK:      Iterations:        100
864# CHECK-NEXT: Instructions:      400
865# CHECK-NEXT: Total Cycles:      1603
866# CHECK-NEXT: Total uOps:        400
867
868# CHECK:      Dispatch Width:    16
869# CHECK-NEXT: uOps Per Cycle:    0.25
870# CHECK-NEXT: IPC:               0.25
871# CHECK-NEXT: Block RThroughput: 1.0
872
873# CHECK:      Timeline view:
874# CHECK-NEXT:                     0123456789          01234
875# CHECK-NEXT: Index     0123456789          0123456789
876
877# CHECK:      [0,0]     DeeeER    .    .    .    .    .   .   fmul	v0.2d, v0.2d, v0.2d
878# CHECK-NEXT: [0,1]     D===eeeeeER    .    .    .    .   .   bfdot	v0.4s, v1.8h, v2.8h
879# CHECK-NEXT: [0,2]     D======eeeeeER .    .    .    .   .   bfdot	v0.4s, v1.8h, v2.8h
880# CHECK-NEXT: [0,3]     D===========eeeeeER .    .    .   .   bfdot	v0.4s, v0.8h, v1.8h
881# CHECK-NEXT: [1,0]     D================eeeER   .    .   .   fmul	v0.2d, v0.2d, v0.2d
882# CHECK-NEXT: [1,1]     D===================eeeeeER   .   .   bfdot	v0.4s, v1.8h, v2.8h
883# CHECK-NEXT: [1,2]     D======================eeeeeER.   .   bfdot	v0.4s, v1.8h, v2.8h
884# CHECK-NEXT: [1,3]     D===========================eeeeeER   bfdot	v0.4s, v0.8h, v1.8h
885
886# CHECK:      Average Wait times (based on the timeline view):
887# CHECK-NEXT: [0]: Executions
888# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
889# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
890# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
891
892# CHECK:            [0]    [1]    [2]    [3]
893# CHECK-NEXT: 0.     2     9.0    0.5    0.0       fmul	v0.2d, v0.2d, v0.2d
894# CHECK-NEXT: 1.     2     12.0   0.0    0.0       bfdot	v0.4s, v1.8h, v2.8h
895# CHECK-NEXT: 2.     2     15.0   0.0    0.0       bfdot	v0.4s, v1.8h, v2.8h
896# CHECK-NEXT: 3.     2     20.0   0.0    0.0       bfdot	v0.4s, v0.8h, v1.8h
897# CHECK-NEXT:        2     14.0   0.1    0.0       <total>
898
899# CHECK:      [15] Code Region - bfmmla
900
901# CHECK:      Iterations:        100
902# CHECK-NEXT: Instructions:      400
903# CHECK-NEXT: Total Cycles:      1903
904# CHECK-NEXT: Total uOps:        400
905
906# CHECK:      Dispatch Width:    16
907# CHECK-NEXT: uOps Per Cycle:    0.21
908# CHECK-NEXT: IPC:               0.21
909# CHECK-NEXT: Block RThroughput: 1.0
910
911# CHECK:      Timeline view:
912# CHECK-NEXT:                     0123456789          0123456789
913# CHECK-NEXT: Index     0123456789          0123456789          0
914
915# CHECK:      [0,0]     DeeeER    .    .    .    .    .    .    .   fmul	v0.2d, v0.2d, v0.2d
916# CHECK-NEXT: [0,1]     D===eeeeeeER   .    .    .    .    .    .   bfmmla	v0.4s, v1.8h, v2.8h
917# CHECK-NEXT: [0,2]     D=======eeeeeeER    .    .    .    .    .   bfmmla	v0.4s, v1.8h, v2.8h
918# CHECK-NEXT: [0,3]     D=============eeeeeeER   .    .    .    .   bfmmla	v0.4s, v0.8h, v1.8h
919# CHECK-NEXT: [1,0]     D===================eeeER.    .    .    .   fmul	v0.2d, v0.2d, v0.2d
920# CHECK-NEXT: [1,1]     D======================eeeeeeER    .    .   bfmmla	v0.4s, v1.8h, v2.8h
921# CHECK-NEXT: [1,2]     D==========================eeeeeeER.    .   bfmmla	v0.4s, v1.8h, v2.8h
922# CHECK-NEXT: [1,3]     D================================eeeeeeER   bfmmla	v0.4s, v0.8h, v1.8h
923
924# CHECK:      Average Wait times (based on the timeline view):
925# CHECK-NEXT: [0]: Executions
926# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
927# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
928# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
929
930# CHECK:            [0]    [1]    [2]    [3]
931# CHECK-NEXT: 0.     2     10.5   0.5    0.0       fmul	v0.2d, v0.2d, v0.2d
932# CHECK-NEXT: 1.     2     13.5   0.0    0.0       bfmmla	v0.4s, v1.8h, v2.8h
933# CHECK-NEXT: 2.     2     17.5   0.0    0.0       bfmmla	v0.4s, v1.8h, v2.8h
934# CHECK-NEXT: 3.     2     23.5   0.0    0.0       bfmmla	v0.4s, v0.8h, v1.8h
935# CHECK-NEXT:        2     16.3   0.1    0.0       <total>
936
937# CHECK:      [16] Code Region - bfmlalb
938
939# CHECK:      Iterations:        100
940# CHECK-NEXT: Instructions:      400
941# CHECK-NEXT: Total Cycles:      1503
942# CHECK-NEXT: Total uOps:        400
943
944# CHECK:      Dispatch Width:    16
945# CHECK-NEXT: uOps Per Cycle:    0.27
946# CHECK-NEXT: IPC:               0.27
947# CHECK-NEXT: Block RThroughput: 1.0
948
949# CHECK:      Timeline view:
950# CHECK-NEXT:                     0123456789          012
951# CHECK-NEXT: Index     0123456789          0123456789
952
953# CHECK:      [0,0]     DeeeER    .    .    .    .    . .   fmul	v0.2d, v0.2d, v0.2d
954# CHECK-NEXT: [0,1]     D===eeeeeER    .    .    .    . .   bfmlalb	v0.4s, v1.8h, v2.8h
955# CHECK-NEXT: [0,2]     D=====eeeeeER  .    .    .    . .   bfmlalb	v0.4s, v1.8h, v2.8h
956# CHECK-NEXT: [0,3]     D==========eeeeeER  .    .    . .   bfmlalb	v0.4s, v0.8h, v1.8h
957# CHECK-NEXT: [1,0]     D===============eeeER    .    . .   fmul	v0.2d, v0.2d, v0.2d
958# CHECK-NEXT: [1,1]     D==================eeeeeER    . .   bfmlalb	v0.4s, v1.8h, v2.8h
959# CHECK-NEXT: [1,2]     D====================eeeeeER  . .   bfmlalb	v0.4s, v1.8h, v2.8h
960# CHECK-NEXT: [1,3]     D=========================eeeeeER   bfmlalb	v0.4s, v0.8h, v1.8h
961
962# CHECK:      Average Wait times (based on the timeline view):
963# CHECK-NEXT: [0]: Executions
964# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
965# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
966# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
967
968# CHECK:            [0]    [1]    [2]    [3]
969# CHECK-NEXT: 0.     2     8.5    0.5    0.0       fmul	v0.2d, v0.2d, v0.2d
970# CHECK-NEXT: 1.     2     11.5   0.0    0.0       bfmlalb	v0.4s, v1.8h, v2.8h
971# CHECK-NEXT: 2.     2     13.5   0.0    0.0       bfmlalb	v0.4s, v1.8h, v2.8h
972# CHECK-NEXT: 3.     2     18.5   0.0    0.0       bfmlalb	v0.4s, v0.8h, v1.8h
973# CHECK-NEXT:        2     13.0   0.1    0.0       <total>
974
975# CHECK:      [17] Code Region - crc32b
976
977# CHECK:      Iterations:        100
978# CHECK-NEXT: Instructions:      400
979# CHECK-NEXT: Total Cycles:      703
980# CHECK-NEXT: Total uOps:        400
981
982# CHECK:      Dispatch Width:    16
983# CHECK-NEXT: uOps Per Cycle:    0.57
984# CHECK-NEXT: IPC:               0.57
985# CHECK-NEXT: Block RThroughput: 3.0
986
987# CHECK:      Timeline view:
988# CHECK-NEXT:                     0123456
989# CHECK-NEXT: Index     0123456789
990
991# CHECK:      [0,0]     DeeER.    .    ..   mul	w0, w0, w0
992# CHECK-NEXT: [0,1]     D==eeER   .    ..   crc32b	w0, w0, w1
993# CHECK-NEXT: [0,2]     D===eeER  .    ..   crc32b	w0, w0, w1
994# CHECK-NEXT: [0,3]     D=====eeER.    ..   crc32b	w0, w0, w0
995# CHECK-NEXT: [1,0]     D=======eeER   ..   mul	w0, w0, w0
996# CHECK-NEXT: [1,1]     D=========eeER ..   crc32b	w0, w0, w1
997# CHECK-NEXT: [1,2]     D==========eeER..   crc32b	w0, w0, w1
998# CHECK-NEXT: [1,3]     D============eeER   crc32b	w0, w0, w0
999
1000# CHECK:      Average Wait times (based on the timeline view):
1001# CHECK-NEXT: [0]: Executions
1002# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1003# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1004# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1005
1006# CHECK:            [0]    [1]    [2]    [3]
1007# CHECK-NEXT: 0.     2     4.5    0.5    0.0       mul	w0, w0, w0
1008# CHECK-NEXT: 1.     2     6.5    0.0    0.0       crc32b	w0, w0, w1
1009# CHECK-NEXT: 2.     2     7.5    0.0    0.0       crc32b	w0, w0, w1
1010# CHECK-NEXT: 3.     2     9.5    0.0    0.0       crc32b	w0, w0, w0
1011# CHECK-NEXT:        2     7.0    0.1    0.0       <total>
1012
1013# CHECK:      [18] Code Region - Z saba
1014
1015# CHECK:      Iterations:        100
1016# CHECK-NEXT: Instructions:      400
1017# CHECK-NEXT: Total Cycles:      1403
1018# CHECK-NEXT: Total uOps:        500
1019
1020# CHECK:      Dispatch Width:    16
1021# CHECK-NEXT: uOps Per Cycle:    0.36
1022# CHECK-NEXT: IPC:               0.29
1023# CHECK-NEXT: Block RThroughput: 1.5
1024
1025# CHECK:      Timeline view:
1026# CHECK-NEXT:                     0123456789          0
1027# CHECK-NEXT: Index     0123456789          0123456789
1028
1029# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1030# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   saba	z0.d, z1.d, z2.d
1031# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   saba	z0.d, z1.d, z2.d
1032# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   saba	z0.d, z0.d, z1.d
1033# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1034# CHECK-NEXT: [1,1]     D===================eeeeER    .   saba	z0.d, z1.d, z2.d
1035# CHECK-NEXT: [1,2]     D====================eeeeER   .   saba	z0.d, z1.d, z2.d
1036# CHECK-NEXT: [1,3]     D========================eeeeER   saba	z0.d, z0.d, z1.d
1037
1038# CHECK:      Average Wait times (based on the timeline view):
1039# CHECK-NEXT: [0]: Executions
1040# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1041# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1042# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1043
1044# CHECK:            [0]    [1]    [2]    [3]
1045# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1046# CHECK-NEXT: 1.     2     13.0   0.0    0.0       saba	z0.d, z1.d, z2.d
1047# CHECK-NEXT: 2.     2     14.0   0.0    0.0       saba	z0.d, z1.d, z2.d
1048# CHECK-NEXT: 3.     2     18.0   0.0    0.0       saba	z0.d, z0.d, z1.d
1049# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1050
1051# CHECK:      [19] Code Region - Z sadalp
1052
1053# CHECK:      Iterations:        100
1054# CHECK-NEXT: Instructions:      400
1055# CHECK-NEXT: Total Cycles:      1403
1056# CHECK-NEXT: Total uOps:        500
1057
1058# CHECK:      Dispatch Width:    16
1059# CHECK-NEXT: uOps Per Cycle:    0.36
1060# CHECK-NEXT: IPC:               0.29
1061# CHECK-NEXT: Block RThroughput: 1.5
1062
1063# CHECK:      Timeline view:
1064# CHECK-NEXT:                     0123456789          0
1065# CHECK-NEXT: Index     0123456789          0123456789
1066
1067# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1068# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   sadalp	z0.d, p0/m, z1.s
1069# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   sadalp	z0.d, p0/m, z1.s
1070# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   sadalp	z0.d, p0/m, z0.s
1071# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1072# CHECK-NEXT: [1,1]     D===================eeeeER    .   sadalp	z0.d, p0/m, z1.s
1073# CHECK-NEXT: [1,2]     D====================eeeeER   .   sadalp	z0.d, p0/m, z1.s
1074# CHECK-NEXT: [1,3]     D========================eeeeER   sadalp	z0.d, p0/m, z0.s
1075
1076# CHECK:      Average Wait times (based on the timeline view):
1077# CHECK-NEXT: [0]: Executions
1078# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1079# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1080# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1081
1082# CHECK:            [0]    [1]    [2]    [3]
1083# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1084# CHECK-NEXT: 1.     2     13.0   0.0    0.0       sadalp	z0.d, p0/m, z1.s
1085# CHECK-NEXT: 2.     2     14.0   0.0    0.0       sadalp	z0.d, p0/m, z1.s
1086# CHECK-NEXT: 3.     2     18.0   0.0    0.0       sadalp	z0.d, p0/m, z0.s
1087# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1088
1089# CHECK:      [20] Code Region - Z ssra
1090
1091# CHECK:      Iterations:        100
1092# CHECK-NEXT: Instructions:      400
1093# CHECK-NEXT: Total Cycles:      1403
1094# CHECK-NEXT: Total uOps:        500
1095
1096# CHECK:      Dispatch Width:    16
1097# CHECK-NEXT: uOps Per Cycle:    0.36
1098# CHECK-NEXT: IPC:               0.29
1099# CHECK-NEXT: Block RThroughput: 1.5
1100
1101# CHECK:      Timeline view:
1102# CHECK-NEXT:                     0123456789          0
1103# CHECK-NEXT: Index     0123456789          0123456789
1104
1105# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1106# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   ssra	z0.d, z1.d, #1
1107# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   ssra	z0.d, z1.d, #1
1108# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   ssra	z0.d, z0.d, #1
1109# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1110# CHECK-NEXT: [1,1]     D===================eeeeER    .   ssra	z0.d, z1.d, #1
1111# CHECK-NEXT: [1,2]     D====================eeeeER   .   ssra	z0.d, z1.d, #1
1112# CHECK-NEXT: [1,3]     D========================eeeeER   ssra	z0.d, z0.d, #1
1113
1114# CHECK:      Average Wait times (based on the timeline view):
1115# CHECK-NEXT: [0]: Executions
1116# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1117# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1118# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1119
1120# CHECK:            [0]    [1]    [2]    [3]
1121# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1122# CHECK-NEXT: 1.     2     13.0   0.0    0.0       ssra	z0.d, z1.d, #1
1123# CHECK-NEXT: 2.     2     14.0   0.0    0.0       ssra	z0.d, z1.d, #1
1124# CHECK-NEXT: 3.     2     18.0   0.0    0.0       ssra	z0.d, z0.d, #1
1125# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1126
1127# CHECK:      [21] Code Region - Z cdot.s
1128
1129# CHECK:      Iterations:        100
1130# CHECK-NEXT: Instructions:      400
1131# CHECK-NEXT: Total Cycles:      1203
1132# CHECK-NEXT: Total uOps:        500
1133
1134# CHECK:      Dispatch Width:    16
1135# CHECK-NEXT: uOps Per Cycle:    0.42
1136# CHECK-NEXT: IPC:               0.33
1137# CHECK-NEXT: Block RThroughput: 1.0
1138
1139# CHECK:      Timeline view:
1140# CHECK-NEXT:                     0123456789
1141# CHECK-NEXT: Index     0123456789          0123456
1142
1143# CHECK:      [0,0]     DeeeeeER  .    .    .    ..   mul	z0.d, z0.d, z0.d
1144# CHECK-NEXT: [0,1]     D=====eeeER    .    .    ..   cdot	z0.s, z1.b, z2.b, #90
1145# CHECK-NEXT: [0,2]     D======eeeER   .    .    ..   cdot	z0.s, z1.b, z2.b, #90
1146# CHECK-NEXT: [0,3]     D=========eeeER.    .    ..   cdot	z0.s, z0.b, z1.b, #90
1147# CHECK-NEXT: [1,0]     D============eeeeeER.    ..   mul	z0.d, z0.d, z0.d
1148# CHECK-NEXT: [1,1]     D=================eeeER  ..   cdot	z0.s, z1.b, z2.b, #90
1149# CHECK-NEXT: [1,2]     D==================eeeER ..   cdot	z0.s, z1.b, z2.b, #90
1150# CHECK-NEXT: [1,3]     D=====================eeeER   cdot	z0.s, z0.b, z1.b, #90
1151
1152# CHECK:      Average Wait times (based on the timeline view):
1153# CHECK-NEXT: [0]: Executions
1154# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1155# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1156# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1157
1158# CHECK:            [0]    [1]    [2]    [3]
1159# CHECK-NEXT: 0.     2     7.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1160# CHECK-NEXT: 1.     2     12.0   0.0    0.0       cdot	z0.s, z1.b, z2.b, #90
1161# CHECK-NEXT: 2.     2     13.0   0.0    0.0       cdot	z0.s, z1.b, z2.b, #90
1162# CHECK-NEXT: 3.     2     16.0   0.0    0.0       cdot	z0.s, z0.b, z1.b, #90
1163# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
1164
1165# CHECK:      [22] Code Region - Z cdot.d
1166
1167# CHECK:      Iterations:        100
1168# CHECK-NEXT: Instructions:      400
1169# CHECK-NEXT: Total Cycles:      1403
1170# CHECK-NEXT: Total uOps:        500
1171
1172# CHECK:      Dispatch Width:    16
1173# CHECK-NEXT: uOps Per Cycle:    0.36
1174# CHECK-NEXT: IPC:               0.29
1175# CHECK-NEXT: Block RThroughput: 2.5
1176
1177# CHECK:      Timeline view:
1178# CHECK-NEXT:                     0123456789          0
1179# CHECK-NEXT: Index     0123456789          0123456789
1180
1181# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1182# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   cdot	z0.d, z1.h, z2.h, #90
1183# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   cdot	z0.d, z1.h, z2.h, #90
1184# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   cdot	z0.d, z0.h, z1.h, #90
1185# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1186# CHECK-NEXT: [1,1]     D===================eeeeER    .   cdot	z0.d, z1.h, z2.h, #90
1187# CHECK-NEXT: [1,2]     D====================eeeeER   .   cdot	z0.d, z1.h, z2.h, #90
1188# CHECK-NEXT: [1,3]     D========================eeeeER   cdot	z0.d, z0.h, z1.h, #90
1189
1190# CHECK:      Average Wait times (based on the timeline view):
1191# CHECK-NEXT: [0]: Executions
1192# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1193# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1194# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1195
1196# CHECK:            [0]    [1]    [2]    [3]
1197# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1198# CHECK-NEXT: 1.     2     13.0   0.0    0.0       cdot	z0.d, z1.h, z2.h, #90
1199# CHECK-NEXT: 2.     2     14.0   0.0    0.0       cdot	z0.d, z1.h, z2.h, #90
1200# CHECK-NEXT: 3.     2     18.0   0.0    0.0       cdot	z0.d, z0.h, z1.h, #90
1201# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1202
1203# CHECK:      [23] Code Region - Z cmla.b
1204
1205# CHECK:      Iterations:        100
1206# CHECK-NEXT: Instructions:      400
1207# CHECK-NEXT: Total Cycles:      1403
1208# CHECK-NEXT: Total uOps:        500
1209
1210# CHECK:      Dispatch Width:    16
1211# CHECK-NEXT: uOps Per Cycle:    0.36
1212# CHECK-NEXT: IPC:               0.29
1213# CHECK-NEXT: Block RThroughput: 2.5
1214
1215# CHECK:      Timeline view:
1216# CHECK-NEXT:                     0123456789          0
1217# CHECK-NEXT: Index     0123456789          0123456789
1218
1219# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1220# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   cmla	z0.b, z1.b, z2.b, #90
1221# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   cmla	z0.b, z1.b, z2.b, #90
1222# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   cmla	z0.b, z0.b, z1.b, #90
1223# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1224# CHECK-NEXT: [1,1]     D===================eeeeER    .   cmla	z0.b, z1.b, z2.b, #90
1225# CHECK-NEXT: [1,2]     D====================eeeeER   .   cmla	z0.b, z1.b, z2.b, #90
1226# CHECK-NEXT: [1,3]     D========================eeeeER   cmla	z0.b, z0.b, z1.b, #90
1227
1228# CHECK:      Average Wait times (based on the timeline view):
1229# CHECK-NEXT: [0]: Executions
1230# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1231# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1232# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1233
1234# CHECK:            [0]    [1]    [2]    [3]
1235# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1236# CHECK-NEXT: 1.     2     13.0   0.0    0.0       cmla	z0.b, z1.b, z2.b, #90
1237# CHECK-NEXT: 2.     2     14.0   0.0    0.0       cmla	z0.b, z1.b, z2.b, #90
1238# CHECK-NEXT: 3.     2     18.0   0.0    0.0       cmla	z0.b, z0.b, z1.b, #90
1239# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1240
1241# CHECK:      [24] Code Region - Z cmla.d
1242
1243# CHECK:      Iterations:        100
1244# CHECK-NEXT: Instructions:      400
1245# CHECK-NEXT: Total Cycles:      1803
1246# CHECK-NEXT: Total uOps:        500
1247
1248# CHECK:      Dispatch Width:    16
1249# CHECK-NEXT: uOps Per Cycle:    0.28
1250# CHECK-NEXT: IPC:               0.22
1251# CHECK-NEXT: Block RThroughput: 4.0
1252
1253# CHECK:      Timeline view:
1254# CHECK-NEXT:                     0123456789          012345678
1255# CHECK-NEXT: Index     0123456789          0123456789
1256
1257# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .    .  .   mul	z0.d, z0.d, z0.d
1258# CHECK-NEXT: [0,1]     D=====eeeeeER  .    .    .    .    .  .   cmla	z0.d, z1.d, z2.d, #90
1259# CHECK-NEXT: [0,2]     D========eeeeeER    .    .    .    .  .   cmla	z0.d, z1.d, z2.d, #90
1260# CHECK-NEXT: [0,3]     D=============eeeeeER    .    .    .  .   cmla	z0.d, z0.d, z1.d, #90
1261# CHECK-NEXT: [1,0]     D==================eeeeeER    .    .  .   mul	z0.d, z0.d, z0.d
1262# CHECK-NEXT: [1,1]     D=======================eeeeeER    .  .   cmla	z0.d, z1.d, z2.d, #90
1263# CHECK-NEXT: [1,2]     D==========================eeeeeER .  .   cmla	z0.d, z1.d, z2.d, #90
1264# CHECK-NEXT: [1,3]     D===============================eeeeeER   cmla	z0.d, z0.d, z1.d, #90
1265
1266# CHECK:      Average Wait times (based on the timeline view):
1267# CHECK-NEXT: [0]: Executions
1268# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1269# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1270# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1271
1272# CHECK:            [0]    [1]    [2]    [3]
1273# CHECK-NEXT: 0.     2     10.0   0.5    0.0       mul	z0.d, z0.d, z0.d
1274# CHECK-NEXT: 1.     2     15.0   0.0    0.0       cmla	z0.d, z1.d, z2.d, #90
1275# CHECK-NEXT: 2.     2     18.0   0.0    0.0       cmla	z0.d, z1.d, z2.d, #90
1276# CHECK-NEXT: 3.     2     23.0   0.0    0.0       cmla	z0.d, z0.d, z1.d, #90
1277# CHECK-NEXT:        2     16.5   0.1    0.0       <total>
1278
1279# CHECK:      [25] Code Region - Z sdot.s
1280
1281# CHECK:      Iterations:        100
1282# CHECK-NEXT: Instructions:      400
1283# CHECK-NEXT: Total Cycles:      1203
1284# CHECK-NEXT: Total uOps:        500
1285
1286# CHECK:      Dispatch Width:    16
1287# CHECK-NEXT: uOps Per Cycle:    0.42
1288# CHECK-NEXT: IPC:               0.33
1289# CHECK-NEXT: Block RThroughput: 1.0
1290
1291# CHECK:      Timeline view:
1292# CHECK-NEXT:                     0123456789
1293# CHECK-NEXT: Index     0123456789          0123456
1294
1295# CHECK:      [0,0]     DeeeeeER  .    .    .    ..   mul	z0.d, z0.d, z0.d
1296# CHECK-NEXT: [0,1]     D=====eeeER    .    .    ..   sdot	z0.s, z1.b, z2.b
1297# CHECK-NEXT: [0,2]     D======eeeER   .    .    ..   sdot	z0.s, z1.b, z2.b
1298# CHECK-NEXT: [0,3]     D=========eeeER.    .    ..   sdot	z0.s, z0.b, z1.b
1299# CHECK-NEXT: [1,0]     D============eeeeeER.    ..   mul	z0.d, z0.d, z0.d
1300# CHECK-NEXT: [1,1]     D=================eeeER  ..   sdot	z0.s, z1.b, z2.b
1301# CHECK-NEXT: [1,2]     D==================eeeER ..   sdot	z0.s, z1.b, z2.b
1302# CHECK-NEXT: [1,3]     D=====================eeeER   sdot	z0.s, z0.b, z1.b
1303
1304# CHECK:      Average Wait times (based on the timeline view):
1305# CHECK-NEXT: [0]: Executions
1306# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1307# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1308# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1309
1310# CHECK:            [0]    [1]    [2]    [3]
1311# CHECK-NEXT: 0.     2     7.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1312# CHECK-NEXT: 1.     2     12.0   0.0    0.0       sdot	z0.s, z1.b, z2.b
1313# CHECK-NEXT: 2.     2     13.0   0.0    0.0       sdot	z0.s, z1.b, z2.b
1314# CHECK-NEXT: 3.     2     16.0   0.0    0.0       sdot	z0.s, z0.b, z1.b
1315# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
1316
1317# CHECK:      [26] Code Region - Z sudot
1318
1319# CHECK:      Iterations:        100
1320# CHECK-NEXT: Instructions:      400
1321# CHECK-NEXT: Total Cycles:      1203
1322# CHECK-NEXT: Total uOps:        500
1323
1324# CHECK:      Dispatch Width:    16
1325# CHECK-NEXT: uOps Per Cycle:    0.42
1326# CHECK-NEXT: IPC:               0.33
1327# CHECK-NEXT: Block RThroughput: 1.0
1328
1329# CHECK:      Timeline view:
1330# CHECK-NEXT:                     0123456789
1331# CHECK-NEXT: Index     0123456789          0123456
1332
1333# CHECK:      [0,0]     DeeeeeER  .    .    .    ..   mul	z0.d, z0.d, z0.d
1334# CHECK-NEXT: [0,1]     D=====eeeER    .    .    ..   sdot	z0.s, z1.b, z2.b[1]
1335# CHECK-NEXT: [0,2]     D======eeeER   .    .    ..   sdot	z0.s, z1.b, z2.b[1]
1336# CHECK-NEXT: [0,3]     D=========eeeER.    .    ..   sdot	z0.s, z0.b, z1.b[1]
1337# CHECK-NEXT: [1,0]     D============eeeeeER.    ..   mul	z0.d, z0.d, z0.d
1338# CHECK-NEXT: [1,1]     D=================eeeER  ..   sdot	z0.s, z1.b, z2.b[1]
1339# CHECK-NEXT: [1,2]     D==================eeeER ..   sdot	z0.s, z1.b, z2.b[1]
1340# CHECK-NEXT: [1,3]     D=====================eeeER   sdot	z0.s, z0.b, z1.b[1]
1341
1342# CHECK:      Average Wait times (based on the timeline view):
1343# CHECK-NEXT: [0]: Executions
1344# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1345# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1346# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1347
1348# CHECK:            [0]    [1]    [2]    [3]
1349# CHECK-NEXT: 0.     2     7.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1350# CHECK-NEXT: 1.     2     12.0   0.0    0.0       sdot	z0.s, z1.b, z2.b[1]
1351# CHECK-NEXT: 2.     2     13.0   0.0    0.0       sdot	z0.s, z1.b, z2.b[1]
1352# CHECK-NEXT: 3.     2     16.0   0.0    0.0       sdot	z0.s, z0.b, z1.b[1]
1353# CHECK-NEXT:        2     12.0   0.1    0.0       <total>
1354
1355# CHECK:      [27] Code Region - Z sdot.d
1356
1357# CHECK:      Iterations:        100
1358# CHECK-NEXT: Instructions:      400
1359# CHECK-NEXT: Total Cycles:      1403
1360# CHECK-NEXT: Total uOps:        500
1361
1362# CHECK:      Dispatch Width:    16
1363# CHECK-NEXT: uOps Per Cycle:    0.36
1364# CHECK-NEXT: IPC:               0.29
1365# CHECK-NEXT: Block RThroughput: 2.5
1366
1367# CHECK:      Timeline view:
1368# CHECK-NEXT:                     0123456789          0
1369# CHECK-NEXT: Index     0123456789          0123456789
1370
1371# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1372# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   sdot	z0.d, z1.h, z2.h
1373# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   sdot	z0.d, z1.h, z2.h
1374# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   sdot	z0.d, z0.h, z1.h
1375# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1376# CHECK-NEXT: [1,1]     D===================eeeeER    .   sdot	z0.d, z1.h, z2.h
1377# CHECK-NEXT: [1,2]     D====================eeeeER   .   sdot	z0.d, z1.h, z2.h
1378# CHECK-NEXT: [1,3]     D========================eeeeER   sdot	z0.d, z0.h, z1.h
1379
1380# CHECK:      Average Wait times (based on the timeline view):
1381# CHECK-NEXT: [0]: Executions
1382# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1383# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1384# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1385
1386# CHECK:            [0]    [1]    [2]    [3]
1387# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1388# CHECK-NEXT: 1.     2     13.0   0.0    0.0       sdot	z0.d, z1.h, z2.h
1389# CHECK-NEXT: 2.     2     14.0   0.0    0.0       sdot	z0.d, z1.h, z2.h
1390# CHECK-NEXT: 3.     2     18.0   0.0    0.0       sdot	z0.d, z0.h, z1.h
1391# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1392
1393# CHECK:      [28] Code Region - Z smmla
1394
1395# CHECK:      Iterations:        100
1396# CHECK-NEXT: Instructions:      400
1397# CHECK-NEXT: Total Cycles:      1103
1398# CHECK-NEXT: Total uOps:        400
1399
1400# CHECK:      Dispatch Width:    16
1401# CHECK-NEXT: uOps Per Cycle:    0.36
1402# CHECK-NEXT: IPC:               0.36
1403# CHECK-NEXT: Block RThroughput: 0.8
1404
1405# CHECK:      Timeline view:
1406# CHECK-NEXT:                     0123456789
1407# CHECK-NEXT: Index     0123456789          01234
1408
1409# CHECK:      [0,0]     DeeeeER   .    .    .   .   mul	z0.s, z0.s, z0.s
1410# CHECK-NEXT: [0,1]     D====eeeER.    .    .   .   smmla	z0.s, z1.b, z2.b
1411# CHECK-NEXT: [0,2]     D=====eeeER    .    .   .   smmla	z0.s, z1.b, z2.b
1412# CHECK-NEXT: [0,3]     D========eeeER .    .   .   smmla	z0.s, z0.b, z1.b
1413# CHECK-NEXT: [1,0]     D===========eeeeER  .   .   mul	z0.s, z0.s, z0.s
1414# CHECK-NEXT: [1,1]     D===============eeeER   .   smmla	z0.s, z1.b, z2.b
1415# CHECK-NEXT: [1,2]     D================eeeER  .   smmla	z0.s, z1.b, z2.b
1416# CHECK-NEXT: [1,3]     D===================eeeER   smmla	z0.s, z0.b, z1.b
1417
1418# CHECK:      Average Wait times (based on the timeline view):
1419# CHECK-NEXT: [0]: Executions
1420# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1421# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1422# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1423
1424# CHECK:            [0]    [1]    [2]    [3]
1425# CHECK-NEXT: 0.     2     6.5    0.5    0.0       mul	z0.s, z0.s, z0.s
1426# CHECK-NEXT: 1.     2     10.5   0.0    0.0       smmla	z0.s, z1.b, z2.b
1427# CHECK-NEXT: 2.     2     11.5   0.0    0.0       smmla	z0.s, z1.b, z2.b
1428# CHECK-NEXT: 3.     2     14.5   0.0    0.0       smmla	z0.s, z0.b, z1.b
1429# CHECK-NEXT:        2     10.8   0.1    0.0       <total>
1430
1431# CHECK:      [29] Code Region - Z mla.b
1432
1433# CHECK:      Iterations:        100
1434# CHECK-NEXT: Instructions:      400
1435# CHECK-NEXT: Total Cycles:      1403
1436# CHECK-NEXT: Total uOps:        500
1437
1438# CHECK:      Dispatch Width:    16
1439# CHECK-NEXT: uOps Per Cycle:    0.36
1440# CHECK-NEXT: IPC:               0.29
1441# CHECK-NEXT: Block RThroughput: 4.0
1442
1443# CHECK:      Timeline view:
1444# CHECK-NEXT:                     0123456789          0
1445# CHECK-NEXT: Index     0123456789          0123456789
1446
1447# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1448# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   mla	z0.b, p0/m, z1.b, z2.b
1449# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   mla	z0.b, p0/m, z1.b, z2.b
1450# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   mla	z0.b, p0/m, z0.b, z1.b
1451# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1452# CHECK-NEXT: [1,1]     D===================eeeeER    .   mla	z0.b, p0/m, z1.b, z2.b
1453# CHECK-NEXT: [1,2]     D====================eeeeER   .   mla	z0.b, p0/m, z1.b, z2.b
1454# CHECK-NEXT: [1,3]     D========================eeeeER   mla	z0.b, p0/m, z0.b, z1.b
1455
1456# CHECK:      Average Wait times (based on the timeline view):
1457# CHECK-NEXT: [0]: Executions
1458# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1459# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1460# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1461
1462# CHECK:            [0]    [1]    [2]    [3]
1463# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1464# CHECK-NEXT: 1.     2     13.0   0.0    0.0       mla	z0.b, p0/m, z1.b, z2.b
1465# CHECK-NEXT: 2.     2     14.0   0.0    0.0       mla	z0.b, p0/m, z1.b, z2.b
1466# CHECK-NEXT: 3.     2     18.0   0.0    0.0       mla	z0.b, p0/m, z0.b, z1.b
1467# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1468
1469# CHECK:      [30] Code Region - Z mla.d
1470
1471# CHECK:      Iterations:        100
1472# CHECK-NEXT: Instructions:      400
1473# CHECK-NEXT: Total Cycles:      1803
1474# CHECK-NEXT: Total uOps:        500
1475
1476# CHECK:      Dispatch Width:    16
1477# CHECK-NEXT: uOps Per Cycle:    0.28
1478# CHECK-NEXT: IPC:               0.22
1479# CHECK-NEXT: Block RThroughput: 4.0
1480
1481# CHECK:      Timeline view:
1482# CHECK-NEXT:                     0123456789          012345678
1483# CHECK-NEXT: Index     0123456789          0123456789
1484
1485# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .    .  .   mul	z0.d, z0.d, z0.d
1486# CHECK-NEXT: [0,1]     D=====eeeeeER  .    .    .    .    .  .   mla	z0.d, p0/m, z1.d, z2.d
1487# CHECK-NEXT: [0,2]     D========eeeeeER    .    .    .    .  .   mla	z0.d, p0/m, z1.d, z2.d
1488# CHECK-NEXT: [0,3]     D=============eeeeeER    .    .    .  .   mla	z0.d, p0/m, z0.d, z1.d
1489# CHECK-NEXT: [1,0]     D==================eeeeeER    .    .  .   mul	z0.d, z0.d, z0.d
1490# CHECK-NEXT: [1,1]     D=======================eeeeeER    .  .   mla	z0.d, p0/m, z1.d, z2.d
1491# CHECK-NEXT: [1,2]     D==========================eeeeeER .  .   mla	z0.d, p0/m, z1.d, z2.d
1492# CHECK-NEXT: [1,3]     D===============================eeeeeER   mla	z0.d, p0/m, z0.d, z1.d
1493
1494# CHECK:      Average Wait times (based on the timeline view):
1495# CHECK-NEXT: [0]: Executions
1496# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1497# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1498# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1499
1500# CHECK:            [0]    [1]    [2]    [3]
1501# CHECK-NEXT: 0.     2     10.0   0.5    0.0       mul	z0.d, z0.d, z0.d
1502# CHECK-NEXT: 1.     2     15.0   0.0    0.0       mla	z0.d, p0/m, z1.d, z2.d
1503# CHECK-NEXT: 2.     2     18.0   0.0    0.0       mla	z0.d, p0/m, z1.d, z2.d
1504# CHECK-NEXT: 3.     2     23.0   0.0    0.0       mla	z0.d, p0/m, z0.d, z1.d
1505# CHECK-NEXT:        2     16.5   0.1    0.0       <total>
1506
1507# CHECK:      [31] Code Region - Z smlalb
1508
1509# CHECK:      Iterations:        100
1510# CHECK-NEXT: Instructions:      400
1511# CHECK-NEXT: Total Cycles:      1403
1512# CHECK-NEXT: Total uOps:        500
1513
1514# CHECK:      Dispatch Width:    16
1515# CHECK-NEXT: uOps Per Cycle:    0.36
1516# CHECK-NEXT: IPC:               0.29
1517# CHECK-NEXT: Block RThroughput: 2.5
1518
1519# CHECK:      Timeline view:
1520# CHECK-NEXT:                     0123456789          0
1521# CHECK-NEXT: Index     0123456789          0123456789
1522
1523# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .   mul	z0.d, z0.d, z0.d
1524# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    .   smlalb	z0.d, z1.s, z2.s
1525# CHECK-NEXT: [0,2]     D======eeeeER  .    .    .    .   smlalb	z0.d, z1.s, z2.s
1526# CHECK-NEXT: [0,3]     D==========eeeeER   .    .    .   smlalb	z0.d, z0.s, z1.s
1527# CHECK-NEXT: [1,0]     D==============eeeeeER   .    .   mul	z0.d, z0.d, z0.d
1528# CHECK-NEXT: [1,1]     D===================eeeeER    .   smlalb	z0.d, z1.s, z2.s
1529# CHECK-NEXT: [1,2]     D====================eeeeER   .   smlalb	z0.d, z1.s, z2.s
1530# CHECK-NEXT: [1,3]     D========================eeeeER   smlalb	z0.d, z0.s, z1.s
1531
1532# CHECK:      Average Wait times (based on the timeline view):
1533# CHECK-NEXT: [0]: Executions
1534# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1535# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1536# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1537
1538# CHECK:            [0]    [1]    [2]    [3]
1539# CHECK-NEXT: 0.     2     8.0    0.5    0.0       mul	z0.d, z0.d, z0.d
1540# CHECK-NEXT: 1.     2     13.0   0.0    0.0       smlalb	z0.d, z1.s, z2.s
1541# CHECK-NEXT: 2.     2     14.0   0.0    0.0       smlalb	z0.d, z1.s, z2.s
1542# CHECK-NEXT: 3.     2     18.0   0.0    0.0       smlalb	z0.d, z0.s, z1.s
1543# CHECK-NEXT:        2     13.3   0.1    0.0       <total>
1544
1545# CHECK:      [32] Code Region - Z sqdmlalb
1546
1547# CHECK:      Iterations:        100
1548# CHECK-NEXT: Instructions:      400
1549# CHECK-NEXT: Total Cycles:      1503
1550# CHECK-NEXT: Total uOps:        500
1551
1552# CHECK:      Dispatch Width:    16
1553# CHECK-NEXT: uOps Per Cycle:    0.33
1554# CHECK-NEXT: IPC:               0.27
1555# CHECK-NEXT: Block RThroughput: 2.5
1556
1557# CHECK:      Timeline view:
1558# CHECK-NEXT:                     0123456789          012
1559# CHECK-NEXT: Index     0123456789          0123456789
1560
1561# CHECK:      [0,0]     DeeeeeER  .    .    .    .    . .   mul	z0.d, z0.d, z0.d
1562# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    . .   sqdmlalb	z0.d, z1.s, z2.s
1563# CHECK-NEXT: [0,2]     D=======eeeeER .    .    .    . .   sqdmlalb	z0.d, z1.s, z2.s
1564# CHECK-NEXT: [0,3]     D===========eeeeER  .    .    . .   sqdmlalb	z0.d, z0.s, z1.s
1565# CHECK-NEXT: [1,0]     D===============eeeeeER  .    . .   mul	z0.d, z0.d, z0.d
1566# CHECK-NEXT: [1,1]     D====================eeeeER   . .   sqdmlalb	z0.d, z1.s, z2.s
1567# CHECK-NEXT: [1,2]     D======================eeeeER . .   sqdmlalb	z0.d, z1.s, z2.s
1568# CHECK-NEXT: [1,3]     D==========================eeeeER   sqdmlalb	z0.d, z0.s, z1.s
1569
1570# CHECK:      Average Wait times (based on the timeline view):
1571# CHECK-NEXT: [0]: Executions
1572# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1573# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1574# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1575
1576# CHECK:            [0]    [1]    [2]    [3]
1577# CHECK-NEXT: 0.     2     8.5    0.5    0.0       mul	z0.d, z0.d, z0.d
1578# CHECK-NEXT: 1.     2     13.5   0.0    0.0       sqdmlalb	z0.d, z1.s, z2.s
1579# CHECK-NEXT: 2.     2     15.5   0.0    0.0       sqdmlalb	z0.d, z1.s, z2.s
1580# CHECK-NEXT: 3.     2     19.5   0.0    0.0       sqdmlalb	z0.d, z0.s, z1.s
1581# CHECK-NEXT:        2     14.3   0.1    0.0       <total>
1582
1583# CHECK:      [33] Code Region - Z sqrdmlah.b
1584
1585# CHECK:      Iterations:        100
1586# CHECK-NEXT: Instructions:      400
1587# CHECK-NEXT: Total Cycles:      1503
1588# CHECK-NEXT: Total uOps:        500
1589
1590# CHECK:      Dispatch Width:    16
1591# CHECK-NEXT: uOps Per Cycle:    0.33
1592# CHECK-NEXT: IPC:               0.27
1593# CHECK-NEXT: Block RThroughput: 2.5
1594
1595# CHECK:      Timeline view:
1596# CHECK-NEXT:                     0123456789          012
1597# CHECK-NEXT: Index     0123456789          0123456789
1598
1599# CHECK:      [0,0]     DeeeeeER  .    .    .    .    . .   mul	z0.d, z0.d, z0.d
1600# CHECK-NEXT: [0,1]     D=====eeeeER   .    .    .    . .   sqrdmlah	z0.b, z1.b, z2.b
1601# CHECK-NEXT: [0,2]     D=======eeeeER .    .    .    . .   sqrdmlah	z0.b, z1.b, z2.b
1602# CHECK-NEXT: [0,3]     D===========eeeeER  .    .    . .   sqrdmlah	z0.b, z0.b, z1.b
1603# CHECK-NEXT: [1,0]     D===============eeeeeER  .    . .   mul	z0.d, z0.d, z0.d
1604# CHECK-NEXT: [1,1]     D====================eeeeER   . .   sqrdmlah	z0.b, z1.b, z2.b
1605# CHECK-NEXT: [1,2]     D======================eeeeER . .   sqrdmlah	z0.b, z1.b, z2.b
1606# CHECK-NEXT: [1,3]     D==========================eeeeER   sqrdmlah	z0.b, z0.b, z1.b
1607
1608# CHECK:      Average Wait times (based on the timeline view):
1609# CHECK-NEXT: [0]: Executions
1610# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1611# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1612# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1613
1614# CHECK:            [0]    [1]    [2]    [3]
1615# CHECK-NEXT: 0.     2     8.5    0.5    0.0       mul	z0.d, z0.d, z0.d
1616# CHECK-NEXT: 1.     2     13.5   0.0    0.0       sqrdmlah	z0.b, z1.b, z2.b
1617# CHECK-NEXT: 2.     2     15.5   0.0    0.0       sqrdmlah	z0.b, z1.b, z2.b
1618# CHECK-NEXT: 3.     2     19.5   0.0    0.0       sqrdmlah	z0.b, z0.b, z1.b
1619# CHECK-NEXT:        2     14.3   0.1    0.0       <total>
1620
1621# CHECK:      [34] Code Region - Z sqrdmlah.d
1622
1623# CHECK:      Iterations:        100
1624# CHECK-NEXT: Instructions:      400
1625# CHECK-NEXT: Total Cycles:      1803
1626# CHECK-NEXT: Total uOps:        500
1627
1628# CHECK:      Dispatch Width:    16
1629# CHECK-NEXT: uOps Per Cycle:    0.28
1630# CHECK-NEXT: IPC:               0.22
1631# CHECK-NEXT: Block RThroughput: 4.0
1632
1633# CHECK:      Timeline view:
1634# CHECK-NEXT:                     0123456789          012345678
1635# CHECK-NEXT: Index     0123456789          0123456789
1636
1637# CHECK:      [0,0]     DeeeeeER  .    .    .    .    .    .  .   mul	z0.d, z0.d, z0.d
1638# CHECK-NEXT: [0,1]     D=====eeeeeER  .    .    .    .    .  .   sqrdmlah	z0.d, z1.d, z2.d
1639# CHECK-NEXT: [0,2]     D========eeeeeER    .    .    .    .  .   sqrdmlah	z0.d, z1.d, z2.d
1640# CHECK-NEXT: [0,3]     D=============eeeeeER    .    .    .  .   sqrdmlah	z0.d, z0.d, z1.d
1641# CHECK-NEXT: [1,0]     D==================eeeeeER    .    .  .   mul	z0.d, z0.d, z0.d
1642# CHECK-NEXT: [1,1]     D=======================eeeeeER    .  .   sqrdmlah	z0.d, z1.d, z2.d
1643# CHECK-NEXT: [1,2]     D==========================eeeeeER .  .   sqrdmlah	z0.d, z1.d, z2.d
1644# CHECK-NEXT: [1,3]     D===============================eeeeeER   sqrdmlah	z0.d, z0.d, z1.d
1645
1646# CHECK:      Average Wait times (based on the timeline view):
1647# CHECK-NEXT: [0]: Executions
1648# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1649# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1650# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1651
1652# CHECK:            [0]    [1]    [2]    [3]
1653# CHECK-NEXT: 0.     2     10.0   0.5    0.0       mul	z0.d, z0.d, z0.d
1654# CHECK-NEXT: 1.     2     15.0   0.0    0.0       sqrdmlah	z0.d, z1.d, z2.d
1655# CHECK-NEXT: 2.     2     18.0   0.0    0.0       sqrdmlah	z0.d, z1.d, z2.d
1656# CHECK-NEXT: 3.     2     23.0   0.0    0.0       sqrdmlah	z0.d, z0.d, z1.d
1657# CHECK-NEXT:        2     16.5   0.1    0.0       <total>
1658
1659# CHECK:      [35] Code Region - Z fcmla ZPmZZ
1660
1661# CHECK:      Iterations:        100
1662# CHECK-NEXT: Instructions:      400
1663# CHECK-NEXT: Total Cycles:      1503
1664# CHECK-NEXT: Total uOps:        400
1665
1666# CHECK:      Dispatch Width:    16
1667# CHECK-NEXT: uOps Per Cycle:    0.27
1668# CHECK-NEXT: IPC:               0.27
1669# CHECK-NEXT: Block RThroughput: 1.0
1670
1671# CHECK:      Timeline view:
1672# CHECK-NEXT:                     0123456789          012
1673# CHECK-NEXT: Index     0123456789          0123456789
1674
1675# CHECK:      [0,0]     DeeeER    .    .    .    .    . .   fmul	z0.d, z0.d, z0.d
1676# CHECK-NEXT: [0,1]     D===eeeeeER    .    .    .    . .   fcmla	z0.d, p0/m, z1.d, z2.d, #90
1677# CHECK-NEXT: [0,2]     D=====eeeeeER  .    .    .    . .   fcmla	z0.d, p0/m, z1.d, z2.d, #90
1678# CHECK-NEXT: [0,3]     D==========eeeeeER  .    .    . .   fcmla	z0.d, p0/m, z0.d, z1.d, #90
1679# CHECK-NEXT: [1,0]     D===============eeeER    .    . .   fmul	z0.d, z0.d, z0.d
1680# CHECK-NEXT: [1,1]     D==================eeeeeER    . .   fcmla	z0.d, p0/m, z1.d, z2.d, #90
1681# CHECK-NEXT: [1,2]     D====================eeeeeER  . .   fcmla	z0.d, p0/m, z1.d, z2.d, #90
1682# CHECK-NEXT: [1,3]     D=========================eeeeeER   fcmla	z0.d, p0/m, z0.d, z1.d, #90
1683
1684# CHECK:      Average Wait times (based on the timeline view):
1685# CHECK-NEXT: [0]: Executions
1686# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1687# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1688# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1689
1690# CHECK:            [0]    [1]    [2]    [3]
1691# CHECK-NEXT: 0.     2     8.5    0.5    0.0       fmul	z0.d, z0.d, z0.d
1692# CHECK-NEXT: 1.     2     11.5   0.0    0.0       fcmla	z0.d, p0/m, z1.d, z2.d, #90
1693# CHECK-NEXT: 2.     2     13.5   0.0    0.0       fcmla	z0.d, p0/m, z1.d, z2.d, #90
1694# CHECK-NEXT: 3.     2     18.5   0.0    0.0       fcmla	z0.d, p0/m, z0.d, z1.d, #90
1695# CHECK-NEXT:        2     13.0   0.1    0.0       <total>
1696
1697# CHECK:      [36] Code Region - Z fcmla ZZZI
1698
1699# CHECK:      Iterations:        100
1700# CHECK-NEXT: Instructions:      400
1701# CHECK-NEXT: Total Cycles:      1503
1702# CHECK-NEXT: Total uOps:        400
1703
1704# CHECK:      Dispatch Width:    16
1705# CHECK-NEXT: uOps Per Cycle:    0.27
1706# CHECK-NEXT: IPC:               0.27
1707# CHECK-NEXT: Block RThroughput: 1.0
1708
1709# CHECK:      Timeline view:
1710# CHECK-NEXT:                     0123456789          012
1711# CHECK-NEXT: Index     0123456789          0123456789
1712
1713# CHECK:      [0,0]     DeeeER    .    .    .    .    . .   fmul	z0.d, z0.d, z0.d
1714# CHECK-NEXT: [0,1]     D===eeeeeER    .    .    .    . .   fcmla	z0.s, z1.s, z2.s[1], #90
1715# CHECK-NEXT: [0,2]     D=====eeeeeER  .    .    .    . .   fcmla	z0.s, z1.s, z2.s[1], #90
1716# CHECK-NEXT: [0,3]     D==========eeeeeER  .    .    . .   fcmla	z0.s, z0.s, z1.s[1], #90
1717# CHECK-NEXT: [1,0]     D===============eeeER    .    . .   fmul	z0.d, z0.d, z0.d
1718# CHECK-NEXT: [1,1]     D==================eeeeeER    . .   fcmla	z0.s, z1.s, z2.s[1], #90
1719# CHECK-NEXT: [1,2]     D====================eeeeeER  . .   fcmla	z0.s, z1.s, z2.s[1], #90
1720# CHECK-NEXT: [1,3]     D=========================eeeeeER   fcmla	z0.s, z0.s, z1.s[1], #90
1721
1722# CHECK:      Average Wait times (based on the timeline view):
1723# CHECK-NEXT: [0]: Executions
1724# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1725# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1726# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1727
1728# CHECK:            [0]    [1]    [2]    [3]
1729# CHECK-NEXT: 0.     2     8.5    0.5    0.0       fmul	z0.d, z0.d, z0.d
1730# CHECK-NEXT: 1.     2     11.5   0.0    0.0       fcmla	z0.s, z1.s, z2.s[1], #90
1731# CHECK-NEXT: 2.     2     13.5   0.0    0.0       fcmla	z0.s, z1.s, z2.s[1], #90
1732# CHECK-NEXT: 3.     2     18.5   0.0    0.0       fcmla	z0.s, z0.s, z1.s[1], #90
1733# CHECK-NEXT:        2     13.0   0.1    0.0       <total>
1734
1735# CHECK:      [37] Code Region - Z fmla ZPmZZ
1736
1737# CHECK:      Iterations:        100
1738# CHECK-NEXT: Instructions:      400
1739# CHECK-NEXT: Total Cycles:      1303
1740# CHECK-NEXT: Total uOps:        400
1741
1742# CHECK:      Dispatch Width:    16
1743# CHECK-NEXT: uOps Per Cycle:    0.31
1744# CHECK-NEXT: IPC:               0.31
1745# CHECK-NEXT: Block RThroughput: 1.0
1746
1747# CHECK:      Timeline view:
1748# CHECK-NEXT:                     0123456789
1749# CHECK-NEXT: Index     0123456789          012345678
1750
1751# CHECK:      [0,0]     DeeeER    .    .    .    .  .   fmul	z0.d, z0.d, z0.d
1752# CHECK-NEXT: [0,1]     D===eeeeER.    .    .    .  .   fmla	z0.d, p0/m, z1.d, z2.d
1753# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   fmla	z0.d, p0/m, z1.d, z2.d
1754# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   fmla	z0.d, p0/m, z0.d, z1.d
1755# CHECK-NEXT: [1,0]     D=============eeeER .    .  .   fmul	z0.d, z0.d, z0.d
1756# CHECK-NEXT: [1,1]     D================eeeeER  .  .   fmla	z0.d, p0/m, z1.d, z2.d
1757# CHECK-NEXT: [1,2]     D==================eeeeER.  .   fmla	z0.d, p0/m, z1.d, z2.d
1758# CHECK-NEXT: [1,3]     D======================eeeeER   fmla	z0.d, p0/m, z0.d, z1.d
1759
1760# CHECK:      Average Wait times (based on the timeline view):
1761# CHECK-NEXT: [0]: Executions
1762# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1763# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1764# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1765
1766# CHECK:            [0]    [1]    [2]    [3]
1767# CHECK-NEXT: 0.     2     7.5    0.5    0.0       fmul	z0.d, z0.d, z0.d
1768# CHECK-NEXT: 1.     2     10.5   0.0    0.0       fmla	z0.d, p0/m, z1.d, z2.d
1769# CHECK-NEXT: 2.     2     12.5   0.0    0.0       fmla	z0.d, p0/m, z1.d, z2.d
1770# CHECK-NEXT: 3.     2     16.5   0.0    0.0       fmla	z0.d, p0/m, z0.d, z1.d
1771# CHECK-NEXT:        2     11.8   0.1    0.0       <total>
1772
1773# CHECK:      [38] Code Region - Z fmla ZZZI
1774
1775# CHECK:      Iterations:        100
1776# CHECK-NEXT: Instructions:      400
1777# CHECK-NEXT: Total Cycles:      1303
1778# CHECK-NEXT: Total uOps:        400
1779
1780# CHECK:      Dispatch Width:    16
1781# CHECK-NEXT: uOps Per Cycle:    0.31
1782# CHECK-NEXT: IPC:               0.31
1783# CHECK-NEXT: Block RThroughput: 1.0
1784
1785# CHECK:      Timeline view:
1786# CHECK-NEXT:                     0123456789
1787# CHECK-NEXT: Index     0123456789          012345678
1788
1789# CHECK:      [0,0]     DeeeER    .    .    .    .  .   fmul	z0.d, z0.d, z0.d
1790# CHECK-NEXT: [0,1]     D===eeeeER.    .    .    .  .   fmla	z0.d, z1.d, z2.d[1]
1791# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   fmla	z0.d, z1.d, z2.d[1]
1792# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   fmla	z0.d, z0.d, z1.d[1]
1793# CHECK-NEXT: [1,0]     D=============eeeER .    .  .   fmul	z0.d, z0.d, z0.d
1794# CHECK-NEXT: [1,1]     D================eeeeER  .  .   fmla	z0.d, z1.d, z2.d[1]
1795# CHECK-NEXT: [1,2]     D==================eeeeER.  .   fmla	z0.d, z1.d, z2.d[1]
1796# CHECK-NEXT: [1,3]     D======================eeeeER   fmla	z0.d, z0.d, z1.d[1]
1797
1798# CHECK:      Average Wait times (based on the timeline view):
1799# CHECK-NEXT: [0]: Executions
1800# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1801# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1802# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1803
1804# CHECK:            [0]    [1]    [2]    [3]
1805# CHECK-NEXT: 0.     2     7.5    0.5    0.0       fmul	z0.d, z0.d, z0.d
1806# CHECK-NEXT: 1.     2     10.5   0.0    0.0       fmla	z0.d, z1.d, z2.d[1]
1807# CHECK-NEXT: 2.     2     12.5   0.0    0.0       fmla	z0.d, z1.d, z2.d[1]
1808# CHECK-NEXT: 3.     2     16.5   0.0    0.0       fmla	z0.d, z0.d, z1.d[1]
1809# CHECK-NEXT:        2     11.8   0.1    0.0       <total>
1810
1811# CHECK:      [39] Code Region - Z fmlalb ZZZ
1812
1813# CHECK:      Iterations:        100
1814# CHECK-NEXT: Instructions:      400
1815# CHECK-NEXT: Total Cycles:      1303
1816# CHECK-NEXT: Total uOps:        400
1817
1818# CHECK:      Dispatch Width:    16
1819# CHECK-NEXT: uOps Per Cycle:    0.31
1820# CHECK-NEXT: IPC:               0.31
1821# CHECK-NEXT: Block RThroughput: 1.0
1822
1823# CHECK:      Timeline view:
1824# CHECK-NEXT:                     0123456789
1825# CHECK-NEXT: Index     0123456789          012345678
1826
1827# CHECK:      [0,0]     DeeeER    .    .    .    .  .   fmul	z0.d, z0.d, z0.d
1828# CHECK-NEXT: [0,1]     D===eeeeER.    .    .    .  .   fmlalb	z0.s, z1.h, z2.h
1829# CHECK-NEXT: [0,2]     D=====eeeeER   .    .    .  .   fmlalb	z0.s, z1.h, z2.h
1830# CHECK-NEXT: [0,3]     D=========eeeeER    .    .  .   fmlalb	z0.s, z0.h, z1.h
1831# CHECK-NEXT: [1,0]     D=============eeeER .    .  .   fmul	z0.d, z0.d, z0.d
1832# CHECK-NEXT: [1,1]     D================eeeeER  .  .   fmlalb	z0.s, z1.h, z2.h
1833# CHECK-NEXT: [1,2]     D==================eeeeER.  .   fmlalb	z0.s, z1.h, z2.h
1834# CHECK-NEXT: [1,3]     D======================eeeeER   fmlalb	z0.s, z0.h, z1.h
1835
1836# CHECK:      Average Wait times (based on the timeline view):
1837# CHECK-NEXT: [0]: Executions
1838# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1839# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1840# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1841
1842# CHECK:            [0]    [1]    [2]    [3]
1843# CHECK-NEXT: 0.     2     7.5    0.5    0.0       fmul	z0.d, z0.d, z0.d
1844# CHECK-NEXT: 1.     2     10.5   0.0    0.0       fmlalb	z0.s, z1.h, z2.h
1845# CHECK-NEXT: 2.     2     12.5   0.0    0.0       fmlalb	z0.s, z1.h, z2.h
1846# CHECK-NEXT: 3.     2     16.5   0.0    0.0       fmlalb	z0.s, z0.h, z1.h
1847# CHECK-NEXT:        2     11.8   0.1    0.0       <total>
1848
1849# CHECK:      [40] Code Region - Z bfdot
1850
1851# CHECK:      Iterations:        100
1852# CHECK-NEXT: Instructions:      400
1853# CHECK-NEXT: Total Cycles:      1603
1854# CHECK-NEXT: Total uOps:        400
1855
1856# CHECK:      Dispatch Width:    16
1857# CHECK-NEXT: uOps Per Cycle:    0.25
1858# CHECK-NEXT: IPC:               0.25
1859# CHECK-NEXT: Block RThroughput: 1.0
1860
1861# CHECK:      Timeline view:
1862# CHECK-NEXT:                     0123456789          01234
1863# CHECK-NEXT: Index     0123456789          0123456789
1864
1865# CHECK:      [0,0]     DeeeER    .    .    .    .    .   .   fmul	z0.d, z0.d, z0.d
1866# CHECK-NEXT: [0,1]     D===eeeeeER    .    .    .    .   .   bfdot	z0.s, z1.h, z2.h
1867# CHECK-NEXT: [0,2]     D======eeeeeER .    .    .    .   .   bfdot	z0.s, z1.h, z2.h
1868# CHECK-NEXT: [0,3]     D===========eeeeeER .    .    .   .   bfdot	z0.s, z0.h, z1.h
1869# CHECK-NEXT: [1,0]     D================eeeER   .    .   .   fmul	z0.d, z0.d, z0.d
1870# CHECK-NEXT: [1,1]     D===================eeeeeER   .   .   bfdot	z0.s, z1.h, z2.h
1871# CHECK-NEXT: [1,2]     D======================eeeeeER.   .   bfdot	z0.s, z1.h, z2.h
1872# CHECK-NEXT: [1,3]     D===========================eeeeeER   bfdot	z0.s, z0.h, z1.h
1873
1874# CHECK:      Average Wait times (based on the timeline view):
1875# CHECK-NEXT: [0]: Executions
1876# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1877# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1878# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1879
1880# CHECK:            [0]    [1]    [2]    [3]
1881# CHECK-NEXT: 0.     2     9.0    0.5    0.0       fmul	z0.d, z0.d, z0.d
1882# CHECK-NEXT: 1.     2     12.0   0.0    0.0       bfdot	z0.s, z1.h, z2.h
1883# CHECK-NEXT: 2.     2     15.0   0.0    0.0       bfdot	z0.s, z1.h, z2.h
1884# CHECK-NEXT: 3.     2     20.0   0.0    0.0       bfdot	z0.s, z0.h, z1.h
1885# CHECK-NEXT:        2     14.0   0.1    0.0       <total>
1886
1887# CHECK:      [41] Code Region - Z bfmmla
1888
1889# CHECK:      Iterations:        100
1890# CHECK-NEXT: Instructions:      400
1891# CHECK-NEXT: Total Cycles:      1903
1892# CHECK-NEXT: Total uOps:        400
1893
1894# CHECK:      Dispatch Width:    16
1895# CHECK-NEXT: uOps Per Cycle:    0.21
1896# CHECK-NEXT: IPC:               0.21
1897# CHECK-NEXT: Block RThroughput: 1.0
1898
1899# CHECK:      Timeline view:
1900# CHECK-NEXT:                     0123456789          0123456789
1901# CHECK-NEXT: Index     0123456789          0123456789          0
1902
1903# CHECK:      [0,0]     DeeeER    .    .    .    .    .    .    .   fmul	z0.d, z0.d, z0.d
1904# CHECK-NEXT: [0,1]     D===eeeeeeER   .    .    .    .    .    .   bfmmla	z0.s, z1.h, z2.h
1905# CHECK-NEXT: [0,2]     D=======eeeeeeER    .    .    .    .    .   bfmmla	z0.s, z1.h, z2.h
1906# CHECK-NEXT: [0,3]     D=============eeeeeeER   .    .    .    .   bfmmla	z0.s, z0.h, z1.h
1907# CHECK-NEXT: [1,0]     D===================eeeER.    .    .    .   fmul	z0.d, z0.d, z0.d
1908# CHECK-NEXT: [1,1]     D======================eeeeeeER    .    .   bfmmla	z0.s, z1.h, z2.h
1909# CHECK-NEXT: [1,2]     D==========================eeeeeeER.    .   bfmmla	z0.s, z1.h, z2.h
1910# CHECK-NEXT: [1,3]     D================================eeeeeeER   bfmmla	z0.s, z0.h, z1.h
1911
1912# CHECK:      Average Wait times (based on the timeline view):
1913# CHECK-NEXT: [0]: Executions
1914# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1915# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1916# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1917
1918# CHECK:            [0]    [1]    [2]    [3]
1919# CHECK-NEXT: 0.     2     10.5   0.5    0.0       fmul	z0.d, z0.d, z0.d
1920# CHECK-NEXT: 1.     2     13.5   0.0    0.0       bfmmla	z0.s, z1.h, z2.h
1921# CHECK-NEXT: 2.     2     17.5   0.0    0.0       bfmmla	z0.s, z1.h, z2.h
1922# CHECK-NEXT: 3.     2     23.5   0.0    0.0       bfmmla	z0.s, z0.h, z1.h
1923# CHECK-NEXT:        2     16.3   0.1    0.0       <total>
1924
1925# CHECK:      [42] Code Region - bfmlalb
1926
1927# CHECK:      Iterations:        100
1928# CHECK-NEXT: Instructions:      400
1929# CHECK-NEXT: Total Cycles:      1503
1930# CHECK-NEXT: Total uOps:        400
1931
1932# CHECK:      Dispatch Width:    16
1933# CHECK-NEXT: uOps Per Cycle:    0.27
1934# CHECK-NEXT: IPC:               0.27
1935# CHECK-NEXT: Block RThroughput: 1.0
1936
1937# CHECK:      Timeline view:
1938# CHECK-NEXT:                     0123456789          012
1939# CHECK-NEXT: Index     0123456789          0123456789
1940
1941# CHECK:      [0,0]     DeeeER    .    .    .    .    . .   fmul	z0.d, z0.d, z0.d
1942# CHECK-NEXT: [0,1]     D===eeeeeER    .    .    .    . .   bfmlalb	z0.s, z1.h, z2.h
1943# CHECK-NEXT: [0,2]     D=====eeeeeER  .    .    .    . .   bfmlalb	z0.s, z1.h, z2.h
1944# CHECK-NEXT: [0,3]     D==========eeeeeER  .    .    . .   bfmlalb	z0.s, z0.h, z1.h
1945# CHECK-NEXT: [1,0]     D===============eeeER    .    . .   fmul	z0.d, z0.d, z0.d
1946# CHECK-NEXT: [1,1]     D==================eeeeeER    . .   bfmlalb	z0.s, z1.h, z2.h
1947# CHECK-NEXT: [1,2]     D====================eeeeeER  . .   bfmlalb	z0.s, z1.h, z2.h
1948# CHECK-NEXT: [1,3]     D=========================eeeeeER   bfmlalb	z0.s, z0.h, z1.h
1949
1950# CHECK:      Average Wait times (based on the timeline view):
1951# CHECK-NEXT: [0]: Executions
1952# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
1953# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
1954# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
1955
1956# CHECK:            [0]    [1]    [2]    [3]
1957# CHECK-NEXT: 0.     2     8.5    0.5    0.0       fmul	z0.d, z0.d, z0.d
1958# CHECK-NEXT: 1.     2     11.5   0.0    0.0       bfmlalb	z0.s, z1.h, z2.h
1959# CHECK-NEXT: 2.     2     13.5   0.0    0.0       bfmlalb	z0.s, z1.h, z2.h
1960# CHECK-NEXT: 3.     2     18.5   0.0    0.0       bfmlalb	z0.s, z0.h, z1.h
1961# CHECK-NEXT:        2     13.0   0.1    0.0       <total>
1962