# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v2 -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=2 < %s | FileCheck %s # LLVM-MCA-BEGIN madd mul x0, x0, x0 madd x0, x1, x2, x0 madd x0, x1, x2, x0 madd x0, x0, x0, x0 # LLVM-MCA-END # LLVM-MCA-BEGIN smaddl mul x0, x0, x0 smaddl x0, w1, w2, x0 smaddl x0, w1, w2, x0 smaddl x0, w0, w0, x0 # LLVM-MCA-END # LLVM-MCA-BEGIN fmadd fadd d0, d0, d0 fmadd d0, d1, d2, d0 fmul d0, d0, d0 fmadd d0, d1, d2, d0 fmadd d0, d1, d2, d0 fmadd d0, d0, d1, d2 # LLVM-MCA-END # LLVM-MCA-BEGIN saba mul v0.4s, v0.4s, v0.4s saba v0.4s, v1.4s, v2.4s saba v0.4s, v1.4s, v2.4s saba v0.4s, v0.4s, v1.4s # LLVM-MCA-END # LLVM-MCA-BEGIN sdot mul v0.4s, v0.4s, v0.4s sdot v0.4s, v1.16b, v2.16b sdot v0.4s, v1.16b, v2.16b sdot v0.4s, v0.16b, v1.16b # LLVM-MCA-END # LLVM-MCA-BEGIN smmla mul v0.4s, v0.4s, v0.4s smmla v0.4s, v1.16b, v2.16b smmla v0.4s, v1.16b, v2.16b smmla v0.4s, v0.16b, v1.16b # LLVM-MCA-END # LLVM-MCA-BEGIN mla mul v0.4s, v0.4s, v0.4s mla v0.4s, v1.4s, v2.4s mla v0.4s, v1.4s, v2.4s mla v0.4s, v0.4s, v1.4s # LLVM-MCA-END # LLVM-MCA-BEGIN sqrdmlah mul v0.4s, v0.4s, v0.4s sqrdmlah v0.4s, v1.4s, v2.4s sqrdmlah v0.4s, v1.4s, v2.4s sqrdmlah v0.4s, v0.4s, v1.4s # LLVM-MCA-END # LLVM-MCA-BEGIN smlal2 mul v0.4s, v0.4s, v0.4s smlal2 v0.4s, v1.8h, v2.8h smlal2 v0.4s, v1.8h, v2.8h smlal2 v0.4s, v0.8h, v1.8h # LLVM-MCA-END # LLVM-MCA-BEGIN sadalp mul v0.4s, v0.4s, v0.4s sadalp v0.2d, v1.4s sadalp v0.2d, v1.4s sadalp v0.2d, v0.4s # LLVM-MCA-END # LLVM-MCA-BEGIN ssra mul v0.4s, v0.4s, v0.4s ssra v0.2d, v1.2d, #1 ssra v0.2d, v1.2d, #1 ssra v0.2d, v0.2d, #1 # LLVM-MCA-END # LLVM-MCA-BEGIN fcmla fmul v0.4s, v0.4s, v0.4s fcmla v0.2d, v1.2d, v2.2d, #90 fcmla v0.2d, v1.2d, v2.2d, #90 fcmla v0.2d, v0.2d, v1.2d, #90 # LLVM-MCA-END # LLVM-MCA-BEGIN fmla fmul v0.2d, v0.2d, v0.2d fmla v0.2d, v1.2d, v2.2d fadd v0.2d, v0.2d, v0.2d fmla v0.2d, v1.2d, v2.2d fmla v0.2d, v1.2d, v2.2d fmla v0.2d, v0.2d, v1.2d # LLVM-MCA-END # LLVM-MCA-BEGIN fmlal fmul v0.2d, v0.2d, v0.2d fmlal v0.4s, v1.4h, v2.4h fadd v0.2d, v0.2d, v0.2d fmlal v0.4s, v1.4h, v2.4h fmlal v0.4s, v1.4h, v2.4h fmlal v0.4s, v0.4h, v1.4h # LLVM-MCA-END # LLVM-MCA-BEGIN bfdot fmul v0.2d, v0.2d, v0.2d bfdot v0.4s, v1.8h, v2.8h bfdot v0.4s, v1.8h, v2.8h bfdot v0.4s, v0.8h, v1.8h # LLVM-MCA-END # LLVM-MCA-BEGIN bfmmla fmul v0.2d, v0.2d, v0.2d bfmmla v0.4s, v1.8h, v2.8h bfmmla v0.4s, v1.8h, v2.8h bfmmla v0.4s, v0.8h, v1.8h # LLVM-MCA-END # LLVM-MCA-BEGIN bfmlalb fmul v0.2d, v0.2d, v0.2d bfmlalb v0.4s, v1.8h, v2.8h bfmlalb v0.4s, v1.8h, v2.8h bfmlalb v0.4s, v0.8h, v1.8h # LLVM-MCA-END # LLVM-MCA-BEGIN crc32b mul w0, w0, w0 crc32b w0, w0, w1 crc32b w0, w0, w1 crc32b w0, w0, w0 # LLVM-MCA-END # LLVM-MCA-BEGIN Z saba mul z0.d, z0.d, z0.d saba z0.d, z1.d, z2.d saba z0.d, z1.d, z2.d saba z0.d, z0.d, z1.d # LLVM-MCA-END # LLVM-MCA-BEGIN Z sadalp mul z0.d, z0.d, z0.d sadalp z0.d, p0/m, z1.s sadalp z0.d, p0/m, z1.s sadalp z0.d, p0/m, z0.s # LLVM-MCA-END # LLVM-MCA-BEGIN Z ssra mul z0.d, z0.d, z0.d ssra z0.d, z1.d, #1 ssra z0.d, z1.d, #1 ssra z0.d, z0.d, #1 # LLVM-MCA-END # LLVM-MCA-BEGIN Z cdot.s mul z0.d, z0.d, z0.d cdot z0.s, z1.b, z2.b, #90 cdot z0.s, z1.b, z2.b, #90 cdot z0.s, z0.b, z1.b, #90 # LLVM-MCA-END # LLVM-MCA-BEGIN Z cdot.d mul z0.d, z0.d, z0.d cdot z0.d, z1.h, z2.h, #90 cdot z0.d, z1.h, z2.h, #90 cdot z0.d, z0.h, z1.h, #90 # LLVM-MCA-END # LLVM-MCA-BEGIN Z cmla.b mul z0.d, z0.d, z0.d cmla z0.b, z1.b, z2.b, #90 cmla z0.b, z1.b, z2.b, #90 cmla z0.b, z0.b, z1.b, #90 # LLVM-MCA-END # LLVM-MCA-BEGIN Z cmla.d mul z0.d, z0.d, z0.d cmla z0.d, z1.d, z2.d, #90 cmla z0.d, z1.d, z2.d, #90 cmla z0.d, z0.d, z1.d, #90 # LLVM-MCA-END # LLVM-MCA-BEGIN Z sdot.s mul z0.d, z0.d, z0.d sdot z0.s, z1.b, z2.b sdot z0.s, z1.b, z2.b sdot z0.s, z0.b, z1.b # LLVM-MCA-END # LLVM-MCA-BEGIN Z sudot mul z0.d, z0.d, z0.d sdot z0.s, z1.b, z2.b[1] sdot z0.s, z1.b, z2.b[1] sdot z0.s, z0.b, z1.b[1] # LLVM-MCA-END # LLVM-MCA-BEGIN Z sdot.d mul z0.d, z0.d, z0.d sdot z0.d, z1.h, z2.h sdot z0.d, z1.h, z2.h sdot z0.d, z0.h, z1.h # LLVM-MCA-END # LLVM-MCA-BEGIN Z smmla mul z0.s, z0.s, z0.s smmla z0.s, z1.b, z2.b smmla z0.s, z1.b, z2.b smmla z0.s, z0.b, z1.b # LLVM-MCA-END # LLVM-MCA-BEGIN Z mla.b mul z0.d, z0.d, z0.d mla z0.b, p0/m, z1.b, z2.b mla z0.b, p0/m, z1.b, z2.b mla z0.b, p0/m, z0.b, z1.b # LLVM-MCA-END # LLVM-MCA-BEGIN Z mla.d mul z0.d, z0.d, z0.d mla z0.d, p0/m, z1.d, z2.d mla z0.d, p0/m, z1.d, z2.d mla z0.d, p0/m, z0.d, z1.d # LLVM-MCA-END # LLVM-MCA-BEGIN Z smlalb mul z0.d, z0.d, z0.d smlalb z0.d, z1.s, z2.s smlalb z0.d, z1.s, z2.s smlalb z0.d, z0.s, z1.s # LLVM-MCA-END # LLVM-MCA-BEGIN Z sqdmlalb mul z0.d, z0.d, z0.d sqdmlalb z0.d, z1.s, z2.s sqdmlalb z0.d, z1.s, z2.s sqdmlalb z0.d, z0.s, z1.s # LLVM-MCA-END # LLVM-MCA-BEGIN Z sqrdmlah.b mul z0.d, z0.d, z0.d sqrdmlah z0.b, z1.b, z2.b sqrdmlah z0.b, z1.b, z2.b sqrdmlah z0.b, z0.b, z1.b # LLVM-MCA-END # LLVM-MCA-BEGIN Z sqrdmlah.d mul z0.d, z0.d, z0.d sqrdmlah z0.d, z1.d, z2.d sqrdmlah z0.d, z1.d, z2.d sqrdmlah z0.d, z0.d, z1.d # LLVM-MCA-END # LLVM-MCA-BEGIN Z fcmla ZPmZZ fmul z0.d, z0.d, z0.d fcmla z0.d, p0/m, z1.d, z2.d, 90 fcmla z0.d, p0/m, z1.d, z2.d, 90 fcmla z0.d, p0/m, z0.d, z1.d, 90 # LLVM-MCA-END # LLVM-MCA-BEGIN Z fcmla ZZZI fmul z0.d, z0.d, z0.d fcmla z0.s, z1.s, z2.s[1], 90 fcmla z0.s, z1.s, z2.s[1], 90 fcmla z0.s, z0.s, z1.s[1], 90 # LLVM-MCA-END # LLVM-MCA-BEGIN Z fmla ZPmZZ fmul z0.d, z0.d, z0.d fmla z0.d, p0/m, z1.d, z2.d fmla z0.d, p0/m, z1.d, z2.d fmla z0.d, p0/m, z0.d, z1.d # LLVM-MCA-END # LLVM-MCA-BEGIN Z fmla ZZZI fmul z0.d, z0.d, z0.d fmla z0.d, z1.d, z2.d[1] fmla z0.d, z1.d, z2.d[1] fmla z0.d, z0.d, z1.d[1] # LLVM-MCA-END # LLVM-MCA-BEGIN Z fmlalb ZZZ fmul z0.d, z0.d, z0.d fmlalb z0.s, z1.h, z2.h fmlalb z0.s, z1.h, z2.h fmlalb z0.s, z0.h, z1.h # LLVM-MCA-END # LLVM-MCA-BEGIN Z bfdot fmul z0.d, z0.d, z0.d bfdot z0.s, z1.h, z2.h bfdot z0.s, z1.h, z2.h bfdot z0.s, z0.h, z1.h # LLVM-MCA-END # LLVM-MCA-BEGIN Z bfmmla fmul z0.d, z0.d, z0.d bfmmla z0.s, z1.h, z2.h bfmmla z0.s, z1.h, z2.h bfmmla z0.s, z0.h, z1.h # LLVM-MCA-END # LLVM-MCA-BEGIN bfmlalb fmul z0.d, z0.d, z0.d bfmlalb z0.s, z1.h, z2.h bfmlalb z0.s, z1.h, z2.h bfmlalb z0.s, z0.h, z1.h # LLVM-MCA-END # CHECK: [0] Code Region - madd # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.57 # CHECK-NEXT: IPC: 0.57 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. . .. mul x0, x0, x0 # CHECK-NEXT: [0,1] D==eeER . .. madd x0, x1, x2, x0 # CHECK-NEXT: [0,2] D===eeER . .. madd x0, x1, x2, x0 # CHECK-NEXT: [0,3] D=====eeER. .. madd x0, x0, x0, x0 # CHECK-NEXT: [1,0] D=======eeER .. mul x0, x0, x0 # CHECK-NEXT: [1,1] D=========eeER .. madd x0, x1, x2, x0 # CHECK-NEXT: [1,2] D==========eeER.. madd x0, x1, x2, x0 # CHECK-NEXT: [1,3] D============eeER madd x0, x0, x0, x0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul x0, x0, x0 # CHECK-NEXT: 1. 2 6.5 0.0 0.0 madd x0, x1, x2, x0 # CHECK-NEXT: 2. 2 7.5 0.0 0.0 madd x0, x1, x2, x0 # CHECK-NEXT: 3. 2 9.5 0.0 0.0 madd x0, x0, x0, x0 # CHECK-NEXT: 2 7.0 0.1 0.0 # CHECK: [1] Code Region - smaddl # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.57 # CHECK-NEXT: IPC: 0.57 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. . .. mul x0, x0, x0 # CHECK-NEXT: [0,1] D==eeER . .. smaddl x0, w1, w2, x0 # CHECK-NEXT: [0,2] D===eeER . .. smaddl x0, w1, w2, x0 # CHECK-NEXT: [0,3] D=====eeER. .. smaddl x0, w0, w0, x0 # CHECK-NEXT: [1,0] D=======eeER .. mul x0, x0, x0 # CHECK-NEXT: [1,1] D=========eeER .. smaddl x0, w1, w2, x0 # CHECK-NEXT: [1,2] D==========eeER.. smaddl x0, w1, w2, x0 # CHECK-NEXT: [1,3] D============eeER smaddl x0, w0, w0, x0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul x0, x0, x0 # CHECK-NEXT: 1. 2 6.5 0.0 0.0 smaddl x0, w1, w2, x0 # CHECK-NEXT: 2. 2 7.5 0.0 0.0 smaddl x0, w1, w2, x0 # CHECK-NEXT: 3. 2 9.5 0.0 0.0 smaddl x0, w0, w0, x0 # CHECK-NEXT: 2 7.0 0.1 0.0 # CHECK: [2] Code Region - fmadd # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 1703 # CHECK-NEXT: Total uOps: 600 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.35 # CHECK-NEXT: IPC: 0.35 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeER. . . . . . .. fadd d0, d0, d0 # CHECK-NEXT: [0,1] D==eeeeER . . . . . .. fmadd d0, d1, d2, d0 # CHECK-NEXT: [0,2] D======eeeER . . . . .. fmul d0, d0, d0 # CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmadd d0, d1, d2, d0 # CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmadd d0, d1, d2, d0 # CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmadd d0, d0, d1, d2 # CHECK-NEXT: [1,0] D=================eeER . . .. fadd d0, d0, d0 # CHECK-NEXT: [1,1] D===================eeeeER . .. fmadd d0, d1, d2, d0 # CHECK-NEXT: [1,2] D=======================eeeER . .. fmul d0, d0, d0 # CHECK-NEXT: [1,3] D========================eeeeER .. fmadd d0, d1, d2, d0 # CHECK-NEXT: [1,4] D==========================eeeeER .. fmadd d0, d1, d2, d0 # CHECK-NEXT: [1,5] D==============================eeeeER fmadd d0, d0, d1, d2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 9.5 0.5 0.0 fadd d0, d0, d0 # CHECK-NEXT: 1. 2 11.5 0.0 0.0 fmadd d0, d1, d2, d0 # CHECK-NEXT: 2. 2 15.5 0.0 0.0 fmul d0, d0, d0 # CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmadd d0, d1, d2, d0 # CHECK-NEXT: 4. 2 18.5 0.0 0.0 fmadd d0, d1, d2, d0 # CHECK-NEXT: 5. 2 22.5 0.0 0.0 fmadd d0, d0, d1, d2 # CHECK-NEXT: 2 15.7 0.1 0.0 # CHECK: [3] Code Region - saba # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeeER . . . . saba v0.4s, v1.4s, v2.4s # CHECK-NEXT: [0,2] D=====eeeeER . . . . saba v0.4s, v1.4s, v2.4s # CHECK-NEXT: [0,3] D=========eeeeER . . . saba v0.4s, v0.4s, v1.4s # CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D=================eeeeER . . saba v0.4s, v1.4s, v2.4s # CHECK-NEXT: [1,2] D==================eeeeER. . saba v0.4s, v1.4s, v2.4s # CHECK-NEXT: [1,3] D======================eeeeER saba v0.4s, v0.4s, v1.4s # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 11.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s # CHECK-NEXT: 2. 2 12.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s # CHECK-NEXT: 3. 2 16.5 0.0 0.0 saba v0.4s, v0.4s, v1.4s # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [4] Code Region - sdot # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1103 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 0.8 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 01234 # CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeER. . . . sdot v0.4s, v1.16b, v2.16b # CHECK-NEXT: [0,2] D=====eeeER . . . sdot v0.4s, v1.16b, v2.16b # CHECK-NEXT: [0,3] D========eeeER . . . sdot v0.4s, v0.16b, v1.16b # CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D===============eeeER . sdot v0.4s, v1.16b, v2.16b # CHECK-NEXT: [1,2] D================eeeER . sdot v0.4s, v1.16b, v2.16b # CHECK-NEXT: [1,3] D===================eeeER sdot v0.4s, v0.16b, v1.16b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 10.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b # CHECK-NEXT: 2. 2 11.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b # CHECK-NEXT: 3. 2 14.5 0.0 0.0 sdot v0.4s, v0.16b, v1.16b # CHECK-NEXT: 2 10.8 0.1 0.0 # CHECK: [5] Code Region - smmla # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1103 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 0.8 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 01234 # CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeER. . . . smmla v0.4s, v1.16b, v2.16b # CHECK-NEXT: [0,2] D=====eeeER . . . smmla v0.4s, v1.16b, v2.16b # CHECK-NEXT: [0,3] D========eeeER . . . smmla v0.4s, v0.16b, v1.16b # CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D===============eeeER . smmla v0.4s, v1.16b, v2.16b # CHECK-NEXT: [1,2] D================eeeER . smmla v0.4s, v1.16b, v2.16b # CHECK-NEXT: [1,3] D===================eeeER smmla v0.4s, v0.16b, v1.16b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b # CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b # CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla v0.4s, v0.16b, v1.16b # CHECK-NEXT: 2 10.8 0.1 0.0 # CHECK: [6] Code Region - mla # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeeER . . . . mla v0.4s, v1.4s, v2.4s # CHECK-NEXT: [0,2] D=====eeeeER . . . . mla v0.4s, v1.4s, v2.4s # CHECK-NEXT: [0,3] D=========eeeeER . . . mla v0.4s, v0.4s, v1.4s # CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D=================eeeeER . . mla v0.4s, v1.4s, v2.4s # CHECK-NEXT: [1,2] D==================eeeeER. . mla v0.4s, v1.4s, v2.4s # CHECK-NEXT: [1,3] D======================eeeeER mla v0.4s, v0.4s, v1.4s # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 11.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s # CHECK-NEXT: 2. 2 12.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s # CHECK-NEXT: 3. 2 16.5 0.0 0.0 mla v0.4s, v0.4s, v1.4s # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [7] Code Region - sqrdmlah # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.29 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s # CHECK-NEXT: [0,2] D======eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s # CHECK-NEXT: [0,3] D==========eeeeER . . . sqrdmlah v0.4s, v0.4s, v1.4s # CHECK-NEXT: [1,0] D==============eeeeER . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D==================eeeeER. . sqrdmlah v0.4s, v1.4s, v2.4s # CHECK-NEXT: [1,2] D====================eeeeER . sqrdmlah v0.4s, v1.4s, v2.4s # CHECK-NEXT: [1,3] D========================eeeeER sqrdmlah v0.4s, v0.4s, v1.4s # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 12.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s # CHECK-NEXT: 2. 2 14.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s # CHECK-NEXT: 3. 2 18.0 0.0 0.0 sqrdmlah v0.4s, v0.4s, v1.4s # CHECK-NEXT: 2 13.0 0.1 0.0 # CHECK: [8] Code Region - smlal2 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,2] D=====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,3] D=========eeeeER . . . smlal2 v0.4s, v0.8h, v1.8h # CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D=================eeeeER . . smlal2 v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,2] D==================eeeeER. . smlal2 v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,3] D======================eeeeER smlal2 v0.4s, v0.8h, v1.8h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 11.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h # CHECK-NEXT: 2. 2 12.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h # CHECK-NEXT: 3. 2 16.5 0.0 0.0 smlal2 v0.4s, v0.8h, v1.8h # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [9] Code Region - sadalp # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeeER . . . . sadalp v0.2d, v1.4s # CHECK-NEXT: [0,2] D=====eeeeER . . . . sadalp v0.2d, v1.4s # CHECK-NEXT: [0,3] D=========eeeeER . . . sadalp v0.2d, v0.4s # CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D=================eeeeER . . sadalp v0.2d, v1.4s # CHECK-NEXT: [1,2] D==================eeeeER. . sadalp v0.2d, v1.4s # CHECK-NEXT: [1,3] D======================eeeeER sadalp v0.2d, v0.4s # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 11.5 0.0 0.0 sadalp v0.2d, v1.4s # CHECK-NEXT: 2. 2 12.5 0.0 0.0 sadalp v0.2d, v1.4s # CHECK-NEXT: 3. 2 16.5 0.0 0.0 sadalp v0.2d, v0.4s # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [10] Code Region - ssra # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D====eeeeER . . . . ssra v0.2d, v1.2d, #1 # CHECK-NEXT: [0,2] D=====eeeeER . . . . ssra v0.2d, v1.2d, #1 # CHECK-NEXT: [0,3] D=========eeeeER . . . ssra v0.2d, v0.2d, #1 # CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D=================eeeeER . . ssra v0.2d, v1.2d, #1 # CHECK-NEXT: [1,2] D==================eeeeER. . ssra v0.2d, v1.2d, #1 # CHECK-NEXT: [1,3] D======================eeeeER ssra v0.2d, v0.2d, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 11.5 0.0 0.0 ssra v0.2d, v1.2d, #1 # CHECK-NEXT: 2. 2 12.5 0.0 0.0 ssra v0.2d, v1.2d, #1 # CHECK-NEXT: 3. 2 16.5 0.0 0.0 ssra v0.2d, v0.2d, #1 # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [11] Code Region - fcmla # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeER . . . . . fmul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [0,1] D===eeeeER. . . . . fcmla v0.2d, v1.2d, v2.2d, #90 # CHECK-NEXT: [0,2] D=====eeeeER . . . . fcmla v0.2d, v1.2d, v2.2d, #90 # CHECK-NEXT: [0,3] D=========eeeeER . . . fcmla v0.2d, v0.2d, v1.2d, #90 # CHECK-NEXT: [1,0] D=============eeeER . . . fmul v0.4s, v0.4s, v0.4s # CHECK-NEXT: [1,1] D================eeeeER . . fcmla v0.2d, v1.2d, v2.2d, #90 # CHECK-NEXT: [1,2] D==================eeeeER. . fcmla v0.2d, v1.2d, v2.2d, #90 # CHECK-NEXT: [1,3] D======================eeeeER fcmla v0.2d, v0.2d, v1.2d, #90 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1. 2 10.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90 # CHECK-NEXT: 2. 2 12.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90 # CHECK-NEXT: 3. 2 16.5 0.0 0.0 fcmla v0.2d, v0.2d, v1.2d, #90 # CHECK-NEXT: 2 11.8 0.1 0.0 # CHECK: [12] Code Region - fmla # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 1703 # CHECK-NEXT: Total uOps: 600 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.35 # CHECK-NEXT: IPC: 0.35 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeER . . . . . .. fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [0,1] D=eeeeER . . . . . .. fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: [0,2] D=====eeER. . . . . .. fadd v0.2d, v0.2d, v0.2d # CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmla v0.2d, v0.2d, v1.2d # CHECK-NEXT: [1,0] D=================eeeER . . .. fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,1] D==================eeeeER. . .. fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: [1,2] D======================eeER . .. fadd v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,3] D========================eeeeER .. fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: [1,4] D==========================eeeeER .. fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: [1,5] D==============================eeeeER fmla v0.2d, v0.2d, v1.2d # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 9.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: 2. 2 14.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d # CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: 4. 2 18.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d # CHECK-NEXT: 5. 2 22.5 0.0 0.0 fmla v0.2d, v0.2d, v1.2d # CHECK-NEXT: 2 15.3 0.1 0.0 # CHECK: [13] Code Region - fmlal # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 1903 # CHECK-NEXT: Total uOps: 600 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.32 # CHECK-NEXT: IPC: 0.32 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 0 # CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [0,1] D===eeeeER. . . . . . . fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: [0,2] D=======eeER . . . . . . fadd v0.2d, v0.2d, v0.2d # CHECK-NEXT: [0,3] D=========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: [0,4] D===========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: [0,5] D===============eeeeER . . . . fmlal v0.4s, v0.4h, v1.4h # CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,1] D======================eeeeER . . . fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: [1,2] D==========================eeER . . fadd v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,3] D============================eeeeER. . fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: [1,4] D==============================eeeeER . fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: [1,5] D==================================eeeeER fmlal v0.4s, v0.4h, v1.4h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1. 2 13.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: 2. 2 17.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d # CHECK-NEXT: 3. 2 19.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: 4. 2 21.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h # CHECK-NEXT: 5. 2 25.5 0.0 0.0 fmlal v0.4s, v0.4h, v1.4h # CHECK-NEXT: 2 18.0 0.1 0.0 # CHECK: [14] Code Region - bfdot # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1603 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot v0.4s, v0.8h, v1.8h # CHECK-NEXT: [1,0] D================eeeER . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,3] D===========================eeeeeER bfdot v0.4s, v0.8h, v1.8h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h # CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h # CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot v0.4s, v0.8h, v1.8h # CHECK-NEXT: 2 14.0 0.1 0.0 # CHECK: [15] Code Region - bfmmla # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1903 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.21 # CHECK-NEXT: IPC: 0.21 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 0 # CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla v0.4s, v0.8h, v1.8h # CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla v0.4s, v0.8h, v1.8h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h # CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h # CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla v0.4s, v0.8h, v1.8h # CHECK-NEXT: 2 16.3 0.1 0.0 # CHECK: [16] Code Region - bfmlalb # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h # CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb v0.4s, v0.8h, v1.8h # CHECK-NEXT: [1,0] D===============eeeER . . . fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h # CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb v0.4s, v0.8h, v1.8h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h # CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h # CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb v0.4s, v0.8h, v1.8h # CHECK-NEXT: 2 13.0 0.1 0.0 # CHECK: [17] Code Region - crc32b # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.57 # CHECK-NEXT: IPC: 0.57 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. . .. mul w0, w0, w0 # CHECK-NEXT: [0,1] D==eeER . .. crc32b w0, w0, w1 # CHECK-NEXT: [0,2] D===eeER . .. crc32b w0, w0, w1 # CHECK-NEXT: [0,3] D=====eeER. .. crc32b w0, w0, w0 # CHECK-NEXT: [1,0] D=======eeER .. mul w0, w0, w0 # CHECK-NEXT: [1,1] D=========eeER .. crc32b w0, w0, w1 # CHECK-NEXT: [1,2] D==========eeER.. crc32b w0, w0, w1 # CHECK-NEXT: [1,3] D============eeER crc32b w0, w0, w0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul w0, w0, w0 # CHECK-NEXT: 1. 2 6.5 0.0 0.0 crc32b w0, w0, w1 # CHECK-NEXT: 2. 2 7.5 0.0 0.0 crc32b w0, w0, w1 # CHECK-NEXT: 3. 2 9.5 0.0 0.0 crc32b w0, w0, w0 # CHECK-NEXT: 2 7.0 0.1 0.0 # CHECK: [18] Code Region - Z saba # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . saba z0.d, z1.d, z2.d # CHECK-NEXT: [0,2] D======eeeeER . . . . saba z0.d, z1.d, z2.d # CHECK-NEXT: [0,3] D==========eeeeER . . . saba z0.d, z0.d, z1.d # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . saba z0.d, z1.d, z2.d # CHECK-NEXT: [1,2] D====================eeeeER . saba z0.d, z1.d, z2.d # CHECK-NEXT: [1,3] D========================eeeeER saba z0.d, z0.d, z1.d # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 saba z0.d, z1.d, z2.d # CHECK-NEXT: 2. 2 14.0 0.0 0.0 saba z0.d, z1.d, z2.d # CHECK-NEXT: 3. 2 18.0 0.0 0.0 saba z0.d, z0.d, z1.d # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [19] Code Region - Z sadalp # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . sadalp z0.d, p0/m, z1.s # CHECK-NEXT: [0,2] D======eeeeER . . . . sadalp z0.d, p0/m, z1.s # CHECK-NEXT: [0,3] D==========eeeeER . . . sadalp z0.d, p0/m, z0.s # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . sadalp z0.d, p0/m, z1.s # CHECK-NEXT: [1,2] D====================eeeeER . sadalp z0.d, p0/m, z1.s # CHECK-NEXT: [1,3] D========================eeeeER sadalp z0.d, p0/m, z0.s # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 sadalp z0.d, p0/m, z1.s # CHECK-NEXT: 2. 2 14.0 0.0 0.0 sadalp z0.d, p0/m, z1.s # CHECK-NEXT: 3. 2 18.0 0.0 0.0 sadalp z0.d, p0/m, z0.s # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [20] Code Region - Z ssra # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . ssra z0.d, z1.d, #1 # CHECK-NEXT: [0,2] D======eeeeER . . . . ssra z0.d, z1.d, #1 # CHECK-NEXT: [0,3] D==========eeeeER . . . ssra z0.d, z0.d, #1 # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . ssra z0.d, z1.d, #1 # CHECK-NEXT: [1,2] D====================eeeeER . ssra z0.d, z1.d, #1 # CHECK-NEXT: [1,3] D========================eeeeER ssra z0.d, z0.d, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 ssra z0.d, z1.d, #1 # CHECK-NEXT: 2. 2 14.0 0.0 0.0 ssra z0.d, z1.d, #1 # CHECK-NEXT: 3. 2 18.0 0.0 0.0 ssra z0.d, z0.d, #1 # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [21] Code Region - Z cdot.s # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.42 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 0123456 # CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeER . . .. cdot z0.s, z1.b, z2.b, #90 # CHECK-NEXT: [0,2] D======eeeER . . .. cdot z0.s, z1.b, z2.b, #90 # CHECK-NEXT: [0,3] D=========eeeER. . .. cdot z0.s, z0.b, z1.b, #90 # CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D=================eeeER .. cdot z0.s, z1.b, z2.b, #90 # CHECK-NEXT: [1,2] D==================eeeER .. cdot z0.s, z1.b, z2.b, #90 # CHECK-NEXT: [1,3] D=====================eeeER cdot z0.s, z0.b, z1.b, #90 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 12.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90 # CHECK-NEXT: 2. 2 13.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90 # CHECK-NEXT: 3. 2 16.0 0.0 0.0 cdot z0.s, z0.b, z1.b, #90 # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [22] Code Region - Z cdot.d # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . cdot z0.d, z1.h, z2.h, #90 # CHECK-NEXT: [0,2] D======eeeeER . . . . cdot z0.d, z1.h, z2.h, #90 # CHECK-NEXT: [0,3] D==========eeeeER . . . cdot z0.d, z0.h, z1.h, #90 # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . cdot z0.d, z1.h, z2.h, #90 # CHECK-NEXT: [1,2] D====================eeeeER . cdot z0.d, z1.h, z2.h, #90 # CHECK-NEXT: [1,3] D========================eeeeER cdot z0.d, z0.h, z1.h, #90 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90 # CHECK-NEXT: 2. 2 14.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90 # CHECK-NEXT: 3. 2 18.0 0.0 0.0 cdot z0.d, z0.h, z1.h, #90 # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [23] Code Region - Z cmla.b # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . cmla z0.b, z1.b, z2.b, #90 # CHECK-NEXT: [0,2] D======eeeeER . . . . cmla z0.b, z1.b, z2.b, #90 # CHECK-NEXT: [0,3] D==========eeeeER . . . cmla z0.b, z0.b, z1.b, #90 # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . cmla z0.b, z1.b, z2.b, #90 # CHECK-NEXT: [1,2] D====================eeeeER . cmla z0.b, z1.b, z2.b, #90 # CHECK-NEXT: [1,3] D========================eeeeER cmla z0.b, z0.b, z1.b, #90 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90 # CHECK-NEXT: 2. 2 14.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90 # CHECK-NEXT: 3. 2 18.0 0.0 0.0 cmla z0.b, z0.b, z1.b, #90 # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [24] Code Region - Z cmla.d # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1803 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.28 # CHECK-NEXT: IPC: 0.22 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012345678 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . cmla z0.d, z1.d, z2.d, #90 # CHECK-NEXT: [0,2] D========eeeeeER . . . . . cmla z0.d, z1.d, z2.d, #90 # CHECK-NEXT: [0,3] D=============eeeeeER . . . . cmla z0.d, z0.d, z1.d, #90 # CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D=======================eeeeeER . . cmla z0.d, z1.d, z2.d, #90 # CHECK-NEXT: [1,2] D==========================eeeeeER . . cmla z0.d, z1.d, z2.d, #90 # CHECK-NEXT: [1,3] D===============================eeeeeER cmla z0.d, z0.d, z1.d, #90 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 15.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90 # CHECK-NEXT: 2. 2 18.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90 # CHECK-NEXT: 3. 2 23.0 0.0 0.0 cmla z0.d, z0.d, z1.d, #90 # CHECK-NEXT: 2 16.5 0.1 0.0 # CHECK: [25] Code Region - Z sdot.s # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.42 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 0123456 # CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b # CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b # CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b # CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b # CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b # CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b # CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b # CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [26] Code Region - Z sudot # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.42 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 0123456 # CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b[1] # CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b[1] # CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b[1] # CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b[1] # CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b[1] # CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b[1] # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1] # CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1] # CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b[1] # CHECK-NEXT: 2 12.0 0.1 0.0 # CHECK: [27] Code Region - Z sdot.d # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . sdot z0.d, z1.h, z2.h # CHECK-NEXT: [0,2] D======eeeeER . . . . sdot z0.d, z1.h, z2.h # CHECK-NEXT: [0,3] D==========eeeeER . . . sdot z0.d, z0.h, z1.h # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . sdot z0.d, z1.h, z2.h # CHECK-NEXT: [1,2] D====================eeeeER . sdot z0.d, z1.h, z2.h # CHECK-NEXT: [1,3] D========================eeeeER sdot z0.d, z0.h, z1.h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 sdot z0.d, z1.h, z2.h # CHECK-NEXT: 2. 2 14.0 0.0 0.0 sdot z0.d, z1.h, z2.h # CHECK-NEXT: 3. 2 18.0 0.0 0.0 sdot z0.d, z0.h, z1.h # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [28] Code Region - Z smmla # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1103 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 0.8 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 01234 # CHECK: [0,0] DeeeeER . . . . mul z0.s, z0.s, z0.s # CHECK-NEXT: [0,1] D====eeeER. . . . smmla z0.s, z1.b, z2.b # CHECK-NEXT: [0,2] D=====eeeER . . . smmla z0.s, z1.b, z2.b # CHECK-NEXT: [0,3] D========eeeER . . . smmla z0.s, z0.b, z1.b # CHECK-NEXT: [1,0] D===========eeeeER . . mul z0.s, z0.s, z0.s # CHECK-NEXT: [1,1] D===============eeeER . smmla z0.s, z1.b, z2.b # CHECK-NEXT: [1,2] D================eeeER . smmla z0.s, z1.b, z2.b # CHECK-NEXT: [1,3] D===================eeeER smmla z0.s, z0.b, z1.b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul z0.s, z0.s, z0.s # CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla z0.s, z1.b, z2.b # CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla z0.s, z1.b, z2.b # CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla z0.s, z0.b, z1.b # CHECK-NEXT: 2 10.8 0.1 0.0 # CHECK: [29] Code Region - Z mla.b # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b # CHECK-NEXT: [0,2] D======eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b # CHECK-NEXT: [0,3] D==========eeeeER . . . mla z0.b, p0/m, z0.b, z1.b # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . mla z0.b, p0/m, z1.b, z2.b # CHECK-NEXT: [1,2] D====================eeeeER . mla z0.b, p0/m, z1.b, z2.b # CHECK-NEXT: [1,3] D========================eeeeER mla z0.b, p0/m, z0.b, z1.b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b # CHECK-NEXT: 2. 2 14.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b # CHECK-NEXT: 3. 2 18.0 0.0 0.0 mla z0.b, p0/m, z0.b, z1.b # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [30] Code Region - Z mla.d # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1803 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.28 # CHECK-NEXT: IPC: 0.22 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012345678 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,2] D========eeeeeER . . . . . mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,3] D=============eeeeeER . . . . mla z0.d, p0/m, z0.d, z1.d # CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D=======================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [1,2] D==========================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [1,3] D===============================eeeeeER mla z0.d, p0/m, z0.d, z1.d # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 15.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: 2. 2 18.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: 3. 2 23.0 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d # CHECK-NEXT: 2 16.5 0.1 0.0 # CHECK: [31] Code Region - Z smlalb # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . smlalb z0.d, z1.s, z2.s # CHECK-NEXT: [0,2] D======eeeeER . . . . smlalb z0.d, z1.s, z2.s # CHECK-NEXT: [0,3] D==========eeeeER . . . smlalb z0.d, z0.s, z1.s # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . smlalb z0.d, z1.s, z2.s # CHECK-NEXT: [1,2] D====================eeeeER . smlalb z0.d, z1.s, z2.s # CHECK-NEXT: [1,3] D========================eeeeER smlalb z0.d, z0.s, z1.s # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 smlalb z0.d, z1.s, z2.s # CHECK-NEXT: 2. 2 14.0 0.0 0.0 smlalb z0.d, z1.s, z2.s # CHECK-NEXT: 3. 2 18.0 0.0 0.0 smlalb z0.d, z0.s, z1.s # CHECK-NEXT: 2 13.3 0.1 0.0 # CHECK: [32] Code Region - Z sqdmlalb # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.33 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s # CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s # CHECK-NEXT: [0,3] D===========eeeeER . . . . sqdmlalb z0.d, z0.s, z1.s # CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D====================eeeeER . . sqdmlalb z0.d, z1.s, z2.s # CHECK-NEXT: [1,2] D======================eeeeER . . sqdmlalb z0.d, z1.s, z2.s # CHECK-NEXT: [1,3] D==========================eeeeER sqdmlalb z0.d, z0.s, z1.s # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s # CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s # CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqdmlalb z0.d, z0.s, z1.s # CHECK-NEXT: 2 14.3 0.1 0.0 # CHECK: [33] Code Region - Z sqrdmlah.b # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.33 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b # CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b # CHECK-NEXT: [0,3] D===========eeeeER . . . . sqrdmlah z0.b, z0.b, z1.b # CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D====================eeeeER . . sqrdmlah z0.b, z1.b, z2.b # CHECK-NEXT: [1,2] D======================eeeeER . . sqrdmlah z0.b, z1.b, z2.b # CHECK-NEXT: [1,3] D==========================eeeeER sqrdmlah z0.b, z0.b, z1.b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b # CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b # CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqrdmlah z0.b, z0.b, z1.b # CHECK-NEXT: 2 14.3 0.1 0.0 # CHECK: [34] Code Region - Z sqrdmlah.d # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1803 # CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.28 # CHECK-NEXT: IPC: 0.22 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012345678 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . sqrdmlah z0.d, z1.d, z2.d # CHECK-NEXT: [0,2] D========eeeeeER . . . . . sqrdmlah z0.d, z1.d, z2.d # CHECK-NEXT: [0,3] D=============eeeeeER . . . . sqrdmlah z0.d, z0.d, z1.d # CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D=======================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d # CHECK-NEXT: [1,2] D==========================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d # CHECK-NEXT: [1,3] D===============================eeeeeER sqrdmlah z0.d, z0.d, z1.d # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 15.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d # CHECK-NEXT: 2. 2 18.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d # CHECK-NEXT: 3. 2 23.0 0.0 0.0 sqrdmlah z0.d, z0.d, z1.d # CHECK-NEXT: 2 16.5 0.1 0.0 # CHECK: [35] Code Region - Z fcmla ZPmZZ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90 # CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90 # CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.d, p0/m, z0.d, z1.d, #90 # CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90 # CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90 # CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.d, p0/m, z0.d, z1.d, #90 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90 # CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90 # CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.d, p0/m, z0.d, z1.d, #90 # CHECK-NEXT: 2 13.0 0.1 0.0 # CHECK: [36] Code Region - Z fcmla ZZZI # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90 # CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90 # CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.s, z0.s, z1.s[1], #90 # CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90 # CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90 # CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.s, z0.s, z1.s[1], #90 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90 # CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90 # CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.s, z0.s, z1.s[1], #90 # CHECK-NEXT: 2 13.0 0.1 0.0 # CHECK: [37] Code Region - Z fmla ZPmZZ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, p0/m, z0.d, z1.d # CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, p0/m, z0.d, z1.d # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, p0/m, z0.d, z1.d # CHECK-NEXT: 2 11.8 0.1 0.0 # CHECK: [38] Code Region - Z fmla ZZZI # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, z1.d, z2.d[1] # CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, z1.d, z2.d[1] # CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, z0.d, z1.d[1] # CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, z1.d, z2.d[1] # CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, z1.d, z2.d[1] # CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, z0.d, z1.d[1] # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1] # CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1] # CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, z0.d, z1.d[1] # CHECK-NEXT: 2 11.8 0.1 0.0 # CHECK: [39] Code Region - Z fmlalb ZZZ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345678 # CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeER. . . . . fmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [0,2] D=====eeeeER . . . . fmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [0,3] D=========eeeeER . . . fmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D================eeeeER . . fmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [1,2] D==================eeeeER. . fmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [1,3] D======================eeeeER fmlalb z0.s, z0.h, z1.h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h # CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h # CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmlalb z0.s, z0.h, z1.h # CHECK-NEXT: 2 11.8 0.1 0.0 # CHECK: [40] Code Region - Z bfdot # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1603 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot z0.s, z1.h, z2.h # CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot z0.s, z1.h, z2.h # CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot z0.s, z0.h, z1.h # CHECK-NEXT: [1,0] D================eeeER . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot z0.s, z1.h, z2.h # CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot z0.s, z1.h, z2.h # CHECK-NEXT: [1,3] D===========================eeeeeER bfdot z0.s, z0.h, z1.h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot z0.s, z1.h, z2.h # CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot z0.s, z1.h, z2.h # CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot z0.s, z0.h, z1.h # CHECK-NEXT: 2 14.0 0.1 0.0 # CHECK: [41] Code Region - Z bfmmla # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1903 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.21 # CHECK-NEXT: IPC: 0.21 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 0 # CHECK: [0,0] DeeeER . . . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla z0.s, z1.h, z2.h # CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla z0.s, z1.h, z2.h # CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla z0.s, z0.h, z1.h # CHECK-NEXT: [1,0] D===================eeeER. . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla z0.s, z1.h, z2.h # CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla z0.s, z1.h, z2.h # CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla z0.s, z0.h, z1.h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h # CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h # CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla z0.s, z0.h, z1.h # CHECK-NEXT: 2 16.3 0.1 0.0 # CHECK: [42] Code Region - bfmlalb # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 16 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d # CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb z0.s, z1.h, z2.h # CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb z0.s, z0.h, z1.h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d # CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h # CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h # CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: 2 13.0 0.1 0.0