# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx1200 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s

# Purpose: feed ten v_s_* instructions (exp, log, rcp, rsq and sqrt, each in an
# f32 and an f16 form) through llvm-mca for gfx1200 for a single iteration and
# verify the complete report, including the timeline view.
#
# Several instructions read an SGPR written by an earlier one (for example
# v_s_log_f32 reads the s0 written by v_s_exp_f32, and the last three all write
# s5). Presumably this is meant to exercise dependency stalls: the expected
# report shows 45 total cycles for 10 instructions at a dispatch width of 1.
#
# The expected output below is generated by the script named on the first
# line; regenerate it with that script rather than editing it by hand.

v_s_exp_f32 s0, s0
v_s_log_f32 s0, s0
v_s_rcp_f32 s1, s1
v_s_rsq_f32 s1, s0
v_s_sqrt_f32 s2, s1
v_s_exp_f16 s3, s1
v_s_log_f16 s4, s1
v_s_rcp_f16 s5, s2
v_s_rsq_f16 s5, s4
v_s_sqrt_f16 s5, s5

# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 45
# CHECK-NEXT: Total uOps: 10

# CHECK: Dispatch Width: 1
# CHECK-NEXT: uOps Per Cycle: 0.22
# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 7 1.00 U v_s_exp_f32 s0, s0
# CHECK-NEXT: 1 7 1.00 U v_s_log_f32 s0, s0
# CHECK-NEXT: 1 7 1.00 U v_s_rcp_f32 s1, s1
# CHECK-NEXT: 1 7 1.00 U v_s_rsq_f32 s1, s0
# CHECK-NEXT: 1 7 1.00 U v_s_sqrt_f32 s2, s1
# CHECK-NEXT: 1 7 1.00 U v_s_exp_f16 s3, s1
# CHECK-NEXT: 1 7 1.00 U v_s_log_f16 s4, s1
# CHECK-NEXT: 1 7 1.00 U v_s_rcp_f16 s5, s2
# CHECK-NEXT: 1 7 1.00 U v_s_rsq_f16 s5, s4
# CHECK-NEXT: 1 7 1.00 U v_s_sqrt_f16 s5, s5

# CHECK: Resources:
# CHECK-NEXT: [0] - HWBranch
# CHECK-NEXT: [1] - HWExport
# CHECK-NEXT: [2] - HWLGKM
# CHECK-NEXT: [3] - HWRC
# CHECK-NEXT: [4] - HWSALU
# CHECK-NEXT: [5] - HWVALU
# CHECK-NEXT: [6] - HWVMEM

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6]
# CHECK-NEXT: - - - 10.00 - 10.00 -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_exp_f32 s0, s0
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_log_f32 s0, s0
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_rcp_f32 s1, s1
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_rsq_f32 s1, s0
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_sqrt_f32 s2, s1
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_exp_f16 s3, s1
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_log_f16 s4, s1
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_rcp_f16 s5, s2
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_rsq_f16 s5, s4
# CHECK-NEXT: - - - 1.00 - 1.00 - v_s_sqrt_f16 s5, s5

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 01234

# CHECK: [0,0] DeeeeeeE . . . . . . . . v_s_exp_f32 s0, s0
# CHECK-NEXT: [0,1] . . DeeeeeeE. . . . . . . v_s_log_f32 s0, s0
# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . v_s_rcp_f32 s1, s1
# CHECK-NEXT: [0,3] . . . DeeeeeeE . . . . . v_s_rsq_f32 s1, s0
# CHECK-NEXT: [0,4] . . . . .DeeeeeeE . . . . v_s_sqrt_f32 s2, s1
# CHECK-NEXT: [0,5] . . . . . DeeeeeeE. . . . v_s_exp_f16 s3, s1
# CHECK-NEXT: [0,6] . . . . . DeeeeeeE . . . v_s_log_f16 s4, s1
# CHECK-NEXT: [0,7] . . . . . . DeeeeeeE . . v_s_rcp_f16 s5, s2
# CHECK-NEXT: [0,8] . . . . . . DeeeeeeE . . v_s_rsq_f16 s5, s4
# CHECK-NEXT: [0,9] . . . . . . . . DeeeeeeE v_s_sqrt_f16 s5, s5

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 0.0 0.0 0.0 v_s_exp_f32 s0, s0
# CHECK-NEXT: 1. 1 0.0 0.0 0.0 v_s_log_f32 s0, s0
# CHECK-NEXT: 2. 1 0.0 0.0 0.0 v_s_rcp_f32 s1, s1
# CHECK-NEXT: 3. 1 0.0 0.0 0.0 v_s_rsq_f32 s1, s0
# CHECK-NEXT: 4. 1 0.0 0.0 0.0 v_s_sqrt_f32 s2, s1
# CHECK-NEXT: 5. 1 0.0 0.0 0.0 v_s_exp_f16 s3, s1
# CHECK-NEXT: 6. 1 0.0 0.0 0.0 v_s_log_f16 s4, s1
# CHECK-NEXT: 7. 1 0.0 0.0 0.0 v_s_rcp_f16 s5, s2
# CHECK-NEXT: 8. 1 0.0 0.0 0.0 v_s_rsq_f16 s5, s4
# CHECK-NEXT: 9. 1 0.0 0.0 0.0 v_s_sqrt_f16 s5, s5
# CHECK-NEXT: 1 0.0 0.0 0.0 <total>