; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll (revision f956e7fbf161447b9236f7c4448a9d02d3564261)
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
; The -global-isel=1 SI run line is skipped since store is not yet implemented.
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=MEMTIME -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s

; Intrinsic under test: takes no arguments and yields an i64. It is called by
; the test functions in this file.
declare i64 @llvm.readcyclecounter() #0

; Calls llvm.readcyclecounter twice and writes each i64 result to %out with a
; volatile store. What the directives below expect, per run configuration:
;   * tahiti / tonga / gfx1010 / gfx1030 runs: an s_memtime into an SGPR pair
;     followed by a 64-bit store, and then a second s_memtime / store pair.
;   * gfx1100 runs: s_getreg_b32 of hwreg(HW_REG_SHADER_CYCLES, 0, 20) copied
;     to a VGPR and stored as the first register of a pair whose second
;     register was set to 0; the same sequence is expected a second time.
;   * gfx1200 runs: s_getreg_b32 reads of SHADER_CYCLES_HI, SHADER_CYCLES_LO
;     and SHADER_CYCLES_HI again, an s_cmp_eq_u32 of the two HI values, and an
;     s_cselect_b32 between the LO value and 0.
; GCN-LABEL: {{^}}test_readcyclecounter:
; MEMTIME-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG:     s_load_{{dwordx2|b64}}
; GFX12:       s_getreg_b32 [[HI1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
; GFX12:       s_getreg_b32 [[LO1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_LO)
; GFX12:       s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
; GFX12:       s_cmp_eq_u32 [[HI1]], [[HI2]]
; GFX12:       s_cselect_b32 {{s[0-9]+}}, [[LO1]], 0
; GCN-DAG:     kmcnt
; MEMTIME:     store_dwordx2
; SIVI-NOT:    kmcnt
; MEMTIME:     s_memtime s{{\[[0-9]+:[0-9]+\]}}
; MEMTIME:     store_dwordx2

; GETREG-GISEL-DAG:  s_mov_b32 s[[SZERO:[0-9]+]], 0
; GETREG-GISEL-DAG:  v_mov_b32_e32 v[[ZERO:[0-9]+]], s[[SZERO]]
; GETREG-SDAG-DAG:  v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; GETREG-DAG:  s_getreg_b32 [[CNT1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES, 0, 20)
; GETREG-DAG:  v_mov_b32_e32 v[[VCNT1:[0-9]+]], [[CNT1]]
; GETREG:      global_store_{{dwordx2|b64}} v{{.+}}, v[[[VCNT1]]:[[ZERO]]]
; GETREG:      s_getreg_b32 [[CNT2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES, 0, 20)
; GETREG:      v_mov_b32_e32 v[[VCNT2:[0-9]+]], [[CNT2]]
; GETREG:      global_store_{{dwordx2|b64}} v{{.+}}, v[[[VCNT2]]:[[ZERO]]]

define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) #0 {
  %cycle0 = call i64 @llvm.readcyclecounter()
  store volatile i64 %cycle0, ptr addrspace(1) %out

  %cycle1 = call i64 @llvm.readcyclecounter()
  store volatile i64 %cycle1, ptr addrspace(1) %out
  ret void
}

; This test used to crash in ScheduleDAG.
;
; Reads the cycle counter, loads an i64 through the inreg addrspace(4) pointer
; %in, adds the two values and returns the sum truncated to i32. The directives
; expect a scalar load alongside the counter read on every configuration
; (s_memtime, s_getreg_b32 of HW_REG_SHADER_CYCLES, or the HI/LO/HI
; s_getreg_b32 sequence with s_cmp_eq_u32 / s_cselect_b32).
;
; GCN-LABEL: {{^}}test_readcyclecounter_smem:
; MEMTIME-DAG: s_memtime
; GFX12:       s_getreg_b32 [[HI1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
; GFX12:       s_getreg_b32 [[LO1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_LO)
; GFX12:       s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
; GCN-DAG:     s_load_{{dword|b32|b64}}
; GETREG-DAG:  s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_SHADER_CYCLES, 0, 20)
; GFX12:       s_cmp_eq_u32 [[HI1]], [[HI2]]
; GFX12:       s_cselect_b32 {{s[0-9]+}}, [[LO1]], 0
define amdgpu_cs i32 @test_readcyclecounter_smem(ptr addrspace(4) inreg %in) #0 {
  %cycle0 = call i64 @llvm.readcyclecounter()
  %in.v = load i64, ptr addrspace(4) %in
  %r.64 = add i64 %cycle0, %in.v
  %r.32 = trunc i64 %r.64 to i32
  ret i32 %r.32
}

; Attribute group #0 is attached to the intrinsic declaration and to both
; test functions in this file.
attributes #0 = { nounwind }