xref: /llvm-project/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll (revision 7652a59407018c057cdc1163c9f64b5b6f0954eb)
; RUN:  llc -mtriple=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN:  llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s
; RUN:  llc -mtriple=amdgcn -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN:  llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s
5
; Integer abs spelled as select(x > 0 - x, x, 0 - x), applied directly to an
; i32 kernel argument. The checks below expect S_ABS_I32 in the MIR produced
; after instruction selection.
; GCN-LABEL: name: s_abs_i32
; GCN: S_ABS_I32
define amdgpu_kernel void @s_abs_i32(ptr addrspace(1) %out, i32 %val) nounwind {
  ; 0 - x, then pick whichever of x and 0 - x compares greater (signed).
  %negated = sub i32 0, %val
  %val.gt.negated = icmp sgt i32 %val, %negated
  %abs = select i1 %val.gt.negated, i32 %val, i32 %negated
  ; The abs result is offset by 2 before being written to %out.
  %abs.plus2 = add i32 %abs, 2
  store i32 %abs.plus2, ptr addrspace(1) %out, align 4
  ret void
}
16
; Same abs pattern as above, but the input is loaded from %src at an index
; taken from llvm.amdgcn.workitem.id.x. The checks expect a VALU subtract
; followed by V_MAX_I32_e64; the subtract opcode that is matched differs
; between the default target and the gfx900 RUN configurations.
; GCN-LABEL: name: v_abs_i32
; SI:  V_SUB_CO_U32_e64
; GFX900: V_SUB_U32_e64
; GCN: V_MAX_I32_e64
define amdgpu_kernel void @v_abs_i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
  ; Address of the i32 element of %src selected by the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %elt.ptr = getelementptr inbounds i32, ptr addrspace(1) %src, i32 %lane
  %x = load i32, ptr addrspace(1) %elt.ptr, align 4
  ; 0 - x, then pick whichever of x and 0 - x compares greater (signed).
  %negated = sub i32 0, %x
  %x.gt.negated = icmp sgt i32 %x, %negated
  %abs = select i1 %x.gt.negated, i32 %x, i32 %negated
  ; The abs result is offset by 2 before being written to %out.
  %abs.plus2 = add i32 %abs, 2
  store i32 %abs.plus2, ptr addrspace(1) %out, align 4
  ret void
}
32
; Two-element version of the kernel-argument abs test. abs is computed per
; element as select(x > 0 - x, x, 0 - x) on a <2 x i32> kernel argument, and
; the checks expect one S_ABS_I32 per element.
; GCN-LABEL: name: s_abs_v2i32
; GCN: S_ABS_I32
; GCN: S_ABS_I32
define amdgpu_kernel void @s_abs_v2i32(ptr addrspace(1) %out, <2 x i32> %val) nounwind {
  ; Build the <0, 0> and <2, 2> constants one lane at a time. The starting
  ; vector is poison rather than the deprecated undef; both lanes are
  ; overwritten, so the resulting constants are unchanged.
  %z0 = insertelement <2 x i32> poison, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> poison, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  ; Per-element 0 - x, then keep whichever of x and 0 - x compares greater.
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  ; Each abs result is offset by 2 before the vector is written to %out.
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, ptr addrspace(1) %out, align 4
  ret void
}
48
; Two-element version of the loaded-value abs test. The <2 x i32> input is
; loaded from %src at an index taken from llvm.amdgcn.workitem.id.x, and abs
; is computed per element as select(x > 0 - x, x, 0 - x).
; The checks match one VALU subtract (opcode depends on the RUN configuration)
; and then two V_MAX_I32_e64; a second subtract is not matched explicitly.
; GCN-LABEL: name: v_abs_v2i32
; SI:  V_SUB_CO_U32_e64
; GFX900: V_SUB_U32_e64
; GCN: V_MAX_I32_e64
; GCN: V_MAX_I32_e64
define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
  ; Build the <0, 0> and <2, 2> constants one lane at a time. The starting
  ; vector is poison rather than the deprecated undef; both lanes are
  ; overwritten, so the resulting constants are unchanged.
  %z0 = insertelement <2 x i32> poison, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> poison, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  ; Address of the <2 x i32> element of %src selected by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x i32>, ptr addrspace(1) %src, i32 %tid
  %val = load <2 x i32>, ptr addrspace(1) %gep.in, align 4
  ; Per-element 0 - x, then keep whichever of x and 0 - x compares greater.
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  ; Each abs result is offset by 2 before the vector is written to %out.
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, ptr addrspace(1) %out, align 4
  ret void
}
69
; Intrinsic called by the v_abs_* kernels to obtain the index used for their
; loads from %src. It carries attribute group #0.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group referenced by the intrinsic declaration above.
attributes #0 = { nounwind readnone }
; Group #1 is defined here but nothing visible in this file references it;
; the kernels spell out nounwind inline instead.
attributes #1 = { nounwind }
74