xref: /llvm-project/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (revision f4037277bb0220cb1dece91d21d4fdc2995eae7a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2; RUN: opt -S -mtriple=amdgcn-amd- -passes=amdgpu-attributor %s | FileCheck %s
3
4; Test that the amdgpu-attributor pass behaves correctly on recursive functions.
5; The recursive functions under test compute Fibonacci numbers.
6
; Recursive Fibonacci with default (non-internal) linkage: returns 1 when %n
; is 0 or 1, otherwise fib(%n - 1) + fib(%n - 2).
; The assertions below bind this function's attribute group to ATTR0, whose
; expected definition at the end of the file carries
; "uniform-work-group-size"="false".
; NOTE(review): presumably "false" because a non-internal function may have
; callers outside this module - confirm against the AMDGPU attributor.
7define i32 @fib(i32 %n) #0 {
8; CHECK-LABEL: define {{[^@]+}}@fib
9; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
10; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N]], 0
11; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
12; CHECK:       cont1:
13; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[N]], 1
14; CHECK-NEXT:    br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
15; CHECK:       cont2:
16; CHECK-NEXT:    [[NM1:%.*]] = sub i32 [[N]], 1
17; CHECK-NEXT:    [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]])
18; CHECK-NEXT:    [[NM2:%.*]] = sub i32 [[N]], 2
19; CHECK-NEXT:    [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]])
20; CHECK-NEXT:    [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
21; CHECK-NEXT:    ret i32 [[RETVAL]]
22; CHECK:       exit:
23; CHECK-NEXT:    ret i32 1
24;
; Base case 1: %n == 0 goes straight to the exit block.
25  %cmp1 = icmp eq i32 %n, 0
26  br i1 %cmp1, label %exit, label %cont1
27
28cont1:
; Base case 2: %n == 1 also goes to the exit block.
29  %cmp2 = icmp eq i32 %n, 1
30  br i1 %cmp2, label %exit, label %cont2
31
32cont2:
; Recursive case: two self-calls, on %n - 1 and %n - 2, summed for the result.
33  %nm1 = sub i32 %n, 1
34  %fibm1 = call i32 @fib(i32 %nm1)
35  %nm2 = sub i32 %n, 2
36  %fibm2 = call i32 @fib(i32 %nm2)
37  %retval = add i32 %fibm1, %fibm2
38
39  ret i32 %retval
40
41exit:
; Shared return for both base cases.
42  ret i32 1
43}
44
; Same recursive Fibonacci as @fib, but declared with internal linkage:
; returns 1 when %n is 0 or 1, otherwise
; fib_internal(%n - 1) + fib_internal(%n - 2).
; The assertions below bind this function's attribute group to ATTR1, whose
; expected definition at the end of the file carries
; "uniform-work-group-size"="true".
; NOTE(review): presumably "true" is inherited from @kernel, the only caller
; in this file besides the function itself - confirm against the AMDGPU
; attributor.
45define internal i32 @fib_internal(i32 %n) #0 {
46; CHECK-LABEL: define {{[^@]+}}@fib_internal
47; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
48; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N]], 0
49; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
50; CHECK:       cont1:
51; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[N]], 1
52; CHECK-NEXT:    br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
53; CHECK:       cont2:
54; CHECK-NEXT:    [[NM1:%.*]] = sub i32 [[N]], 1
55; CHECK-NEXT:    [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
56; CHECK-NEXT:    [[NM2:%.*]] = sub i32 [[N]], 2
57; CHECK-NEXT:    [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
58; CHECK-NEXT:    [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
59; CHECK-NEXT:    ret i32 [[RETVAL]]
60; CHECK:       exit:
61; CHECK-NEXT:    ret i32 1
62;
; Base case 1: %n == 0 goes straight to the exit block.
63  %cmp1 = icmp eq i32 %n, 0
64  br i1 %cmp1, label %exit, label %cont1
65
66cont1:
; Base case 2: %n == 1 also goes to the exit block.
67  %cmp2 = icmp eq i32 %n, 1
68  br i1 %cmp2, label %exit, label %cont2
69
70cont2:
; Recursive case: two self-calls, on %n - 1 and %n - 2, summed for the result.
71  %nm1 = sub i32 %n, 1
72  %fibm1 = call i32 @fib_internal(i32 %nm1)
73  %nm2 = sub i32 %n, 2
74  %fibm2 = call i32 @fib_internal(i32 %nm2)
75  %retval = add i32 %fibm1, %fibm2
76
77  ret i32 %retval
78
79exit:
; Shared return for both base cases.
80  ret i32 1
81}
82
; Kernel entry point that calls both recursive functions with the constant 5
; and stores each result through %m (an address space 1 pointer).
; It uses attribute group #1, the only place in the input IR that sets
; "uniform-work-group-size" (to "true"). The assertions bind the kernel's
; output attribute group to ATTR2, which is expected to keep that value.
83define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
84; CHECK-LABEL: define {{[^@]+}}@kernel
85; CHECK-SAME: (ptr addrspace(1) [[M:%.*]]) #[[ATTR2:[0-9]+]] {
86; CHECK-NEXT:    [[R:%.*]] = call i32 @fib(i32 5)
87; CHECK-NEXT:    [[R2:%.*]] = call i32 @fib_internal(i32 5)
88; CHECK-NEXT:    store i32 [[R]], ptr addrspace(1) [[M]], align 4
89; CHECK-NEXT:    store i32 [[R2]], ptr addrspace(1) [[M]], align 4
90; CHECK-NEXT:    ret void
91;
; One call to the externally visible variant and one to the internal variant.
92  %r = call i32 @fib(i32 5)
93  %r2 = call i32 @fib_internal(i32 5)
94
; Both results are stored to the same address %m; the stores keep the call
; results in use.
95  store i32 %r, ptr addrspace(1) %m
96  store i32 %r2, ptr addrspace(1) %m
97  ret void
98}
99
100; nounwind and readnone are added to match attributor results.
; Group #0 is attached to @fib and @fib_internal and does not mention
; "uniform-work-group-size". Group #1 is attached to @kernel and is the only
; input attribute group that sets "uniform-work-group-size".
101attributes #0 = { nounwind readnone }
102attributes #1 = { "uniform-work-group-size"="true" }
103;.
104; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
105; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
106; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" }
107;.
108