xref: /llvm-project/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll (revision ef992b60798b6cd2c50b25351bfc392e319896b7)
1; RUN: opt < %s -S -mtriple=amdgcn-- -passes=loop-unroll | FileCheck %s
2
3; Check the handling of amdgpu.loop.unroll.threshold metadata which can be used to
4; set the default threshold for a loop. This metadata overrides both the AMDGPU
5; default, and any value specified by the amdgpu-unroll-threshold function attribute
6; (which sets a threshold for all loops in the function).
7
8; Check that the loop in unroll_default is not fully unrolled when only the
9; default unroll threshold applies.
10; CHECK-LABEL: @unroll_default
11; CHECK: entry:
12; CHECK: br i1 %cmp
13; CHECK: ret void
14
15@in = internal unnamed_addr global ptr null, align 8   ; source of the i64 load in each test loop
16@out = internal unnamed_addr global ptr null, align 8  ; destination of the i64 store in each test loop
17
18define void @unroll_default() { ; baseline: no threshold attribute and no loop metadata
19entry:
20  br label %do.body
21
22do.body:                                          ; preds = %do.body, %entry
23  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] ; counter: 0 from entry, %inc from the back-edge
24  %v1 = load i64, ptr @in, align 8
25  store i64 %v1, ptr @out, align 8
26  %inc = add nsw i32 %i.0, 1
27  %cmp = icmp slt i32 %inc, 100                   ; body runs 100 times (%inc = 1..100)
28  br i1 %cmp, label %do.body, label %do.end       ; latch carries no !llvm.loop metadata
29
30do.end:                                           ; preds = %do.body
31  ret void
32}
33
34; Check that the same loop in unroll_full is fully unrolled when the default
35; unroll threshold is raised via the amdgpu.loop.unroll.threshold metadata.
36; CHECK-LABEL: @unroll_full
37; CHECK: entry:
38; CHECK-NOT: br i1 %cmp
39; CHECK: ret void
40
41define void @unroll_full() { ; no function attribute; the loop carries threshold metadata
42entry:
43  br label %do.body
44
45do.body:                                          ; preds = %do.body, %entry
46  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] ; counter: 0 from entry, %inc from the back-edge
47  %v1 = load i64, ptr @in, align 8
48  store i64 %v1, ptr @out, align 8
49  %inc = add nsw i32 %i.0, 1
50  %cmp = icmp slt i32 %inc, 100                   ; body runs 100 times (%inc = 1..100)
51  br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1 ; !1 -> amdgpu.loop.unroll.threshold = 1000
52
53do.end:                                           ; preds = %do.body
54  ret void
55}
56
57; Check that the same loop in override_no_unroll is not unrolled when a high default
58; unroll threshold, specified using the amdgpu-unroll-threshold function attribute,
59; is overridden by a low threshold using the amdgpu.loop.unroll.threshold metadata.
60
61; CHECK-LABEL: @override_no_unroll
62; CHECK: entry:
63; CHECK: br i1 %cmp
64; CHECK: ret void
65
66define void @override_no_unroll() #0 { ; #0 sets "amdgpu-unroll-threshold"="1000"
67entry:
68  br label %do.body
69
70do.body:                                          ; preds = %do.body, %entry
71  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] ; counter: 0 from entry, %inc from the back-edge
72  %v1 = load i64, ptr @in, align 8
73  store i64 %v1, ptr @out, align 8
74  %inc = add nsw i32 %i.0, 1
75  %cmp = icmp slt i32 %inc, 100                   ; body runs 100 times (%inc = 1..100)
76  br i1 %cmp, label %do.body, label %do.end, !llvm.loop !3 ; !3 -> amdgpu.loop.unroll.threshold = 100
77
78do.end:                                           ; preds = %do.body
79  ret void
80}
81
82; Check that the same loop in override_unroll is fully unrolled when a low default
83; unroll threshold, specified using the amdgpu-unroll-threshold function attribute,
84; is overridden by a high threshold using the amdgpu.loop.unroll.threshold metadata.
85
86; CHECK-LABEL: @override_unroll
87; CHECK: entry:
88; CHECK-NOT: br i1 %cmp
89; CHECK: ret void
90
91define void @override_unroll() #1 { ; #1 sets "amdgpu-unroll-threshold"="100"
92entry:
93  br label %do.body
94
95do.body:                                          ; preds = %do.body, %entry
96  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] ; counter: 0 from entry, %inc from the back-edge
97  %v1 = load i64, ptr @in, align 8
98  store i64 %v1, ptr @out, align 8
99  %inc = add nsw i32 %i.0, 1
100  %cmp = icmp slt i32 %inc, 100                  ; body runs 100 times (%inc = 1..100)
101  br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1 ; !1 -> amdgpu.loop.unroll.threshold = 1000
102
103do.end:                                           ; preds = %do.body
104  ret void
105}
106
107attributes #0 = { "amdgpu-unroll-threshold"="1000" } ; function-wide threshold on @override_no_unroll
108attributes #1 = { "amdgpu-unroll-threshold"="100" }  ; function-wide threshold on @override_unroll
109
110!1 = !{!1, !2}                                       ; loop ID (first operand is itself) for @unroll_full and @override_unroll
111!2 = !{!"amdgpu.loop.unroll.threshold", i32 1000}    ; per-loop threshold of 1000
112!3 = !{!3, !4}                                       ; loop ID (first operand is itself) for @override_no_unroll
113!4 = !{!"amdgpu.loop.unroll.threshold", i32 100}     ; per-loop threshold of 100
114