xref: /llvm-project/llvm/test/Transforms/JumpThreading/divergent-target-test.ll (revision fa90f6b9d0fa2742df4548156c498c48dc796ec4)
1; REQUIRES: amdgpu-registered-target && x86-registered-target
2; RUN: opt < %s -mtriple=amdgcn -passes=jump-threading -S | FileCheck %s -check-prefixes=CHECK,DIVERGENT
3; RUN: opt < %s -mtriple=x86_64 -passes=jump-threading -S | FileCheck %s -check-prefixes=CHECK,UNIFORM
4
5; Ensure that on a target without branch divergence the usual jump threading optimization is performed.
6; On a target with branch divergence no optimization happens, so the IR is unchanged.
7
; External callees used by the test functions below; no bodies are visible in
; this file. @f1 and @f2 supply the i32 values that are merged by a phi, and
; @f3 is a void call placed in the T2 block.
8declare i32 @f1()
9declare i32 @f2()
10declare void @f3()
11
; @test: a diamond on %cond whose join block (Merge) branches again on a phi
; that is constant per predecessor (true from T1, false from F1), so the final
; destination of each predecessor is statically known.
; The uniform-prefixed patterns expect T1, F1 and Merge to be absent and the
; calls to show up directly in T2/F2; the divergent-prefixed patterns expect
; every original block name to still be present.
; NOTE(review): the block-name patterns for the divergent run are bare
; substrings (no trailing colon), so they can also match a `label %T1`-style
; operand rather than the block label itself - confirm that looseness is
; intended.
12define i32 @test(i1 %cond) {
13; CHECK: test
14	br i1 %cond, label %T1, label %F1
15
16; DIVERGENT:   T1
17; UNIFORM-NOT: T1
18T1:
19	%v1 = call i32 @f1()
20	br label %Merge
21; DIVERGENT:   F1
22; UNIFORM-NOT: F1
23F1:
24	%v2 = call i32 @f2()
25	br label %Merge
26; DIVERGENT:   Merge
27; UNIFORM-NOT: Merge
28Merge:
; %A depends only on which predecessor was taken; %B carries the matching
; call result (%v1 from T1, %v2 from F1).
29	%A = phi i1 [true, %T1], [false, %F1]
30	%B = phi i32 [%v1, %T1], [%v2, %F1]
31	br i1 %A, label %T2, label %F2
32
33; DIVERGENT:   T2
34T2:
35; UNIFORM: T2:
36; UNIFORM: %v1 = call i32 @f1()
37; UNIFORM: call void @f3()
38; UNIFORM: ret i32 %v1
39	call void @f3()
; In the input this returns the phi %B; the uniform-run patterns above expect
; it to have become %v1 in the output.
40	ret i32 %B
41; DIVERGENT:   F2
42F2:
43; UNIFORM: F2:
44; UNIFORM: %v2 = call i32 @f2()
45; UNIFORM: ret i32 %v2
46	ret i32 %B
47}
48
49; Check that the divergence check is skipped if there can't be divergence in
50; the function.
; @requires_single_lane_exec: the same diamond-plus-rebranch shape as @test,
; but carrying attribute group #0. Only the common check prefix is used here,
; so both RUN lines share one expectation: T1, F1 and Merge must not appear
; between the function name and T2, and the calls must sit directly in T2/F2.
51define i32 @requires_single_lane_exec(i1 %cond) #0 {
52; CHECK: requires_single_lane_exec
53	br i1 %cond, label %T1, label %F1
54
55; CHECK-NOT: T1
56T1:
57	%v1 = call i32 @f1()
58	br label %Merge
59; CHECK-NOT: F1
60F1:
61	%v2 = call i32 @f2()
62	br label %Merge
63; CHECK-NOT: Merge
64Merge:
; %A is true when arriving from T1 and false from F1; %B selects the matching
; call result.
65	%A = phi i1 [true, %T1], [false, %F1]
66	%B = phi i32 [%v1, %T1], [%v2, %F1]
67	br i1 %A, label %T2, label %F2
68
69T2:
70; CHECK: T2:
71; CHECK: %v1 = call i32 @f1()
72; CHECK: call void @f3()
73; CHECK: ret i32 %v1
74	call void @f3()
; Input returns the phi %B; the patterns above expect %v1 in the output.
75	ret i32 %B
76F2:
77; CHECK: F2:
78; CHECK: %v2 = call i32 @f2()
79; CHECK: ret i32 %v2
80	ret i32 %B
81}
82
; Attribute group #0 is attached to @requires_single_lane_exec above.
; NOTE(review): "1,1" presumably reads as min,max flat work-group size, i.e. a
; single work-item - confirm against the AMDGPU backend documentation.
83attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }
84