; xref: /llvm-project/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll (revision f1ec0d12bb0843f0deab83ef2b5cf1339cbc4f0b)
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

; Tests for CodeGenPrepare's slow-division bypass on NVPTX: 64-bit div/rem
; whose operands provably fit in 32 bits should be narrowed to 32-bit ops.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; We only use the div instruction -- the rem should be DCE'ed.
; CHECK-LABEL: @div_only
define void @div_only(i64 %a, i64 %b, ptr %retptr) {
  ; CHECK: udiv i32
  ; CHECK-NOT: urem
  ; CHECK: sdiv i64
  ; CHECK-NOT: rem
  %d = sdiv i64 %a, %b
  store i64 %d, ptr %retptr
  ret void
}

; We only use the rem instruction -- the div should be DCE'ed.
; CHECK-LABEL: @rem_only
define void @rem_only(i64 %a, i64 %b, ptr %retptr) {
  ; CHECK-NOT: div
  ; CHECK: urem i32
  ; CHECK-NOT: div
  ; CHECK: rem i64
  ; CHECK-NOT: div
  %d = srem i64 %a, %b
  store i64 %d, ptr %retptr
  ret void
}

; The dividend is zext'ed from i32, so it fits in 32 bits; division by a small
; constant should be narrowed to a 32-bit udiv with no runtime bypass check.
; CHECK-LABEL: @udiv_by_constant(
define i64 @udiv_by_constant(i32 %a) {
; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = udiv i32 [[TMP1]], 50
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    ret i64 [[TMP3]]

  %a.zext = zext i32 %a to i64
  %wide.div = udiv i64 %a.zext, 50
  ret i64 %wide.div
}

; Same as @udiv_by_constant, but for urem: the wide remainder should be
; narrowed to a 32-bit urem since the dividend provably fits in 32 bits.
; CHECK-LABEL: @urem_by_constant(
define i64 @urem_by_constant(i32 %a) {
; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = urem i32 [[TMP1]], 50
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    ret i64 [[TMP3]]

  %a.zext = zext i32 %a to i64
  %wide.div = urem i64 %a.zext, 50
  ret i64 %wide.div
}

; Negative test: instead of emitting a runtime check on %a, we prefer to let the
; DAGCombiner transform this division by constant into a multiplication (with a
; "magic constant").
;
; CHECK-LABEL: @udiv_by_constant_negative_0(
define i64 @udiv_by_constant_negative_0(i64 %a) {
; CHECK-NEXT:    [[WIDE_DIV:%.*]] = udiv i64 [[A:%.*]], 50
; CHECK-NEXT:    ret i64 [[WIDE_DIV]]

  %wide.div = udiv i64 %a, 50
  ret i64 %wide.div
}

; Negative test: while we know the dividend is short, the divisor isn't.  This
; test is here for completeness, but instcombine will optimize this to return 0.
;
; CHECK-LABEL: @udiv_by_constant_negative_1(
define i64 @udiv_by_constant_negative_1(i32 %a) {
; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[WIDE_DIV:%.*]] = udiv i64 [[A_ZEXT]], 8589934592
; CHECK-NEXT:    ret i64 [[WIDE_DIV]]

  %a.zext = zext i32 %a to i64
  %wide.div = udiv i64 %a.zext, 8589934592 ;; == 1 << 33
  ret i64 %wide.div
}

; URem version of udiv_by_constant_negative_0
;
; CHECK-LABEL: @urem_by_constant_negative_0(
define i64 @urem_by_constant_negative_0(i64 %a) {
; CHECK-NEXT:    [[WIDE_DIV:%.*]] = urem i64 [[A:%.*]], 50
; CHECK-NEXT:    ret i64 [[WIDE_DIV]]

  %wide.div = urem i64 %a, 50
  ret i64 %wide.div
}

; URem version of udiv_by_constant_negative_1
;
; CHECK-LABEL: @urem_by_constant_negative_1(
define i64 @urem_by_constant_negative_1(i32 %a) {
; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[WIDE_DIV:%.*]] = urem i64 [[A_ZEXT]], 8589934592
; CHECK-NEXT:    ret i64 [[WIDE_DIV]]

  %a.zext = zext i32 %a to i64
  %wide.div = urem i64 %a.zext, 8589934592 ;; == 1 << 33
  ret i64 %wide.div
}