xref: /llvm-project/llvm/test/Transforms/InstCombine/udiv-simplify.ll (revision 56c091ea7106507b36015297ee9005c9d5fab0bf)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
; Dividing by -1 (the all-ones i32 value) yields 1 only when the dividend is
; also all-ones. The lshr by 1 clears the top bit of %y, so the quotient is
; always 0 and the function is expected to fold to `ret i64 0`.
4define i64 @test1(i32 %x) nounwind {
5; CHECK-LABEL: @test1(
6; CHECK-NEXT:    ret i64 0
7;
  ; %y has its top bit clear, so it can never equal the all-ones divisor.
8  %y = lshr i32 %x, 1
9  %r = udiv i32 %y, -1
10  %z = sext i32 %r to i64
11  ret i64 %z
12}
; The lshr by 31 leaves only bit 0, so %y is 0 or 1. Either value divided by 3
; is 0, so the function is expected to fold to `ret i64 0`.
13define i64 @test2(i32 %x) nounwind {
14; CHECK-LABEL: @test2(
15; CHECK-NEXT:    ret i64 0
16;
  ; %y is 0 or 1, strictly less than the divisor 3.
17  %y = lshr i32 %x, 31
18  %r = udiv i32 %y, 3
19  %z = sext i32 %r to i64
20  ret i64 %z
21}
22
23; The udiv instructions shouldn't be optimized away, and the
24; sext instructions should be optimized to zext.
25
; The divisor %g is unknown, so the udiv has to stay in the expected output.
; %y is at most 3 after the lshr by 30, and an unsigned quotient never exceeds
; its dividend, so the sign bit of %r is clear and the sext is expected to
; become `zext nneg`.
26define i64 @test1_PR2274(i32 %x, i32 %g) nounwind {
27; CHECK-LABEL: @test1_PR2274(
28; CHECK-NEXT:    [[Y:%.*]] = lshr i32 [[X:%.*]], 30
29; CHECK-NEXT:    [[R:%.*]] = udiv i32 [[Y]], [[G:%.*]]
30; CHECK-NEXT:    [[Z:%.*]] = zext nneg i32 [[R]] to i64
31; CHECK-NEXT:    ret i64 [[Z]]
32;
  ; Only the two low bits of %y can be set.
33  %y = lshr i32 %x, 30
34  %r = udiv i32 %y, %g
35  %z = sext i32 %r to i64
36  ret i64 %z
37}
; Variant of the PR2274 test with a shift by 31, so %y is 0 or 1. The divisor
; %v is unknown, so the udiv is expected to remain; its result cannot have the
; sign bit set, so the sext is expected to become `zext nneg`.
38define i64 @test2_PR2274(i32 %x, i32 %v) nounwind {
39; CHECK-LABEL: @test2_PR2274(
40; CHECK-NEXT:    [[Y:%.*]] = lshr i32 [[X:%.*]], 31
41; CHECK-NEXT:    [[R:%.*]] = udiv i32 [[Y]], [[V:%.*]]
42; CHECK-NEXT:    [[Z:%.*]] = zext nneg i32 [[R]] to i64
43; CHECK-NEXT:    ret i64 [[Z]]
44;
  ; Only bit 0 of %y can be set.
45  %y = lshr i32 %x, 31
46  %r = udiv i32 %y, %v
47  %z = sext i32 %r to i64
48  ret i64 %z
49}
50
51; The udiv should be simplified according to the rule:
52; X udiv (C1 << N), where C1 is `1<<C2` --> X >> (N+C2)
; External global; @PR30366 uses its address (via ptrtoint to i16) as a
; non-foldable constant shift amount.
53@b = external global [1 x i16]
54
; The divisor is `1 << N` (zero-extended to i32), where N is a constant
; expression taken from the address of @b. The expected output replaces the
; udiv with an lshr of %z by the zero-extended shift amount.
55define i32 @PR30366(i1 %a) {
56; CHECK-LABEL: @PR30366(
57; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[A:%.*]] to i32
58; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 ptrtoint (ptr @b to i16) to i32
59; CHECK-NEXT:    [[D1:%.*]] = lshr i32 [[Z]], [[TMP1]]
60; CHECK-NEXT:    ret i32 [[D1]]
61;
62  %z = zext i1 %a to i32
  ; Power of two whose exponent is a constant expression, not a literal.
63  %shl = shl i16 1, ptrtoint (ptr @b to i16)
64  %z2 = zext i16 %shl to i32
65  %d = udiv i32 %z, %z2
66  ret i32 %d
67}
68
69; OSS-Fuzz #4857
70; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=4857
; Fuzzer-derived input on an unusual bit width (i177). Expected output: the
; stored compare becomes poison and the returned value becomes 0.
; Reasoning: %B5 = %Y udiv -1 is 1 if %Y is all-ones and 0 otherwise, so %B2 is
; -1 or -2. If %B5 is 0 then %B6 is 0 and the final udiv divides by zero
; (undefined behavior); if %B5 is 1 then %B6 is -1 and 1 udiv -1 is 0. Either
; way 0 is a valid result. The ashr shifts by %B2, which as an unsigned amount
; is far larger than the bit width, so %B13 and everything derived from it
; (including %C9) is poison.
71define i177 @ossfuzz_4857(i177 %X, i177 %Y) {
72; CHECK-LABEL: @ossfuzz_4857(
73; CHECK-NEXT:    store i1 poison, ptr undef, align 1
74; CHECK-NEXT:    ret i177 0
75;
  ; %B5 is 0 or 1.
76  %B5 = udiv i177 %Y, -1
77  %B4 = add i177 %B5, -1
  ; %B2 is %B5 - 2, i.e. -2 or -1.
78  %B2 = add i177 %B4, -1
79  %B6 = mul i177 %B5, %B2
80  %B3 = add i177 %B2, %B2
81  %B9 = xor i177 %B4, %B3
  ; Shift amount is out of range for i177, so the result is poison.
82  %B13 = ashr i177 %Y, %B2
83  %B22 = add i177 %B9, %B13
84  %B1 = udiv i177 %B5, %B6
85  %C9 = icmp ult i177 %Y, %B22
86  store i1 %C9, ptr undef
87  ret i177 %B1
88}
89
90; 2 low bits are not needed because 12 has 2 trailing zeros
91
; The `or` only sets the two low bits of the dividend. The divisor 12 (0b1100)
; has two trailing zero bits, so those dividend bits cannot affect the
; quotient; the expected output divides %a directly and drops the `or`.
92define i8 @udiv_demanded_low_bits_set(i8 %a) {
93; CHECK-LABEL: @udiv_demanded_low_bits_set(
94; CHECK-NEXT:    [[U:%.*]] = udiv i8 [[A:%.*]], 12
95; CHECK-NEXT:    ret i8 [[U]]
96;
97  %o = or i8 %a, 3
98  %u = udiv i8 %o, 12
99  ret i8 %u
100}
101
102; This can't divide evenly, so it is poison.
103
; `exact` promises that the division has no remainder. The `or` forces the two
; low bits of the dividend to 1, so it is never a multiple of 12; the promise
; is always broken and the result is expected to fold to poison.
104define i8 @udiv_exact_demanded_low_bits_set(i8 %a) {
105; CHECK-LABEL: @udiv_exact_demanded_low_bits_set(
106; CHECK-NEXT:    ret i8 poison
107;
108  %o = or i8 %a, 3
109  %u = udiv exact i8 %o, 12
110  ret i8 %u
111}
112
113; All high bits are set, so this simplifies.
114
; `or %x, -4` sets the six high bits, so %o is in [252, 255]. Every value in
; that range divided by 12 gives 21, so the expected output is the constant 21.
; The %y parameter is not used by the body.
115define i8 @udiv_demanded_high_bits_set(i8 %x, i8 %y) {
116; CHECK-LABEL: @udiv_demanded_high_bits_set(
117; CHECK-NEXT:    ret i8 21
118;
119  %o = or i8 %x, -4
120  %r = udiv i8 %o, 12
121  ret i8 %r
122}
123
124; This should fold the same as above.
125
; Same dividend range as the non-exact test, [252, 255]. With `exact`, only 252
; divides evenly (252 / 12 == 21) and the other values yield poison, so the
; constant 21 is still the expected result. The %y parameter is not used by the
; body.
126define i8 @udiv_exact_demanded_high_bits_set(i8 %x, i8 %y) {
127; CHECK-LABEL: @udiv_exact_demanded_high_bits_set(
128; CHECK-NEXT:    ret i8 21
129;
130  %o = or i8 %x, -4
131  %r = udiv exact i8 %o, 12
132  ret i8 %r
133}
134
135; 2 low bits are not needed because 12 has 2 trailing zeros
136
; The `and` only clears the two low bits of the dividend. The divisor 12
; (0b1100) has two trailing zero bits, so those bits cannot affect the
; quotient; the expected output divides %a directly and drops the `and`.
137define i8 @udiv_demanded_low_bits_clear(i8 %a) {
138; CHECK-LABEL: @udiv_demanded_low_bits_clear(
139; CHECK-NEXT:    [[U:%.*]] = udiv i8 [[A:%.*]], 12
140; CHECK-NEXT:    ret i8 [[U]]
141;
142  %o = and i8 %a, -4
143  %u = udiv i8 %o, 12
144  ret i8 %u
145}
146
147; This should fold the same as above, except that 'exact' is dropped once the mask is removed.
148
; As in the non-exact test, the mask is expected to be removed. The expected
; udiv also has no `exact` flag: once the dividend is %a rather than the masked
; value, it may no longer be a multiple of 12 (for example %a == 13 masks to
; 12), so keeping `exact` would introduce poison.
149define i8 @udiv_exact_demanded_low_bits_clear(i8 %a) {
150; CHECK-LABEL: @udiv_exact_demanded_low_bits_clear(
151; CHECK-NEXT:    [[U:%.*]] = udiv i8 [[A:%.*]], 12
152; CHECK-NEXT:    ret i8 [[U]]
153;
154  %o = and i8 %a, -4
155  %u = udiv exact i8 %o, 12
156  ret i8 %u
157}
158
; Scalable-vector version of the "low bits set" case: each lane ORs in 3 and
; divides by a splat of 12. The expected output drops the `or` and divides %a
; directly.
159define <vscale x 1 x i32> @udiv_demanded3(<vscale x 1 x i32> %a) {
160; CHECK-LABEL: @udiv_demanded3(
161; CHECK-NEXT:    [[U:%.*]] = udiv <vscale x 1 x i32> [[A:%.*]], splat (i32 12)
162; CHECK-NEXT:    ret <vscale x 1 x i32> [[U]]
163;
164  %o = or <vscale x 1 x i32> %a, splat (i32 3)
165  %u = udiv <vscale x 1 x i32> %o, splat (i32 12)
166  ret <vscale x 1 x i32> %u
167}
168
169; PR74242: a dominating branch limits the divisor to 0 or 1 where the udiv executes.
; The udiv executes only on the false edge of `%a ugt 1`, where %a is 0 or 1.
; Division by zero is undefined behavior, so %a can be taken as 1 there and
; %div equals %b. Both phi inputs are then %b, and the expected output returns
; %b directly with no udiv left.
170define i32 @div_by_zero_or_one_from_dom_cond(i32 %a, i32 %b) {
171; CHECK-LABEL: @div_by_zero_or_one_from_dom_cond(
172; CHECK-NEXT:  entry:
173; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 1
174; CHECK-NEXT:    br i1 [[CMP]], label [[JOIN:%.*]], label [[ZERO_OR_ONE:%.*]]
175; CHECK:       zero_or_one:
176; CHECK-NEXT:    br label [[JOIN]]
177; CHECK:       join:
178; CHECK-NEXT:    ret i32 [[B:%.*]]
179;
180entry:
181  %cmp = icmp ugt i32 %a, 1
182  br i1 %cmp, label %join, label %zero_or_one
183
184zero_or_one:
  ; Reached only when %a <= 1 (unsigned).
185  %div = udiv i32 %b, %a
186  br label %join
187
188join:
189  %res = phi i32 [ %div, %zero_or_one ], [ %b, %entry ]
190  ret i32 %res
191}
192