xref: /llvm-project/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll (revision 09583dec15c5100fd5f26f81a1b011bce3f9b21f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3
4; Reduce a 64-bit add by a constant to a 32-bit add of the high halves if we
5; know the low 32 bits of the constant are all zero.
6
7; add i64:x, K if computeTrailingZeros(K) >= 32
8; => build_pair (add x.hi, K.hi), x.lo
9
10define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_0(i64 inreg %reg) {
11; GFX9-LABEL: s_add_i64_const_low_bits_known0_0:
12; GFX9:       ; %bb.0:
13; GFX9-NEXT:    s_add_i32 s1, s1, 0x40000
14; GFX9-NEXT:    ; return to shader part epilog
15  %add = add i64 %reg, 1125899906842624 ; (1 << 50); low 32 bits are zero, high 32 bits are 0x40000 (1 << 18)
16  ret i64 %add
17}
18
19define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_1(i64 inreg %reg) {
20; GFX9-LABEL: s_add_i64_const_low_bits_known0_1:
21; GFX9:       ; %bb.0:
22; GFX9-NEXT:    s_add_i32 s1, s1, 1
23; GFX9-NEXT:    ; return to shader part epilog
24  %add = add i64 %reg, 4294967296 ; (1 << 32); smallest constant with all low 32 bits zero, high 32 bits are 1
25  ret i64 %add
26}
27
28define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_2(i64 inreg %reg) {
29; GFX9-LABEL: s_add_i64_const_low_bits_known0_2:
30; GFX9:       ; %bb.0:
31; GFX9-NEXT:    s_add_i32 s1, s1, 2
32; GFX9-NEXT:    ; return to shader part epilog
33  %add = add i64 %reg, 8589934592 ; (1 << 33); low 32 bits are zero, high 32 bits are 2
34  ret i64 %add
35}
36
37define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_3(i64 inreg %reg) {
38; GFX9-LABEL: s_add_i64_const_low_bits_known0_3:
39; GFX9:       ; %bb.0:
40; GFX9-NEXT:    s_add_i32 s1, s1, 0x80000000
41; GFX9-NEXT:    ; return to shader part epilog
42  %add = add i64 %reg, -9223372036854775808 ; (1 << 63); only the sign bit is set, high 32 bits are 0x80000000
43  ret i64 %add
44}
45
46define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_4(i64 inreg %reg) {
47; GFX9-LABEL: s_add_i64_const_low_bits_known0_4:
48; GFX9:       ; %bb.0:
49; GFX9-NEXT:    s_add_i32 s1, s1, -1
50; GFX9-NEXT:    ; return to shader part epilog
51  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000; low 32 bits are zero, high 32 bits are 0xffffffff (-1 as i32)
52  ret i64 %add
53}
54
55define i64 @v_add_i64_const_low_bits_known0_0(i64 %reg) {
56; GFX9-LABEL: v_add_i64_const_low_bits_known0_0:
57; GFX9:       ; %bb.0:
58; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX9-NEXT:    v_add_u32_e32 v1, 0x40000, v1
60; GFX9-NEXT:    s_setpc_b64 s[30:31]
61  %add = add i64 %reg, 1125899906842624 ; (1 << 50); low 32 bits are zero, high 32 bits are 0x40000 (1 << 18)
62  ret i64 %add
63}
64
65define i64 @v_add_i64_const_low_bits_known0_1(i64 %reg) {
66; GFX9-LABEL: v_add_i64_const_low_bits_known0_1:
67; GFX9:       ; %bb.0:
68; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX9-NEXT:    v_add_u32_e32 v1, 1, v1
70; GFX9-NEXT:    s_setpc_b64 s[30:31]
71  %add = add i64 %reg, 4294967296 ; (1 << 32); smallest constant with all low 32 bits zero, high 32 bits are 1
72  ret i64 %add
73}
74
75define i64 @v_add_i64_const_low_bits_known0_2(i64 %reg) {
76; GFX9-LABEL: v_add_i64_const_low_bits_known0_2:
77; GFX9:       ; %bb.0:
78; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX9-NEXT:    v_add_u32_e32 v1, 2, v1
80; GFX9-NEXT:    s_setpc_b64 s[30:31]
81  %add = add i64 %reg, 8589934592 ; (1 << 33); low 32 bits are zero, high 32 bits are 2
82  ret i64 %add
83}
84
85define i64 @v_add_i64_const_low_bits_known0_3(i64 %reg) {
86; GFX9-LABEL: v_add_i64_const_low_bits_known0_3:
87; GFX9:       ; %bb.0:
88; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v1
90; GFX9-NEXT:    s_setpc_b64 s[30:31]
91  %add = add i64 %reg, -9223372036854775808 ; (1 << 63); only the sign bit is set, high 32 bits are 0x80000000
92  ret i64 %add
93}
94
95define i64 @v_add_i64_const_low_bits_known0_4(i64 %reg) {
96; GFX9-LABEL: v_add_i64_const_low_bits_known0_4:
97; GFX9:       ; %bb.0:
98; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99; GFX9-NEXT:    v_add_u32_e32 v1, -1, v1
100; GFX9-NEXT:    s_setpc_b64 s[30:31]
101  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000; low 32 bits are zero, high 32 bits are 0xffffffff (-1 as i32)
102  ret i64 %add
103}
104
105define amdgpu_ps i64 @s_add_i64_const_high_bits_known0_0(i64 inreg %reg) {
106; GFX9-LABEL: s_add_i64_const_high_bits_known0_0:
107; GFX9:       ; %bb.0:
108; GFX9-NEXT:    s_add_u32 s0, s0, -1
109; GFX9-NEXT:    s_addc_u32 s1, s1, 0
110; GFX9-NEXT:    ; return to shader part epilog
111  %add = add i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff; low 32 bits are all ones and high 32 bits are zero, so the checks keep the add-with-carry pair
112  ret i64 %add
113}
114
115define i64 @v_add_i64_const_high_bits_known0_0(i64 %reg) {
116; GFX9-LABEL: v_add_i64_const_high_bits_known0_0:
117; GFX9:       ; %bb.0:
118; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
120; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
121; GFX9-NEXT:    s_setpc_b64 s[30:31]
122  %add = add i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff; low 32 bits are all ones and high 32 bits are zero, so the checks keep the add-with-carry pair
123  ret i64 %add
124}
125
126define <2 x i64> @v_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
127; GFX9-LABEL: v_add_v2i64_splat_const_low_bits_known0_0:
128; GFX9:       ; %bb.0:
129; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GFX9-NEXT:    v_add_u32_e32 v1, 1, v1
131; GFX9-NEXT:    v_add_u32_e32 v3, 1, v3
132; GFX9-NEXT:    s_setpc_b64 s[30:31]
133  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; splat of (1 << 32); each element has zero low 32 bits and high 32 bits of 1
134  ret <2 x i64> %add
135}
136
137define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
138; GFX9-LABEL: v_add_v2i64_nonsplat_const_low_bits_known0_0:
139; GFX9:       ; %bb.0:
140; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; GFX9-NEXT:    v_add_u32_e32 v1, 1, v1
142; GFX9-NEXT:    v_add_u32_e32 v3, 2, v3
143; GFX9-NEXT:    s_setpc_b64 s[30:31]
144  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33); both elements have zero low 32 bits, high 32 bits are 1 and 2
145  ret <2 x i64> %add
146}
147
148define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
149; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
150; GFX9:       ; %bb.0:
151; GFX9-NEXT:    s_add_i32 s1, s1, 1
152; GFX9-NEXT:    s_add_i32 s3, s3, 1
153; GFX9-NEXT:    ; return to shader part epilog
154  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; splat of (1 << 32); each element has zero low 32 bits and high 32 bits of 1
155  ret <2 x i64> %add
156}
157
158define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
159; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
160; GFX9:       ; %bb.0:
161; GFX9-NEXT:    s_add_i32 s1, s1, 1
162; GFX9-NEXT:    s_add_i32 s3, s3, 2
163; GFX9-NEXT:    ; return to shader part epilog
164  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33); both elements have zero low 32 bits, high 32 bits are 1 and 2
165  ret <2 x i64> %add
166}
167
168; We could reduce this to use a 32-bit add if we use computeKnownBits
169define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
170; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
171; GFX9:       ; %bb.0:
172; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
174; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
175; GFX9-NEXT:    s_setpc_b64 s[30:31]
176  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
177  %in.high.bits = shl i64 %zext.offset.hi32, 32 ; non-constant addend; the shift by 32 leaves its low 32 bits zero
178  %add = add i64 %reg, %in.high.bits ; not narrowed yet: the checks above still show an add of 0 plus an add-with-carry
179  ret i64 %add
180}
181
182; We could reduce this to use a 32-bit add if we use computeKnownBits
183define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
184; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
185; GFX9:       ; %bb.0:
186; GFX9-NEXT:    s_add_u32 s0, s0, 0
187; GFX9-NEXT:    s_addc_u32 s1, s1, s2
188; GFX9-NEXT:    ; return to shader part epilog
189  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
190  %in.high.bits = shl i64 %zext.offset.hi32, 32 ; non-constant addend; the shift by 32 leaves its low 32 bits zero
191  %add = add i64 %reg, %in.high.bits ; not narrowed yet: the checks above still show an add of 0 plus an add-with-carry
192  ret i64 %add
193}
194