xref: /llvm-project/llvm/test/CodeGen/X86/pseudo_cmov_lower.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s
2
3; This test checks that only a single js gets generated in the final code
4; for lowering the CMOV pseudos that get created for this IR.
5; CHECK-LABEL: foo1:
6; CHECK: js
7; CHECK-NOT: js
; Returns (v1 < 0 ? v1 : v2) - (v1 < 0 ? v2 : v3).
; Both selects are driven by the same signed "less than zero" compare.
define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind {
entry:
  %is.neg = icmp slt i32 %v1, 0
  ; Subtrahend: v2 when v1 is negative, otherwise v3.
  %rhs = select i1 %is.neg, i32 %v2, i32 %v3
  ; Minuend: v1 when v1 is negative, otherwise v2.
  %lhs = select i1 %is.neg, i32 %v1, i32 %v2
  %diff = sub i32 %lhs, %rhs
  ret i32 %diff
}
16
17; This test checks that only a single js gets generated in the final code
18; for lowering the CMOV pseudos that get created for this IR. This makes
19; sure the code for the lowering for opposite conditions gets tested.
20; CHECK-LABEL: foo11:
21; CHECK: js
22; CHECK-NOT: js
23; CHECK-NOT: jns
; Returns (v1 >= 0 ? v1 : v2) - (v1 < 0 ? v2 : v3).
; The two selects use opposite predicates (slt and sge) on the same operands.
define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind {
entry:
  %is.neg = icmp slt i32 %v1, 0
  ; Subtrahend: v2 when v1 is negative, otherwise v3.
  %rhs = select i1 %is.neg, i32 %v2, i32 %v3
  ; Inverse predicate of %is.neg, computed as a separate compare.
  %is.nonneg = icmp sge i32 %v1, 0
  ; Minuend: v1 when v1 is non-negative, otherwise v2.
  %lhs = select i1 %is.nonneg, i32 %v1, i32 %v2
  %diff = sub i32 %lhs, %rhs
  ret i32 %diff
}
33
34; This test checks that only a single js gets generated in the final code
35; for lowering the CMOV pseudos that get created for this IR.
36; CHECK-LABEL: foo2:
37; CHECK: js
38; CHECK-NOT: js
; Two i8 selects on one signed "less than zero" compare; each result is
; sign-extended to i32 and the function returns
; sext(v1 < 0 ? v2 : v3) - sext(v1 < 0 ? v1 : v2).
define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind {
entry:
  %is.neg = icmp slt i8 %v1, 0
  %pick.a = select i1 %is.neg, i8 %v2, i8 %v3
  %pick.b = select i1 %is.neg, i8 %v1, i8 %v2
  ; Widen both 8-bit results before subtracting.
  %wide.a = sext i8 %pick.a to i32
  %wide.b = sext i8 %pick.b to i32
  %diff = sub i32 %wide.a, %wide.b
  ret i32 %diff
}
49
50; This test checks that only a single js gets generated in the final code
51; for lowering the CMOV pseudos that get created for this IR.
52; CHECK-LABEL: foo3:
53; CHECK: js
54; CHECK-NOT: js
; Two i16 selects on one signed "less than zero" compare; each result is
; sign-extended to i32 and the function returns
; sext(v1 < 0 ? v2 : v3) - sext(v1 < 0 ? v1 : v2).
define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind {
entry:
  %is.neg = icmp slt i16 %v1, 0
  %pick.a = select i1 %is.neg, i16 %v2, i16 %v3
  %pick.b = select i1 %is.neg, i16 %v1, i16 %v2
  ; Widen both 16-bit results before subtracting.
  %wide.a = sext i16 %pick.a to i32
  %wide.b = sext i16 %pick.b to i32
  %diff = sub i32 %wide.a, %wide.b
  ret i32 %diff
}
65
66; This test checks that only a single js gets generated in the final code
67; for lowering the CMOV pseudos that get created for this IR.
68; CHECK-LABEL: foo4:
69; CHECK: js
70; CHECK-NOT: js
; Returns (v1 < 0 ? v2 : v3) - (v1 < 0 ? v3 : v4) on float operands.
; Both selects are driven by the same signed integer compare.
define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind {
entry:
  %is.neg = icmp slt i32 %v1, 0
  %lhs = select i1 %is.neg, float %v2, float %v3
  %rhs = select i1 %is.neg, float %v3, float %v4
  %diff = fsub float %lhs, %rhs
  ret float %diff
}
79
80; This test checks that only a single je gets generated in the final code
81; for lowering the CMOV pseudos that get created for this IR.
82; CHECK-LABEL: foo5:
83; CHECK: je
84; CHECK-NOT: je
; Returns (v1 == 0 ? v2 : v3) - (v1 == 0 ? v3 : v4) on double operands.
; Both selects are driven by the same integer equality compare.
define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind {
entry:
  %is.zero = icmp eq i32 %v1, 0
  %lhs = select i1 %is.zero, double %v2, double %v3
  %rhs = select i1 %is.zero, double %v3, double %v4
  %diff = fsub double %lhs, %rhs
  ret double %diff
}
93
94; This test checks that only a single je gets generated in the final code
95; for lowering the CMOV pseudos that get created for this IR.
96; CHECK-LABEL: foo6:
97; CHECK: je
98; CHECK-NOT: je
; Returns (v1 == 0 ? v2 : v3) - (v1 == 0 ? v3 : v4) on <4 x float> operands.
; A single scalar i1 condition selects between whole vectors.
define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind {
entry:
  %is.zero = icmp eq i32 %v1, 0
  %lhs = select i1 %is.zero, <4 x float> %v2, <4 x float> %v3
  %rhs = select i1 %is.zero, <4 x float> %v3, <4 x float> %v4
  %diff = fsub <4 x float> %lhs, %rhs
  ret <4 x float> %diff
}
107
108; This test checks that only a single je gets generated in the final code
109; for lowering the CMOV pseudos that get created for this IR.
110; CHECK-LABEL: foo7:
111; CHECK: je
112; CHECK-NOT: je
; Returns (v1 == 0 ? v2 : v3) - (v1 == 0 ? v3 : v4) on <2 x double> operands.
; A single scalar i1 condition selects between whole vectors.
define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind {
entry:
  %is.zero = icmp eq i32 %v1, 0
  %lhs = select i1 %is.zero, <2 x double> %v2, <2 x double> %v3
  %rhs = select i1 %is.zero, <2 x double> %v3, <2 x double> %v4
  %diff = fsub <2 x double> %lhs, %rhs
  ret <2 x double> %diff
}
121
122; This test checks that only a single ja gets generated in the final code
123; for lowering the CMOV pseudos that get created for this IR. This combines
124; all the supported types together into one long string of selects based
125; on the same condition.
126; CHECK-LABEL: foo8:
127; CHECK: ja
128; CHECK-NOT: ja
; Chains ten selects on one unsigned compare (%v1 > 31), one per value type:
; i16, i32, float, double, and the 128-, 256- and 512-bit float/double
; vectors. Every result is stored to its own, non-overlapping slot in %dst so
; that no select result is overwritten by a later store.
; NOTE(review): the i8 arguments %v2 and %v3 are accepted but never used in
; the body.
define void @foo8(i32 %v1,
                  i8 %v2, i8 %v3,
                  i16 %v12, i16 %v13,
                  i32 %v22, i32 %v23,
                  float %v32, float %v33,
                  double %v42, double %v43,
                  <4 x float> %v52, <4 x float> %v53,
                  <2 x double> %v62, <2 x double> %v63,
                  <8 x float> %v72, <8 x float> %v73,
                  <4 x double> %v82, <4 x double> %v83,
                  <16 x float> %v92, <16 x float> %v93,
                  <8 x double> %v102, <8 x double> %v103,
                  ptr %dst) nounwind {
entry:
  ; Byte offsets into %dst, one destination slot per stored result.
  %add.ptr11 = getelementptr inbounds i8, ptr %dst, i32 2

  %add.ptr21 = getelementptr inbounds i8, ptr %dst, i32 4

  %add.ptr31 = getelementptr inbounds i8, ptr %dst, i32 8

  %add.ptr41 = getelementptr inbounds i8, ptr %dst, i32 16

  %add.ptr51 = getelementptr inbounds i8, ptr %dst, i32 32

  %add.ptr61 = getelementptr inbounds i8, ptr %dst, i32 48

  %add.ptr71 = getelementptr inbounds i8, ptr %dst, i32 64

  %add.ptr81 = getelementptr inbounds i8, ptr %dst, i32 128

  ; The 64-byte results get slots of their own. Placing them at offsets 64 and
  ; 128 would completely overwrite the 32-byte results stored through
  ; %add.ptr71 and %add.ptr81.
  %add.ptr91 = getelementptr inbounds i8, ptr %dst, i32 192

  %add.ptr101 = getelementptr inbounds i8, ptr %dst, i32 256

  ; These operations are necessary, because select of two single use loads
  ; ends up getting optimized into a select of two leas, followed by a
  ; single load of the selected address.
  %t13 = xor i16 %v13, 11
  %t23 = xor i32 %v23, 1234
  %t33 = fadd float %v33, %v32
  %t43 = fadd double %v43, %v42
  %t53 = fadd <4 x float> %v53, %v52
  %t63 = fadd <2 x double> %v63, %v62
  %t73 = fsub <8 x float> %v73, %v72
  %t83 = fsub <4 x double> %v83, %v82
  %t93 = fsub <16 x float> %v93, %v92
  %t103 = fsub <8 x double> %v103, %v102

  ; One shared condition feeds every select below.
  %cmp = icmp ugt i32 %v1, 31
  %t11 = select i1 %cmp, i16 %v12, i16 %t13
  %t21 = select i1 %cmp, i32 %v22, i32 %t23
  %t31 = select i1 %cmp, float %v32, float %t33
  %t41 = select i1 %cmp, double %v42, double %t43
  %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53
  %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63
  %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73
  %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83
  %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93
  %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103

  ; Store every selected value so that each select has a user.
  store i16 %t11, ptr %add.ptr11, align 2
  store i32 %t21, ptr %add.ptr21, align 4
  store float %t31, ptr %add.ptr31, align 4
  store double %t41, ptr %add.ptr41, align 8
  store <4 x float> %t51, ptr %add.ptr51, align 16
  store <2 x double> %t61, ptr %add.ptr61, align 16
  store <8 x float> %t71, ptr %add.ptr71, align 32
  store <4 x double> %t81, ptr %add.ptr81, align 32
  store <16 x float> %t91, ptr %add.ptr91, align 32
  store <8 x double> %t101, ptr %add.ptr101, align 32

  ret void
}
202
; This test checks that only a single ja gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. It strings
; together selects of several <N x i1> vector types, all based on the same
; condition.
; Contrary to my expectations, this doesn't exercise the code for
; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1.  Instead the selects all
; get lowered into vector length number of selects, which all eventually turn
; into a huge number of CMOV_GR8, which are all contiguous, so the optimization
; kicks in as long as CMOV_GR8 is supported. I couldn't find a way to get
; CMOV_V*I1 pseudo-opcodes to get generated. If a way exists to get CMOV_V*I1
; pseudo-opcodes to be generated, this test should be replaced with one that
; tests those opcodes.
;
215; CHECK-LABEL: foo9:
216; CHECK: ja
217; CHECK-NOT: ja
; Chains four selects of i1 vectors (8, 16, 32 and 64 lanes) on one unsigned
; compare (%v1 > 31) and stores each result into %dst.
define void @foo9(i32 %v1,
                  <8 x i1> %v12, <8 x i1> %v13,
                  <16 x i1> %v22, <16 x i1> %v23,
                  <32 x i1> %v32, <32 x i1> %v33,
                  <64 x i1> %v42, <64 x i1> %v43,
                  ptr %dst) nounwind {
entry:

  ; Destination slots for the 16-, 32- and 64-lane results; the 8-lane result
  ; is stored at %dst itself.
  %slot16 = getelementptr inbounds i8, ptr %dst, i32 4

  %slot32 = getelementptr inbounds i8, ptr %dst, i32 8

  %slot64 = getelementptr inbounds i8, ptr %dst, i32 16

  ; The false operand of every select is a computed value rather than a plain
  ; argument: a select of two single use loads ends up getting optimized into
  ; a select of two leas, followed by a single load of the selected address.
  %mix8 = xor <8 x i1> %v13, %v12
  %mix16 = xor <16 x i1> %v23, %v22
  %mix32 = xor <32 x i1> %v33, %v32
  %mix64 = xor <64 x i1> %v43, %v42

  ; One shared condition feeds all four selects.
  %above31 = icmp ugt i32 %v1, 31
  %res8 = select i1 %above31, <8 x i1> %v12, <8 x i1> %mix8
  %res16 = select i1 %above31, <16 x i1> %v22, <16 x i1> %mix16
  %res32 = select i1 %above31, <32 x i1> %v32, <32 x i1> %mix32
  %res64 = select i1 %above31, <64 x i1> %v42, <64 x i1> %mix64

  store <8 x i1> %res8, ptr %dst, align 16
  store <16 x i1> %res16, ptr %slot16, align 4
  store <32 x i1> %res32, ptr %slot32, align 8
  store <64 x i1> %res64, ptr %slot64, align 16

  ret void
}
253