; xref: /llvm-project/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll (revision d942f5e13dd03e902ae77602c5a1781d04ac18a3)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX

; Lane-by-lane equality test spread over four basic blocks.
; Both <4 x float> arguments are bitcast to <4 x i32>; each block extracts the
; same lane from both vectors and compares the two scalars with icmp eq.
; The expected output (shared by both run configurations) replaces every
; scalar compare with a vector icmp eq followed by an extract of the i1 result
; from that same lane.
define i1 @cmp_v4i32(<4 x float> %arg, <4 x float> %arg1) {
; CHECK-LABEL: @cmp_v4i32(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[T:%.*]] = bitcast <4 x float> [[ARG:%.*]] to <4 x i32>
; CHECK-NEXT:    [[T3:%.*]] = bitcast <4 x float> [[ARG1:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT:    br i1 [[T5]], label [[BB6:%.*]], label [[BB18:%.*]]
; CHECK:       bb6:
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT:    br i1 [[T9]], label [[BB10:%.*]], label [[BB18]]
; CHECK:       bb10:
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; CHECK-NEXT:    br i1 [[T13]], label [[BB14:%.*]], label [[BB18]]
; CHECK:       bb14:
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T17:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
; CHECK-NEXT:    br label [[BB18]]
; CHECK:       bb18:
; CHECK-NEXT:    [[T19:%.*]] = phi i1 [ false, [[BB10]] ], [ false, [[BB6]] ], [ false, [[BB:%.*]] ], [ [[T17]], [[BB14]] ]
; CHECK-NEXT:    ret i1 [[T19]]
;
bb:
  %t = bitcast <4 x float> %arg to <4 x i32>
  %t2 = extractelement <4 x i32> %t, i32 0
  %t3 = bitcast <4 x float> %arg1 to <4 x i32>
  %t4 = extractelement <4 x i32> %t3, i32 0
  %t5 = icmp eq i32 %t2, %t4
  br i1 %t5, label %bb6, label %bb18

bb6:
  %t7 = extractelement <4 x i32> %t, i32 1
  %t8 = extractelement <4 x i32> %t3, i32 1
  %t9 = icmp eq i32 %t7, %t8
  br i1 %t9, label %bb10, label %bb18

bb10:
  %t11 = extractelement <4 x i32> %t, i32 2
  %t12 = extractelement <4 x i32> %t3, i32 2
  %t13 = icmp eq i32 %t11, %t12
  br i1 %t13, label %bb14, label %bb18

bb14:
  %t15 = extractelement <4 x i32> %t, i32 3
  %t16 = extractelement <4 x i32> %t3, i32 3
  %t17 = icmp eq i32 %t15, %t16
  br label %bb18

bb18:
  ; Any early exit yields false; only the lane-3 compare result can be true.
  %t19 = phi i1 [ false, %bb10 ], [ false, %bb6 ], [ false, %bb ], [ %t17, %bb14 ]
  ret i1 %t19
}

; Two fcmp operations on lane 1 of <2 x double> vectors, in different blocks.
; The extract %y1 from the entry block is reused by the compare in block t.
; With -mattr=SSE2 the expected output is the unchanged scalar form.
; With -mattr=AVX2 each compare becomes a vector fcmp whose i1 result is
; extracted from lane 1.
define i32 @cmp_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
; SSE-LABEL: @cmp_v2f64(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; SSE-NEXT:    [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1
; SSE-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[X1]], [[Y1]]
; SSE-NEXT:    br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]]
; SSE:       t:
; SSE-NEXT:    [[Z1:%.*]] = extractelement <2 x double> [[Z:%.*]], i32 1
; SSE-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[Y1]], [[Z1]]
; SSE-NEXT:    [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99
; SSE-NEXT:    ret i32 [[E]]
; SSE:       f:
; SSE-NEXT:    ret i32 0
;
; AVX-LABEL: @cmp_v2f64(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]]
; AVX-NEXT:    [[CMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; AVX-NEXT:    br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]]
; AVX:       t:
; AVX-NEXT:    [[TMP1:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]]
; AVX-NEXT:    [[CMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; AVX-NEXT:    [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99
; AVX-NEXT:    ret i32 [[E]]
; AVX:       f:
; AVX-NEXT:    ret i32 0
;
entry:
  %x1 = extractelement <2 x double> %x, i32 1
  %y1 = extractelement <2 x double> %y, i32 1
  %cmp1 = fcmp oeq double %x1, %y1
  br i1 %cmp1, label %t, label %f

t:
  %z1 = extractelement <2 x double> %z, i32 1
  %cmp2 = fcmp ogt double %y1, %z1
  %e = select i1 %cmp2, i32 42, i32 99
  ret i32 %e

f:
  ret i32 0
}

; fcmp oge of lane 0 of %x against lane 1 of %y (mismatched extract lanes).
; With -mattr=SSE2 the expected output is the unchanged scalar form.
; With -mattr=AVX2 a shufflevector moves lane 1 of %y into lane 0, the compare
; is done on the whole vectors, and the result is extracted from lane 0.
define i1 @cmp01_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: @cmp01_v2f64(
; SSE-NEXT:    [[X0:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
; SSE-NEXT:    [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1
; SSE-NEXT:    [[CMP:%.*]] = fcmp oge double [[X0]], [[Y1]]
; SSE-NEXT:    ret i1 [[CMP]]
;
; AVX-LABEL: @cmp01_v2f64(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fcmp oge <2 x double> [[X:%.*]], [[SHIFT]]
; AVX-NEXT:    [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; AVX-NEXT:    ret i1 [[CMP]]
;
  %x0 = extractelement <2 x double> %x, i32 0
  %y1 = extractelement <2 x double> %y, i32 1
  %cmp = fcmp oge double %x0, %y1
  ret i1 %cmp
}

; fcmp ule of lane 1 of %x against lane 0 of %y (mismatched extract lanes,
; with the first compare operand being the one in the higher lane).
; With -mattr=SSE2 the expected output is the unchanged scalar form.
; With -mattr=AVX2 a shufflevector moves lane 1 of %x into lane 0, the compare
; is done on the whole vectors, and the result is extracted from lane 0.
; The expected extract index is spelled as i64 in this case.
define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: @cmp10_v2f64(
; SSE-NEXT:    [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; SSE-NEXT:    [[Y0:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 0
; SSE-NEXT:    [[CMP:%.*]] = fcmp ule double [[X1]], [[Y0]]
; SSE-NEXT:    ret i1 [[CMP]]
;
; AVX-LABEL: @cmp10_v2f64(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fcmp ule <2 x double> [[SHIFT]], [[Y:%.*]]
; AVX-NEXT:    [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; AVX-NEXT:    ret i1 [[CMP]]
;
  %x1 = extractelement <2 x double> %x, i32 1
  %y0 = extractelement <2 x double> %y, i32 0
  %cmp = fcmp ule double %x1, %y0
  ret i1 %cmp
}

; icmp sgt of lane 1 of %x against lane 2 of %y (mismatched extract lanes).
; The expected output is the same for both run configurations: a shufflevector
; moves lane 2 of %y into lane 1, the compare is done on the whole vectors,
; and the result is extracted from lane 1.
define i1 @cmp12_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @cmp12_v4i32(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 2, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[SHIFT]]
; CHECK-NEXT:    [[CMP:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %x1 = extractelement <4 x i32> %x, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %cmp = icmp sgt i32 %x1, %y2
  ret i1 %cmp
}

; fcmp ugt between lanes 2 and 1 of the same vector %a, with the i1 result
; inserted into lane 2 of %b.
; With -mattr=SSE2 the expected output is the unchanged scalar form.
; With -mattr=AVX2 a shufflevector moves lane 1 of %a into lane 2, the compare
; is done on the whole vectors, and a second shufflevector takes lane 2 from
; the compare result (mask index 6) and the remaining lanes from %b.
define <4 x i1> @ins_fcmp_ext_ext(<4 x float> %a, <4 x i1> %b) {
; SSE-LABEL: @ins_fcmp_ext_ext(
; SSE-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 1
; SSE-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; SSE-NEXT:    [[A21:%.*]] = fcmp ugt float [[A2]], [[A1]]
; SSE-NEXT:    [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A21]], i32 2
; SSE-NEXT:    ret <4 x i1> [[R]]
;
; AVX-LABEL: @ins_fcmp_ext_ext(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fcmp ugt <4 x float> [[A]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = shufflevector <4 x i1> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; AVX-NEXT:    ret <4 x i1> [[R]]
;
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a21 = fcmp ugt float %a2, %a1
  %r = insertelement <4 x i1> %b, i1 %a21, i32 2
  ret <4 x i1> %r
}

; icmp ule between lanes 2 and 3 of the same vector %a, with the i1 result
; inserted into lane 3 of %b.
; The expected output for both run configurations is identical to the input:
; the extract/compare/insert sequence is left in scalar form.
define <4 x i1> @ins_icmp_ext_ext(<4 x i32> %a, <4 x i1> %b) {
; CHECK-LABEL: @ins_icmp_ext_ext(
; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 3
; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[A23:%.*]] = icmp ule i32 [[A2]], [[A3]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A23]], i32 3
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %a3 = extractelement <4 x i32> %a, i32 3
  %a2 = extractelement <4 x i32> %a, i32 2
  %a23 = icmp ule i32 %a2, %a3
  %r = insertelement <4 x i1> %b, i1 %a23, i32 3
  ret <4 x i1> %r
}
