xref: /minix3/external/bsd/llvm/dist/llvm/test/CodeGen/X86/psubus.ll (revision f4a2713ac843a11c696ec80c0a5e3e5d80b4d338)
1*f4a2713aSLionel Sambuc; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
2*f4a2713aSLionel Sambuc; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
3*f4a2713aSLionel Sambuc; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
4*f4a2713aSLionel Sambuc
; The three RUN lines above feed this module to llc with three different
; -mcpu values and pipe the output to FileCheck under the SSE2, AVX1 and AVX2
; check prefixes respectively. Each function in this file carries its own
; check lines naming the psubusb/psubusw (or v-prefixed) instruction that
; must show up in the output for that prefix.
; NOTE(review): every line of this copy starts with a "<n>*<rev>Lionel Sambuc"
; prefix that looks like cross-reference/blame annotation residue rather than
; IR; presumably it has to be stripped before llc can parse the file -
; confirm against the upstream psubus.ll.
5*f4a2713aSLionel Sambuctarget datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
6*f4a2713aSLionel Sambuctarget triple = "x86_64-apple-macosx10.8.0"
7*f4a2713aSLionel Sambuc
; test1: <8 x i16> lanes, constant subtrahend, sign-bit form.
; Walks %head in steps of 8 i16 elements until the index reaches 16384. Each
; iteration loads 8 lanes, keeps (x xor -32768) in the lanes where x is
; negative as a signed value and zero in the others, and stores the result
; back to the same address.
; For a lane whose sign bit is set, the xor only clears that bit, so the
; select yields the unsigned saturating difference x - 32768.
; The check lines before the closing brace require psubusw (vpsubusw under
; the AVX prefixes) with the memory operand LCPI0_0(%rip) - presumably the
; constant-pool copy of the splat constant.
8*f4a2713aSLionel Sambucdefine void @test1(i16* nocapture %head) nounwind {
9*f4a2713aSLionel Sambucvector.ph:
10*f4a2713aSLionel Sambuc  br label %vector.body
11*f4a2713aSLionel Sambuc
12*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
13*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
14*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i16* %head, i64 %index
15*f4a2713aSLionel Sambuc  %1 = bitcast i16* %0 to <8 x i16>*
16*f4a2713aSLionel Sambuc  %2 = load <8 x i16>* %1, align 2
17*f4a2713aSLionel Sambuc  %3 = icmp slt <8 x i16> %2, zeroinitializer
18*f4a2713aSLionel Sambuc  %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
19*f4a2713aSLionel Sambuc  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
20*f4a2713aSLionel Sambuc  store <8 x i16> %5, <8 x i16>* %1, align 2
21*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 8
22*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
23*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
24*f4a2713aSLionel Sambuc
25*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
26*f4a2713aSLionel Sambuc  ret void
27*f4a2713aSLionel Sambuc
28*f4a2713aSLionel Sambuc; SSE2: @test1
29*f4a2713aSLionel Sambuc; SSE2: psubusw LCPI0_0(%rip), %xmm0
30*f4a2713aSLionel Sambuc
31*f4a2713aSLionel Sambuc; AVX1: @test1
32*f4a2713aSLionel Sambuc; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
33*f4a2713aSLionel Sambuc
34*f4a2713aSLionel Sambuc; AVX2: @test1
35*f4a2713aSLionel Sambuc; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
36*f4a2713aSLionel Sambuc}
37*f4a2713aSLionel Sambuc
; test2: <8 x i16> lanes, constant subtrahend, compare-and-add form.
; Same loop shape as a plain in-place update of %head: 8 i16 lanes per
; iteration, index stepping by 8 until it equals 16384.
; Per lane: if x is unsigned-greater than 32766 the result is x + (-32767),
; otherwise zero. Since "x >u 32766" is "x >=u 32767", this is the unsigned
; saturating difference x - 32767.
; The check lines require psubusw / vpsubusw with the memory operand
; LCPI1_0(%rip) for all three prefixes.
38*f4a2713aSLionel Sambucdefine void @test2(i16* nocapture %head) nounwind {
39*f4a2713aSLionel Sambucvector.ph:
40*f4a2713aSLionel Sambuc  br label %vector.body
41*f4a2713aSLionel Sambuc
42*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
43*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
44*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i16* %head, i64 %index
45*f4a2713aSLionel Sambuc  %1 = bitcast i16* %0 to <8 x i16>*
46*f4a2713aSLionel Sambuc  %2 = load <8 x i16>* %1, align 2
47*f4a2713aSLionel Sambuc  %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
48*f4a2713aSLionel Sambuc  %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
49*f4a2713aSLionel Sambuc  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
50*f4a2713aSLionel Sambuc  store <8 x i16> %5, <8 x i16>* %1, align 2
51*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 8
52*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
53*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
54*f4a2713aSLionel Sambuc
55*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
56*f4a2713aSLionel Sambuc  ret void
57*f4a2713aSLionel Sambuc
58*f4a2713aSLionel Sambuc; SSE2: @test2
59*f4a2713aSLionel Sambuc; SSE2: psubusw LCPI1_0(%rip), %xmm0
60*f4a2713aSLionel Sambuc
61*f4a2713aSLionel Sambuc; AVX1: @test2
62*f4a2713aSLionel Sambuc; AVX1: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
63*f4a2713aSLionel Sambuc
64*f4a2713aSLionel Sambuc; AVX2: @test2
65*f4a2713aSLionel Sambuc; AVX2: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
66*f4a2713aSLionel Sambuc}
67*f4a2713aSLionel Sambuc
; test3: <8 x i16> lanes, variable subtrahend.
; The preheader builds %broadcast15 by inserting the scalar %w into lane 0
; and shuffling with an all-zero mask, i.e. every lane holds %w.
; The loop then processes 8 i16 lanes of %head per iteration (index step 8,
; exit at 16384). Per lane: zero when x is unsigned-less than w, otherwise
; x - w - the unsigned saturating difference x - w.
; Unlike the constant tests, the check lines here require the
; register-register form: psubusw %xmm0, %xmm1 (three-operand vpsubusw under
; the AVX prefixes).
68*f4a2713aSLionel Sambucdefine void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
69*f4a2713aSLionel Sambucvector.ph:
70*f4a2713aSLionel Sambuc  %0 = insertelement <8 x i16> undef, i16 %w, i32 0
71*f4a2713aSLionel Sambuc  %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
72*f4a2713aSLionel Sambuc  br label %vector.body
73*f4a2713aSLionel Sambuc
74*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
75*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
76*f4a2713aSLionel Sambuc  %1 = getelementptr inbounds i16* %head, i64 %index
77*f4a2713aSLionel Sambuc  %2 = bitcast i16* %1 to <8 x i16>*
78*f4a2713aSLionel Sambuc  %3 = load <8 x i16>* %2, align 2
79*f4a2713aSLionel Sambuc  %4 = icmp ult <8 x i16> %3, %broadcast15
80*f4a2713aSLionel Sambuc  %5 = sub <8 x i16> %3, %broadcast15
81*f4a2713aSLionel Sambuc  %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
82*f4a2713aSLionel Sambuc  store <8 x i16> %6, <8 x i16>* %2, align 2
83*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 8
84*f4a2713aSLionel Sambuc  %7 = icmp eq i64 %index.next, 16384
85*f4a2713aSLionel Sambuc  br i1 %7, label %for.end, label %vector.body
86*f4a2713aSLionel Sambuc
87*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
88*f4a2713aSLionel Sambuc  ret void
89*f4a2713aSLionel Sambuc
90*f4a2713aSLionel Sambuc; SSE2: @test3
91*f4a2713aSLionel Sambuc; SSE2: psubusw %xmm0, %xmm1
92*f4a2713aSLionel Sambuc
93*f4a2713aSLionel Sambuc; AVX1: @test3
94*f4a2713aSLionel Sambuc; AVX1: vpsubusw %xmm0, %xmm1, %xmm1
95*f4a2713aSLionel Sambuc
96*f4a2713aSLionel Sambuc; AVX2: @test3
97*f4a2713aSLionel Sambuc; AVX2: vpsubusw %xmm0, %xmm1, %xmm1
98*f4a2713aSLionel Sambuc}
99*f4a2713aSLionel Sambuc
; test4: <16 x i8> lanes, constant subtrahend, sign-bit form.
; Processes %head 16 bytes per iteration (index step 16, exit at 16384),
; loading and storing with byte alignment.
; Per lane: (x xor -128) when x is negative as a signed byte, otherwise zero.
; With the sign bit set the xor simply clears it, so the select yields the
; unsigned saturating difference x - 128.
; The check lines require psubusb / vpsubusb with the memory operand
; LCPI3_0(%rip) for all three prefixes.
100*f4a2713aSLionel Sambucdefine void @test4(i8* nocapture %head) nounwind {
101*f4a2713aSLionel Sambucvector.ph:
102*f4a2713aSLionel Sambuc  br label %vector.body
103*f4a2713aSLionel Sambuc
104*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
105*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
106*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i8* %head, i64 %index
107*f4a2713aSLionel Sambuc  %1 = bitcast i8* %0 to <16 x i8>*
108*f4a2713aSLionel Sambuc  %2 = load <16 x i8>* %1, align 1
109*f4a2713aSLionel Sambuc  %3 = icmp slt <16 x i8> %2, zeroinitializer
110*f4a2713aSLionel Sambuc  %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
111*f4a2713aSLionel Sambuc  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
112*f4a2713aSLionel Sambuc  store <16 x i8> %5, <16 x i8>* %1, align 1
113*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 16
114*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
115*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
116*f4a2713aSLionel Sambuc
117*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
118*f4a2713aSLionel Sambuc  ret void
119*f4a2713aSLionel Sambuc
120*f4a2713aSLionel Sambuc; SSE2: @test4
121*f4a2713aSLionel Sambuc; SSE2: psubusb LCPI3_0(%rip), %xmm0
122*f4a2713aSLionel Sambuc
123*f4a2713aSLionel Sambuc; AVX1: @test4
124*f4a2713aSLionel Sambuc; AVX1: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
125*f4a2713aSLionel Sambuc
126*f4a2713aSLionel Sambuc; AVX2: @test4
127*f4a2713aSLionel Sambuc; AVX2: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
128*f4a2713aSLionel Sambuc}
129*f4a2713aSLionel Sambuc
; test5: <16 x i8> lanes, constant subtrahend, compare-and-add form.
; Processes %head 16 bytes per iteration (index step 16, exit at 16384).
; Per lane: x + (-127) when x is unsigned-greater than 126, otherwise zero.
; "x >u 126" is "x >=u 127", so this is the unsigned saturating difference
; x - 127.
; The check lines require psubusb / vpsubusb with the memory operand
; LCPI4_0(%rip) for all three prefixes.
130*f4a2713aSLionel Sambucdefine void @test5(i8* nocapture %head) nounwind {
131*f4a2713aSLionel Sambucvector.ph:
132*f4a2713aSLionel Sambuc  br label %vector.body
133*f4a2713aSLionel Sambuc
134*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
135*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
136*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i8* %head, i64 %index
137*f4a2713aSLionel Sambuc  %1 = bitcast i8* %0 to <16 x i8>*
138*f4a2713aSLionel Sambuc  %2 = load <16 x i8>* %1, align 1
139*f4a2713aSLionel Sambuc  %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
140*f4a2713aSLionel Sambuc  %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
141*f4a2713aSLionel Sambuc  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
142*f4a2713aSLionel Sambuc  store <16 x i8> %5, <16 x i8>* %1, align 1
143*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 16
144*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
145*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
146*f4a2713aSLionel Sambuc
147*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
148*f4a2713aSLionel Sambuc  ret void
149*f4a2713aSLionel Sambuc
150*f4a2713aSLionel Sambuc; SSE2: @test5
151*f4a2713aSLionel Sambuc; SSE2: psubusb LCPI4_0(%rip), %xmm0
152*f4a2713aSLionel Sambuc
153*f4a2713aSLionel Sambuc; AVX1: @test5
154*f4a2713aSLionel Sambuc; AVX1: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
155*f4a2713aSLionel Sambuc
156*f4a2713aSLionel Sambuc; AVX2: @test5
157*f4a2713aSLionel Sambuc; AVX2: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
158*f4a2713aSLionel Sambuc}
159*f4a2713aSLionel Sambuc
; test6: <16 x i8> lanes, variable subtrahend.
; The preheader splats the scalar %w across all 16 lanes (insertelement into
; lane 0 followed by a shufflevector with an all-zero mask).
; The loop processes %head 16 bytes per iteration (index step 16, exit at
; 16384). Per lane: zero when x is unsigned-less than w, otherwise x - w -
; the unsigned saturating difference x - w.
; The check lines require the register-register form psubusb %xmm0, %xmm1
; (three-operand vpsubusb under the AVX prefixes).
160*f4a2713aSLionel Sambucdefine void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
161*f4a2713aSLionel Sambucvector.ph:
162*f4a2713aSLionel Sambuc  %0 = insertelement <16 x i8> undef, i8 %w, i32 0
163*f4a2713aSLionel Sambuc  %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
164*f4a2713aSLionel Sambuc  br label %vector.body
165*f4a2713aSLionel Sambuc
166*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
167*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
168*f4a2713aSLionel Sambuc  %1 = getelementptr inbounds i8* %head, i64 %index
169*f4a2713aSLionel Sambuc  %2 = bitcast i8* %1 to <16 x i8>*
170*f4a2713aSLionel Sambuc  %3 = load <16 x i8>* %2, align 1
171*f4a2713aSLionel Sambuc  %4 = icmp ult <16 x i8> %3, %broadcast15
172*f4a2713aSLionel Sambuc  %5 = sub <16 x i8> %3, %broadcast15
173*f4a2713aSLionel Sambuc  %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
174*f4a2713aSLionel Sambuc  store <16 x i8> %6, <16 x i8>* %2, align 1
175*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 16
176*f4a2713aSLionel Sambuc  %7 = icmp eq i64 %index.next, 16384
177*f4a2713aSLionel Sambuc  br i1 %7, label %for.end, label %vector.body
178*f4a2713aSLionel Sambuc
179*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
180*f4a2713aSLionel Sambuc  ret void
181*f4a2713aSLionel Sambuc
182*f4a2713aSLionel Sambuc; SSE2: @test6
183*f4a2713aSLionel Sambuc; SSE2: psubusb %xmm0, %xmm1
184*f4a2713aSLionel Sambuc
185*f4a2713aSLionel Sambuc; AVX1: @test6
186*f4a2713aSLionel Sambuc; AVX1: vpsubusb %xmm0, %xmm1, %xmm1
187*f4a2713aSLionel Sambuc
188*f4a2713aSLionel Sambuc; AVX2: @test6
189*f4a2713aSLionel Sambuc; AVX2: vpsubusb %xmm0, %xmm1, %xmm1
190*f4a2713aSLionel Sambuc}
191*f4a2713aSLionel Sambuc
; test7: <16 x i16> lanes (256-bit), constant subtrahend, sign-bit form.
; Per lane: (x xor -32768) when x is negative as a signed value, otherwise
; zero - the unsigned saturating difference x - 32768.
; Only the AVX2 prefix has check lines here; they require a ymm-register
; vpsubusw with the memory operand LCPI6_0(%rip).
; NOTE(review): the index advances by 8 elements while every iteration loads
; and stores 16, so consecutive iterations overlap and the final one reaches
; past element 16383. Presumably irrelevant for an instruction-selection
; test - confirm before reusing this loop as a functional example.
192*f4a2713aSLionel Sambucdefine void @test7(i16* nocapture %head) nounwind {
193*f4a2713aSLionel Sambucvector.ph:
194*f4a2713aSLionel Sambuc  br label %vector.body
195*f4a2713aSLionel Sambuc
196*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
197*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
198*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i16* %head, i64 %index
199*f4a2713aSLionel Sambuc  %1 = bitcast i16* %0 to <16 x i16>*
200*f4a2713aSLionel Sambuc  %2 = load <16 x i16>* %1, align 2
201*f4a2713aSLionel Sambuc  %3 = icmp slt <16 x i16> %2, zeroinitializer
202*f4a2713aSLionel Sambuc  %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
203*f4a2713aSLionel Sambuc  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
204*f4a2713aSLionel Sambuc  store <16 x i16> %5, <16 x i16>* %1, align 2
205*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 8
206*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
207*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
208*f4a2713aSLionel Sambuc
209*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
210*f4a2713aSLionel Sambuc  ret void
211*f4a2713aSLionel Sambuc
212*f4a2713aSLionel Sambuc; AVX2: @test7
213*f4a2713aSLionel Sambuc; AVX2: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0
214*f4a2713aSLionel Sambuc}
215*f4a2713aSLionel Sambuc
; test8: <16 x i16> lanes (256-bit), constant subtrahend, compare-and-add
; form.
; Per lane: x + (-32767) when x is unsigned-greater than 32766, otherwise
; zero - the unsigned saturating difference x - 32767.
; Only the AVX2 prefix has check lines here; they require a ymm-register
; vpsubusw with the memory operand LCPI7_0(%rip).
; NOTE(review): the index advances by 8 elements while every iteration loads
; and stores 16, so consecutive iterations overlap and the final one reaches
; past element 16383. Presumably irrelevant for an instruction-selection
; test - confirm before reusing this loop as a functional example.
216*f4a2713aSLionel Sambucdefine void @test8(i16* nocapture %head) nounwind {
217*f4a2713aSLionel Sambucvector.ph:
218*f4a2713aSLionel Sambuc  br label %vector.body
219*f4a2713aSLionel Sambuc
220*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
221*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
222*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i16* %head, i64 %index
223*f4a2713aSLionel Sambuc  %1 = bitcast i16* %0 to <16 x i16>*
224*f4a2713aSLionel Sambuc  %2 = load <16 x i16>* %1, align 2
225*f4a2713aSLionel Sambuc  %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
226*f4a2713aSLionel Sambuc  %4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
227*f4a2713aSLionel Sambuc  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
228*f4a2713aSLionel Sambuc  store <16 x i16> %5, <16 x i16>* %1, align 2
229*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 8
230*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
231*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
232*f4a2713aSLionel Sambuc
233*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
234*f4a2713aSLionel Sambuc  ret void
235*f4a2713aSLionel Sambuc
236*f4a2713aSLionel Sambuc; AVX2: @test8
237*f4a2713aSLionel Sambuc; AVX2: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0
238*f4a2713aSLionel Sambuc}
239*f4a2713aSLionel Sambuc
; test9: <16 x i16> lanes (256-bit), variable subtrahend.
; The preheader splats the scalar %w across all 16 lanes (insertelement into
; lane 0 followed by a shufflevector with an all-zero mask).
; Per lane: zero when x is unsigned-less than w, otherwise x - w - the
; unsigned saturating difference x - w.
; Only the AVX2 prefix has check lines here; they require the
; register-register form vpsubusw %ymm0, %ymm1, %ymm1.
; NOTE(review): the index advances by 8 elements while every iteration loads
; and stores 16, so consecutive iterations overlap and the final one reaches
; past element 16383. Presumably irrelevant for an instruction-selection
; test - confirm before reusing this loop as a functional example.
240*f4a2713aSLionel Sambucdefine void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
241*f4a2713aSLionel Sambucvector.ph:
242*f4a2713aSLionel Sambuc  %0 = insertelement <16 x i16> undef, i16 %w, i32 0
243*f4a2713aSLionel Sambuc  %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
244*f4a2713aSLionel Sambuc  br label %vector.body
245*f4a2713aSLionel Sambuc
246*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
247*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
248*f4a2713aSLionel Sambuc  %1 = getelementptr inbounds i16* %head, i64 %index
249*f4a2713aSLionel Sambuc  %2 = bitcast i16* %1 to <16 x i16>*
250*f4a2713aSLionel Sambuc  %3 = load <16 x i16>* %2, align 2
251*f4a2713aSLionel Sambuc  %4 = icmp ult <16 x i16> %3, %broadcast15
252*f4a2713aSLionel Sambuc  %5 = sub <16 x i16> %3, %broadcast15
253*f4a2713aSLionel Sambuc  %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
254*f4a2713aSLionel Sambuc  store <16 x i16> %6, <16 x i16>* %2, align 2
255*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 8
256*f4a2713aSLionel Sambuc  %7 = icmp eq i64 %index.next, 16384
257*f4a2713aSLionel Sambuc  br i1 %7, label %for.end, label %vector.body
258*f4a2713aSLionel Sambuc
259*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
260*f4a2713aSLionel Sambuc  ret void
261*f4a2713aSLionel Sambuc
262*f4a2713aSLionel Sambuc
263*f4a2713aSLionel Sambuc; AVX2: @test9
264*f4a2713aSLionel Sambuc; AVX2: vpsubusw %ymm0, %ymm1, %ymm1
265*f4a2713aSLionel Sambuc}
266*f4a2713aSLionel Sambuc
; test10: <32 x i8> lanes (256-bit), constant subtrahend, sign-bit form.
; Per lane: (x xor -128) when x is negative as a signed byte, otherwise
; zero - the unsigned saturating difference x - 128.
; Only the AVX2 prefix has check lines here; they require a ymm-register
; vpsubusb with the memory operand LCPI9_0(%rip).
; NOTE(review): the index advances by 16 bytes while every iteration loads
; and stores 32, so consecutive iterations overlap and the final one reaches
; past byte 16383. Presumably irrelevant for an instruction-selection test -
; confirm before reusing this loop as a functional example.
267*f4a2713aSLionel Sambucdefine void @test10(i8* nocapture %head) nounwind {
268*f4a2713aSLionel Sambucvector.ph:
269*f4a2713aSLionel Sambuc  br label %vector.body
270*f4a2713aSLionel Sambuc
271*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
272*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
273*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i8* %head, i64 %index
274*f4a2713aSLionel Sambuc  %1 = bitcast i8* %0 to <32 x i8>*
275*f4a2713aSLionel Sambuc  %2 = load <32 x i8>* %1, align 1
276*f4a2713aSLionel Sambuc  %3 = icmp slt <32 x i8> %2, zeroinitializer
277*f4a2713aSLionel Sambuc  %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
278*f4a2713aSLionel Sambuc  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
279*f4a2713aSLionel Sambuc  store <32 x i8> %5, <32 x i8>* %1, align 1
280*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 16
281*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
282*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
283*f4a2713aSLionel Sambuc
284*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
285*f4a2713aSLionel Sambuc  ret void
286*f4a2713aSLionel Sambuc
287*f4a2713aSLionel Sambuc
288*f4a2713aSLionel Sambuc; AVX2: @test10
289*f4a2713aSLionel Sambuc; AVX2: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0
290*f4a2713aSLionel Sambuc}
291*f4a2713aSLionel Sambuc
; test11: <32 x i8> lanes (256-bit), constant subtrahend, compare-and-add
; form.
; Per lane: x + (-127) when x is unsigned-greater than 126, otherwise zero -
; the unsigned saturating difference x - 127.
; Only the AVX2 prefix has check lines here; they require a ymm-register
; vpsubusb with the memory operand LCPI10_0(%rip).
; NOTE(review): the index advances by 16 bytes while every iteration loads
; and stores 32, so consecutive iterations overlap and the final one reaches
; past byte 16383. Presumably irrelevant for an instruction-selection test -
; confirm before reusing this loop as a functional example.
292*f4a2713aSLionel Sambucdefine void @test11(i8* nocapture %head) nounwind {
293*f4a2713aSLionel Sambucvector.ph:
294*f4a2713aSLionel Sambuc  br label %vector.body
295*f4a2713aSLionel Sambuc
296*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
297*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
298*f4a2713aSLionel Sambuc  %0 = getelementptr inbounds i8* %head, i64 %index
299*f4a2713aSLionel Sambuc  %1 = bitcast i8* %0 to <32 x i8>*
300*f4a2713aSLionel Sambuc  %2 = load <32 x i8>* %1, align 1
301*f4a2713aSLionel Sambuc  %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
302*f4a2713aSLionel Sambuc  %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
303*f4a2713aSLionel Sambuc  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
304*f4a2713aSLionel Sambuc  store <32 x i8> %5, <32 x i8>* %1, align 1
305*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 16
306*f4a2713aSLionel Sambuc  %6 = icmp eq i64 %index.next, 16384
307*f4a2713aSLionel Sambuc  br i1 %6, label %for.end, label %vector.body
308*f4a2713aSLionel Sambuc
309*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
310*f4a2713aSLionel Sambuc  ret void
311*f4a2713aSLionel Sambuc
312*f4a2713aSLionel Sambuc; AVX2: @test11
313*f4a2713aSLionel Sambuc; AVX2: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0
314*f4a2713aSLionel Sambuc}
315*f4a2713aSLionel Sambuc
; test12: <32 x i8> lanes (256-bit), variable subtrahend.
; The preheader splats the scalar %w across all 32 lanes (insertelement into
; lane 0 followed by a shufflevector with an all-zero mask).
; Per lane: zero when x is unsigned-less than w, otherwise x - w - the
; unsigned saturating difference x - w.
; Only the AVX2 prefix has check lines here; they require the
; register-register form vpsubusb %ymm0, %ymm1, %ymm1.
; NOTE(review): the index advances by 16 bytes while every iteration loads
; and stores 32, so consecutive iterations overlap and the final one reaches
; past byte 16383. Presumably irrelevant for an instruction-selection test -
; confirm before reusing this loop as a functional example.
316*f4a2713aSLionel Sambucdefine void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
317*f4a2713aSLionel Sambucvector.ph:
318*f4a2713aSLionel Sambuc  %0 = insertelement <32 x i8> undef, i8 %w, i32 0
319*f4a2713aSLionel Sambuc  %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
320*f4a2713aSLionel Sambuc  br label %vector.body
321*f4a2713aSLionel Sambuc
322*f4a2713aSLionel Sambucvector.body:                                      ; preds = %vector.body, %vector.ph
323*f4a2713aSLionel Sambuc  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
324*f4a2713aSLionel Sambuc  %1 = getelementptr inbounds i8* %head, i64 %index
325*f4a2713aSLionel Sambuc  %2 = bitcast i8* %1 to <32 x i8>*
326*f4a2713aSLionel Sambuc  %3 = load <32 x i8>* %2, align 1
327*f4a2713aSLionel Sambuc  %4 = icmp ult <32 x i8> %3, %broadcast15
328*f4a2713aSLionel Sambuc  %5 = sub <32 x i8> %3, %broadcast15
329*f4a2713aSLionel Sambuc  %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
330*f4a2713aSLionel Sambuc  store <32 x i8> %6, <32 x i8>* %2, align 1
331*f4a2713aSLionel Sambuc  %index.next = add i64 %index, 16
332*f4a2713aSLionel Sambuc  %7 = icmp eq i64 %index.next, 16384
333*f4a2713aSLionel Sambuc  br i1 %7, label %for.end, label %vector.body
334*f4a2713aSLionel Sambuc
335*f4a2713aSLionel Sambucfor.end:                                          ; preds = %vector.body
336*f4a2713aSLionel Sambuc  ret void
337*f4a2713aSLionel Sambuc
338*f4a2713aSLionel Sambuc; AVX2: @test12
339*f4a2713aSLionel Sambuc; AVX2: vpsubusb %ymm0, %ymm1, %ymm1
340*f4a2713aSLionel Sambuc}
341