; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2

; Each function below is a loop whose body computes an unsigned saturating
; subtraction spelled as compare + arithmetic + select. The check lines expect
; the x86 backend to emit a single (v)psubusb / (v)psubusw for that body.
;
; Three spellings of the idiom are exercised per element type / vector width:
;   (a) x <s 0        ? x ^ SIGNBIT : 0      == x -sat SIGNBIT
;   (b) x >u (C - 1)  ? x + (-C)    : 0      == x -sat C
;   (c) x <u w        ? 0           : x - w  == x -sat w   (w is a splat)
;
; test1-test6 use 128-bit vectors and are checked for all three CPU settings.
; test7-test12 use 256-bit vectors and are checked only for the core-avx2 run.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Form (a) on <8 x i16>, constant operand expected from the constant pool.
define void @test1(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <8 x i16>*
  %2 = load <8 x i16>* %1, align 2
  %3 = icmp slt <8 x i16> %2, zeroinitializer
  %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
  store <8 x i16> %5, <8 x i16>* %1, align 2
  %index.next = add i64 %index, 8
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test1
; SSE2: psubusw LCPI0_0(%rip), %xmm0

; AVX1: @test1
; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0

; AVX2: @test1
; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
}

; Form (b) on <8 x i16> with C = 32767.
define void @test2(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <8 x i16>*
  %2 = load <8 x i16>* %1, align 2
  %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
  %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
  store <8 x i16> %5, <8 x i16>* %1, align 2
  %index.next = add i64 %index, 8
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test2
; SSE2: psubusw LCPI1_0(%rip), %xmm0

; AVX1: @test2
; AVX1: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0

; AVX2: @test2
; AVX2: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
}

; Form (c) on <8 x i16>; the subtrahend is %w splatted across all lanes, so a
; register-register form is expected.
define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
vector.ph:
  %0 = insertelement <8 x i16> undef, i16 %w, i32 0
  %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i16* %head, i64 %index
  %2 = bitcast i16* %1 to <8 x i16>*
  %3 = load <8 x i16>* %2, align 2
  %4 = icmp ult <8 x i16> %3, %broadcast15
  %5 = sub <8 x i16> %3, %broadcast15
  %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
  store <8 x i16> %6, <8 x i16>* %2, align 2
  %index.next = add i64 %index, 8
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test3
; SSE2: psubusw %xmm0, %xmm1

; AVX1: @test3
; AVX1: vpsubusw %xmm0, %xmm1, %xmm1

; AVX2: @test3
; AVX2: vpsubusw %xmm0, %xmm1, %xmm1
}

; Form (a) on <16 x i8>.
define void @test4(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <16 x i8>*
  %2 = load <16 x i8>* %1, align 1
  %3 = icmp slt <16 x i8> %2, zeroinitializer
  %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
  store <16 x i8> %5, <16 x i8>* %1, align 1
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test4
; SSE2: psubusb LCPI3_0(%rip), %xmm0

; AVX1: @test4
; AVX1: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0

; AVX2: @test4
; AVX2: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
}

; Form (b) on <16 x i8> with C = 127.
define void @test5(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <16 x i8>*
  %2 = load <16 x i8>* %1, align 1
  %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
  %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
  store <16 x i8> %5, <16 x i8>* %1, align 1
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test5
; SSE2: psubusb LCPI4_0(%rip), %xmm0

; AVX1: @test5
; AVX1: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0

; AVX2: @test5
; AVX2: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
}

; Form (c) on <16 x i8> with a splatted %w.
define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
vector.ph:
  %0 = insertelement <16 x i8> undef, i8 %w, i32 0
  %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i8* %head, i64 %index
  %2 = bitcast i8* %1 to <16 x i8>*
  %3 = load <16 x i8>* %2, align 1
  %4 = icmp ult <16 x i8> %3, %broadcast15
  %5 = sub <16 x i8> %3, %broadcast15
  %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
  store <16 x i8> %6, <16 x i8>* %2, align 1
  %index.next = add i64 %index, 16
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test6
; SSE2: psubusb %xmm0, %xmm1

; AVX1: @test6
; AVX1: vpsubusb %xmm0, %xmm1, %xmm1

; AVX2: @test6
; AVX2: vpsubusb %xmm0, %xmm1, %xmm1
}

; Form (a) on <16 x i16> (256-bit); only the core-avx2 run is checked.
; NOTE(review): %index advances by 8 although each access covers 16 elements;
; this does not affect the instruction being checked, but looks like it was
; carried over from the 128-bit variant - confirm before changing.
define void @test7(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <16 x i16>*
  %2 = load <16 x i16>* %1, align 2
  %3 = icmp slt <16 x i16> %2, zeroinitializer
  %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
  store <16 x i16> %5, <16 x i16>* %1, align 2
  %index.next = add i64 %index, 8
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test7
; AVX2: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0
}

; Form (b) on <16 x i16> with C = 32767; only the core-avx2 run is checked.
; NOTE(review): induction step is 8 for a 16-element access, as in test7.
define void @test8(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <16 x i16>*
  %2 = load <16 x i16>* %1, align 2
  %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
  %4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
  store <16 x i16> %5, <16 x i16>* %1, align 2
  %index.next = add i64 %index, 8
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test8
; AVX2: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0
}

; Form (c) on <16 x i16> with a splatted %w; only the core-avx2 run is checked.
; NOTE(review): induction step is 8 for a 16-element access, as in test7.
define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
vector.ph:
  %0 = insertelement <16 x i16> undef, i16 %w, i32 0
  %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i16* %head, i64 %index
  %2 = bitcast i16* %1 to <16 x i16>*
  %3 = load <16 x i16>* %2, align 2
  %4 = icmp ult <16 x i16> %3, %broadcast15
  %5 = sub <16 x i16> %3, %broadcast15
  %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
  store <16 x i16> %6, <16 x i16>* %2, align 2
  %index.next = add i64 %index, 8
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test9
; AVX2: vpsubusw %ymm0, %ymm1, %ymm1
}

; Form (a) on <32 x i8> (256-bit); only the core-avx2 run is checked.
; NOTE(review): %index advances by 16 although each access covers 32 elements;
; this does not affect the instruction being checked - confirm before changing.
define void @test10(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <32 x i8>*
  %2 = load <32 x i8>* %1, align 1
  %3 = icmp slt <32 x i8> %2, zeroinitializer
  %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
  store <32 x i8> %5, <32 x i8>* %1, align 1
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test10
; AVX2: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0
}

; Form (b) on <32 x i8> with C = 127; only the core-avx2 run is checked.
; NOTE(review): induction step is 16 for a 32-element access, as in test10.
define void @test11(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <32 x i8>*
  %2 = load <32 x i8>* %1, align 1
  %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
  %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
  store <32 x i8> %5, <32 x i8>* %1, align 1
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test11
; AVX2: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0
}

; Form (c) on <32 x i8> with a splatted %w; only the core-avx2 run is checked.
; NOTE(review): induction step is 16 for a 32-element access, as in test10.
define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
vector.ph:
  %0 = insertelement <32 x i8> undef, i8 %w, i32 0
  %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i8* %head, i64 %index
  %2 = bitcast i8* %1 to <32 x i8>*
  %3 = load <32 x i8>* %2, align 1
  %4 = icmp ult <32 x i8> %3, %broadcast15
  %5 = sub <32 x i8> %3, %broadcast15
  %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
  store <32 x i8> %6, <32 x i8>* %2, align 1
  %index.next = add i64 %index, 16
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test12
; AVX2: vpsubusb %ymm0, %ymm1, %ymm1
}