; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

; Codegen tests for the signed saturating left-shift intrinsic (llvm.sshl.sat)
; on vector types, covering x86-64 SSE2, x86-64 AVX2, and 32-bit x86 (which
; scalarizes every lane). The CHECK bodies below are machine-generated; do not
; edit them by hand — rerun utils/update_llc_test_checks.py instead.

declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>)

define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; X64-LABEL: vec_v2i64:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-NEXT: movdqa %xmm2, %xmm3
; X64-NEXT: psrlq %xmm1, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
; X64-NEXT: movdqa %xmm2, %xmm5
; X64-NEXT: psrlq %xmm4, %xmm5
; X64-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
; X64-NEXT: movdqa %xmm0, %xmm6
; X64-NEXT: psllq %xmm1, %xmm6
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: psllq %xmm4, %xmm3
; X64-NEXT: movdqa %xmm3, %xmm7
; X64-NEXT: movsd {{.*#+}} xmm3 = xmm6[0],xmm3[1]
; X64-NEXT: psrlq %xmm1, %xmm6
; X64-NEXT: psrlq %xmm4, %xmm7
; X64-NEXT: movsd {{.*#+}} xmm7 = xmm6[0],xmm7[1]
; X64-NEXT: xorpd %xmm5, %xmm7
; X64-NEXT: psubq %xmm5, %xmm7
; X64-NEXT: pcmpeqd %xmm0, %xmm7
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,0,3,2]
; X64-NEXT: pand %xmm7, %xmm1
; X64-NEXT: andpd %xmm1, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pxor %xmm5, %xmm5
; X64-NEXT: pcmpgtd %xmm4, %xmm5
; X64-NEXT: por %xmm2, %xmm5
; X64-NEXT: pcmpeqd %xmm2, %xmm2
; X64-NEXT: pxor %xmm5, %xmm2
; X64-NEXT: por %xmm0, %xmm2
; X64-NEXT: pandn %xmm2, %xmm1
; X64-NEXT: por %xmm3, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-AVX2-LABEL: vec_v2i64:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: # xmm2 = mem[0,0]
; X64-AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807]
; X64-AVX2-NEXT: # xmm3 = mem[0,0]
; X64-AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm3
; X64-AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
; X64-AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm4
; X64-AVX2-NEXT: vpsrlvq %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vblendvpd %xmm0, %xmm4, %xmm3, %xmm0
; X64-AVX2-NEXT: retq
;
; X86-LABEL: vec_v2i64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edx, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: shldl %cl, %edx, %edi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: cmovnel %eax, %edi
; X86-NEXT: cmovnel %edx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: sarl %cl, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %eax
; X86-NEXT: testb $32, %cl
; X86-NEXT: cmovel %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: shldl %cl, %esi, %ebx
; X86-NEXT: testb $32, %ch
; X86-NEXT: cmovnel %eax, %ebx
; X86-NEXT: cmovnel %edx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: sarl %cl, %esi
; X86-NEXT: movl %ebx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: testb $32, %ch
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrdl %cl, %edi, %eax
; X86-NEXT: testb $32, %cl
; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shrdl %cl, %ebx, %edi
; X86-NEXT: testb $32, %ch
; X86-NEXT: cmovnel %esi, %edi
; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: xorl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: notl %esi
; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: xorl {{[0-9]+}}(%esp), %edi
; X86-NEXT: xorl %ebp, %edx
; X86-NEXT: sarl $31, %ebp
; X86-NEXT: movl %ebp, %esi
; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-NEXT: orl %edx, %edi
; X86-NEXT: notl %ebp
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: cmovel %ebx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: movl %ebp, 8(%eax)
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
}

define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec_v4i32:
; X64: # %bb.0:
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[2,3,3,3,4,5,6,7]
; X64-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,1,1,1,4,5,6,7]
; X64-NEXT: pslld $23, %xmm1
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: cvttps2dq %xmm1, %xmm6
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: pmuludq %xmm6, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X64-NEXT: pmuludq %xmm7, %xmm6
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1]
; X64-NEXT: pshuflw {{.*#+}} xmm6 = xmm3[2,3,3,3,4,5,6,7]
; X64-NEXT: movdqa %xmm2, %xmm7
; X64-NEXT: psrad %xmm6, %xmm7
; X64-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,1,1,4,5,6,7]
; X64-NEXT: movdqa %xmm1, %xmm6
; X64-NEXT: psrad %xmm3, %xmm6
; X64-NEXT: punpckhqdq {{.*#+}} xmm6 = xmm6[1],xmm7[1]
; X64-NEXT: movdqa %xmm2, %xmm3
; X64-NEXT: psrad %xmm4, %xmm3
; X64-NEXT: psrad %xmm5, %xmm1
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm6[0,3]
; X64-NEXT: pcmpeqd %xmm0, %xmm1
; X64-NEXT: pand %xmm1, %xmm2
; X64-NEXT: pxor %xmm3, %xmm3
; X64-NEXT: pcmpgtd %xmm0, %xmm3
; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: por %xmm3, %xmm0
; X64-NEXT: pandn %xmm0, %xmm1
; X64-NEXT: por %xmm2, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-AVX2-LABEL: vec_v4i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
; X64-AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm2
; X64-AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm3
; X64-AVX2-NEXT: vpsravd %xmm1, %xmm3, %xmm1
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; X64-AVX2-NEXT: retq
;
; X86-LABEL: vec_v4i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: sarl %cl, %ebp
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: sets %bl
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
; X86-NEXT: cmpl %ebp, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: cmovel %edx, %ebx
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shll %cl, %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: sarl %cl, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: sets %dl
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: cmpl %eax, %edi
; X86-NEXT: cmovel %ebp, %edx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sarl %cl, %ebp
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testl %esi, %esi
; X86-NEXT: sets %al
; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: cmpl %ebp, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sarl %cl, %ebp
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testl %esi, %esi
; X86-NEXT: sets %cl
; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-NEXT: cmpl %ebp, %esi
; X86-NEXT: cmovel %edi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, 12(%esi)
; X86-NEXT: movl %eax, 8(%esi)
; X86-NEXT: movl %edx, 4(%esi)
; X86-NEXT: movl %ebx, (%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %tmp
}

define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X64-LABEL: vec_v8i16:
; X64: # %bb.0:
; X64-NEXT: movdqa %xmm1, %xmm2
; X64-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; X64-NEXT: pslld $23, %xmm2
; X64-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; X64-NEXT: paddd %xmm3, %xmm2
; X64-NEXT: cvttps2dq %xmm2, %xmm2
; X64-NEXT: pslld $16, %xmm2
; X64-NEXT: psrad $16, %xmm2
; X64-NEXT: movdqa %xmm1, %xmm4
; X64-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3]
; X64-NEXT: pslld $23, %xmm4
; X64-NEXT: paddd %xmm3, %xmm4
; X64-NEXT: cvttps2dq %xmm4, %xmm3
; X64-NEXT: pslld $16, %xmm3
; X64-NEXT: psrad $16, %xmm3
; X64-NEXT: packssdw %xmm2, %xmm3
; X64-NEXT: pmullw %xmm0, %xmm3
; X64-NEXT: psllw $12, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm2
; X64-NEXT: psraw $15, %xmm2
; X64-NEXT: movdqa %xmm3, %xmm4
; X64-NEXT: psraw $8, %xmm4
; X64-NEXT: pand %xmm2, %xmm4
; X64-NEXT: pandn %xmm3, %xmm2
; X64-NEXT: por %xmm4, %xmm2
; X64-NEXT: paddw %xmm1, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm4
; X64-NEXT: psraw $15, %xmm4
; X64-NEXT: movdqa %xmm4, %xmm5
; X64-NEXT: pandn %xmm2, %xmm5
; X64-NEXT: psraw $4, %xmm2
; X64-NEXT: pand %xmm4, %xmm2
; X64-NEXT: por %xmm5, %xmm2
; X64-NEXT: paddw %xmm1, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm4
; X64-NEXT: psraw $15, %xmm4
; X64-NEXT: movdqa %xmm4, %xmm5
; X64-NEXT: pandn %xmm2, %xmm5
; X64-NEXT: psraw $2, %xmm2
; X64-NEXT: pand %xmm4, %xmm2
; X64-NEXT: por %xmm5, %xmm2
; X64-NEXT: paddw %xmm1, %xmm1
; X64-NEXT: psraw $15, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm4
; X64-NEXT: pandn %xmm2, %xmm4
; X64-NEXT: psraw $1, %xmm2
; X64-NEXT: pand %xmm1, %xmm2
; X64-NEXT: por %xmm4, %xmm2
; X64-NEXT: pcmpeqw %xmm0, %xmm2
; X64-NEXT: pand %xmm2, %xmm3
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtw %xmm0, %xmm1
; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: pandn %xmm0, %xmm2
; X64-NEXT: por %xmm3, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX2-LABEL: vec_v8i16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-AVX2-NEXT: vpsllvd %ymm1, %ymm2, %ymm2
; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; X64-AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; X64-AVX2-NEXT: vpmovsxwd %xmm2, %ymm3
; X64-AVX2-NEXT: vpsravd %ymm1, %ymm3, %ymm1
; X64-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
; X64-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; X64-AVX2-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X86-LABEL: vec_v8i16:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: movswl %bx, %ebp
; X86-NEXT: sarl %cl, %ebp
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testw %di, %di
; X86-NEXT: sets %cl
; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
; X86-NEXT: cmpw %bp, %di
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmovel %ebx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movswl %di, %ebx
; X86-NEXT: sarl %cl, %ebx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testw %si, %si
; X86-NEXT: sets %al
; X86-NEXT: addl $32767, %eax # imm = 0x7FFF
; X86-NEXT: cmpw %bx, %si
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movswl %si, %edi
; X86-NEXT: sarl %cl, %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testw %dx, %dx
; X86-NEXT: sets %al
; X86-NEXT: addl $32767, %eax # imm = 0x7FFF
; X86-NEXT: cmpw %di, %dx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: movswl %dx, %esi
; X86-NEXT: sarl %cl, %esi
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: testw %ax, %ax
; X86-NEXT: sets %bl
; X86-NEXT: addl $32767, %ebx # imm = 0x7FFF
; X86-NEXT: cmpw %si, %ax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %edx, %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: movswl %dx, %esi
; X86-NEXT: sarl %cl, %esi
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testw %ax, %ax
; X86-NEXT: sets %cl
; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
; X86-NEXT: cmpw %si, %ax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmovel %edx, %ecx
; X86-NEXT: movl %ecx, %ebp
; X86-NEXT: movl %eax, %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: movswl %dx, %esi
; X86-NEXT: sarl %cl, %esi
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: testw %ax, %ax
; X86-NEXT: sets %bl
; X86-NEXT: addl $32767, %ebx # imm = 0x7FFF
; X86-NEXT: cmpw %si, %ax
; X86-NEXT: cmovel %edx, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movswl %si, %edi
; X86-NEXT: sarl %cl, %edi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testw %ax, %ax
; X86-NEXT: sets %dl
; X86-NEXT: addl $32767, %edx # imm = 0x7FFF
; X86-NEXT: cmpw %di, %ax
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movswl %si, %edi
; X86-NEXT: sarl %cl, %edi
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testw %ax, %ax
; X86-NEXT: sets %cl
; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
; X86-NEXT: cmpw %di, %ax
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movw %cx, 14(%eax)
; X86-NEXT: movw %dx, 12(%eax)
; X86-NEXT: movw %bx, 10(%eax)
; X86-NEXT: movw %bp, 8(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movw %cx, 6(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movw %cx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movw %cx, 2(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movw %cx, (%eax)
; X86-NEXT: addl $16, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %tmp
}

define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; X64-LABEL: vec_v16i8:
; X64: # %bb.0:
; X64-NEXT: psllw $5, %xmm1
; X64-NEXT: pxor %xmm3, %xmm3
; X64-NEXT: pxor %xmm4, %xmm4
; X64-NEXT: pcmpgtb %xmm1, %xmm4
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: psllw $4, %xmm2
; X64-NEXT: pand %xmm4, %xmm2
; X64-NEXT: pandn %xmm0, %xmm4
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; X64-NEXT: por %xmm4, %xmm2
; X64-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
; X64-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; X64-NEXT: paddb %xmm1, %xmm1
; X64-NEXT: pxor %xmm6, %xmm6
; X64-NEXT: pcmpgtb %xmm1, %xmm6
; X64-NEXT: movdqa %xmm6, %xmm7
; X64-NEXT: pandn %xmm2, %xmm7
; X64-NEXT: psllw $2, %xmm2
; X64-NEXT: pand %xmm6, %xmm2
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; X64-NEXT: por %xmm7, %xmm2
; X64-NEXT: paddb %xmm1, %xmm1
; X64-NEXT: pxor %xmm6, %xmm6
; X64-NEXT: pcmpgtb %xmm1, %xmm6
; X64-NEXT: movdqa %xmm6, %xmm1
; X64-NEXT: pandn %xmm2, %xmm1
; X64-NEXT: paddb %xmm2, %xmm2
; X64-NEXT: pand %xmm6, %xmm2
; X64-NEXT: por %xmm1, %xmm2
; X64-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm2[8],xmm6[9],xmm2[9],xmm6[10],xmm2[10],xmm6[11],xmm2[11],xmm6[12],xmm2[12],xmm6[13],xmm2[13],xmm6[14],xmm2[14],xmm6[15],xmm2[15]
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtw %xmm4, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm7
; X64-NEXT: pandn %xmm6, %xmm7
; X64-NEXT: psraw $4, %xmm6
; X64-NEXT: pand %xmm1, %xmm6
; X64-NEXT: por %xmm7, %xmm6
; X64-NEXT: paddw %xmm4, %xmm4
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtw %xmm4, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm7
; X64-NEXT: pandn %xmm6, %xmm7
; X64-NEXT: psraw $2, %xmm6
; X64-NEXT: pand %xmm1, %xmm6
; X64-NEXT: por %xmm7, %xmm6
; X64-NEXT: paddw %xmm4, %xmm4
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtw %xmm4, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm4
; X64-NEXT: pandn %xmm6, %xmm4
; X64-NEXT: psraw $1, %xmm6
; X64-NEXT: pand %xmm1, %xmm6
; X64-NEXT: por %xmm4, %xmm6
; X64-NEXT: psrlw $8, %xmm6
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; X64-NEXT: pxor %xmm4, %xmm4
; X64-NEXT: pcmpgtw %xmm5, %xmm4
; X64-NEXT: movdqa %xmm4, %xmm7
; X64-NEXT: pandn %xmm1, %xmm7
; X64-NEXT: psraw $4, %xmm1
; X64-NEXT: pand %xmm4, %xmm1
; X64-NEXT: por %xmm7, %xmm1
; X64-NEXT: paddw %xmm5, %xmm5
; X64-NEXT: pxor %xmm4, %xmm4
; X64-NEXT: pcmpgtw %xmm5, %xmm4
; X64-NEXT: movdqa %xmm4, %xmm7
; X64-NEXT: pandn %xmm1, %xmm7
; X64-NEXT: psraw $2, %xmm1
; X64-NEXT: pand %xmm4, %xmm1
; X64-NEXT: por %xmm7, %xmm1
; X64-NEXT: paddw %xmm5, %xmm5
; X64-NEXT: pxor %xmm4, %xmm4
; X64-NEXT: pcmpgtw %xmm5, %xmm4
; X64-NEXT: movdqa %xmm4, %xmm5
; X64-NEXT: pandn %xmm1, %xmm5
; X64-NEXT: psraw $1, %xmm1
; X64-NEXT: pand %xmm4, %xmm1
; X64-NEXT: por %xmm5, %xmm1
; X64-NEXT: psrlw $8, %xmm1
; X64-NEXT: packuswb %xmm6, %xmm1
; X64-NEXT: pcmpeqb %xmm0, %xmm1
; X64-NEXT: pand %xmm1, %xmm2
; X64-NEXT: pcmpgtb %xmm0, %xmm3
; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: por %xmm3, %xmm0
; X64-NEXT: pandn %xmm0, %xmm1
; X64-NEXT: por %xmm2, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-AVX2-LABEL: vec_v16i8:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsllw $5, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsllw $4, %xmm0, %xmm2
; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsllw $2, %xmm2, %xmm3
; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-AVX2-NEXT: vpaddb %xmm1, %xmm1, %xmm4
; X64-AVX2-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vpaddb %xmm2, %xmm2, %xmm3
; X64-AVX2-NEXT: vpaddb %xmm4, %xmm4, %xmm4
; X64-AVX2-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-AVX2-NEXT: vpsraw $4, %xmm3, %xmm4
; X64-AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; X64-AVX2-NEXT: vpsraw $2, %xmm3, %xmm4
; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5
; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; X64-AVX2-NEXT: vpsraw $1, %xmm3, %xmm4
; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5
; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; X64-AVX2-NEXT: vpsrlw $8, %xmm3, %xmm3
; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-AVX2-NEXT: vpsraw $4, %xmm4, %xmm5
; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsraw $2, %xmm4, %xmm5
; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsraw $1, %xmm4, %xmm5
; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm1
; X64-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
; X64-AVX2-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
; X64-AVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
;
; X86-LABEL: vec_v16i8:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $44, %esp
; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movb %ch, %bh
; X86-NEXT: shlb %cl, %bh
; X86-NEXT: movzbl %bh, %esi
; X86-NEXT: sarb %cl, %bh
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testb %ch, %ch
; X86-NEXT: sets %al
; X86-NEXT: addl $127, %eax
; X86-NEXT: cmpb %bh, %ch
; X86-NEXT: cmovel %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: shlb %cl, %al
; X86-NEXT: movzbl %al, %esi
; X86-NEXT: sarb %cl, %al
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %bl, %bl
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %al, %bl
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movb %dh, %al
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb %cl, %al
; X86-NEXT: movzbl %al, %esi
; X86-NEXT: sarb %cl, %al
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %dh, %dh
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %al, %dh
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: movb %ah, %al
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb %cl, %al
; X86-NEXT: movzbl %al, %esi
; X86-NEXT: sarb %cl, %al
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testb %ah, %ah
; X86-NEXT: sets %dl
; X86-NEXT: addl $127, %edx
; X86-NEXT: cmpb %al, %ah
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, %ebp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: sarb %cl, %dl
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %bl
; X86-NEXT: addl $127, %ebx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovel %esi, %ebx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %al, %ah
; X86-NEXT: shlb %cl, %ah
; X86-NEXT: movzbl %ah, %esi
; X86-NEXT: sarb %cl, %ah
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %dl
; X86-NEXT: addl $127, %edx
; X86-NEXT: cmpb %ah, %al
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %al, %ah
; X86-NEXT: shlb %cl, %ah
; X86-NEXT: movzbl %ah, %esi
; X86-NEXT: sarb %cl, %ah
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb %al, %al
; X86-NEXT: sets %cl
; X86-NEXT: addl $127, %ecx
; X86-NEXT: cmpb %ah, %al
; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %cl, 15(%eax)
; X86-NEXT: movb %dl, 14(%eax)
; X86-NEXT: movb %bl, 13(%eax)
; X86-NEXT: movl %edi, %ecx
; X86-NEXT: movb %cl, 12(%eax)
; X86-NEXT: movl %ebp, %ecx
; X86-NEXT: movb %cl, 11(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 10(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 9(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 7(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 6(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 5(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 3(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 2(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 1(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: addl $44, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %tmp
}