; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4,X86-SSE,X86-SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4,X64-SSE,X64-SSE4
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,X64-AVX,X64-AVX2

define <4 x i32> @trunc_lshr_v4i64(<4 x i64> %a) nounwind {
; SSE2-LABEL: trunc_lshr_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $63, %xmm1
; SSE2-NEXT: psrlq $63, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE4-LABEL: trunc_lshr_v4i64:
; SSE4: # %bb.0:
; SSE4-NEXT: psrlq $63, %xmm1
; SSE4-NEXT: psrlq $63, %xmm0
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: trunc_lshr_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: trunc_lshr_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: ret{{[l|q]}}
  %1 = lshr <4 x i64> %a, <i64 63, i64 63, i64 63, i64 63>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_lshr_v4i64_bitcast(<4 x i64> %a0) {
; SSE2-LABEL: trunc_lshr_v4i64_bitcast:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $49, %xmm1
; SSE2-NEXT: psrlq $49, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE4-LABEL: trunc_lshr_v4i64_bitcast:
; SSE4: # %bb.0:
; SSE4-NEXT: psrlq $49, %xmm1
; SSE4-NEXT: psrlq $49, %xmm0
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: trunc_lshr_v4i64_bitcast:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlq $49, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: trunc_lshr_v4i64_bitcast:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlq $49, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: ret{{[l|q]}}
  %1 = lshr <4 x i64> %a0, <i64 49, i64 49, i64 49, i64 49>
  %2 = bitcast <4 x i64> %1 to <8 x i32>
  %3 = trunc <8 x i32> %2 to <8 x i16>
  ret <8 x i16> %3
}

define <8 x i16> @trunc_lshr_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: trunc_lshr_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: psrld $31, %xmm1
; SSE2-NEXT: psrld $31, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE4-LABEL: trunc_lshr_v8i32:
; SSE4: # %bb.0:
; SSE4-NEXT: psrld $31, %xmm1
; SSE4-NEXT: psrld $31, %xmm0
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: trunc_lshr_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: trunc_lshr_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: ret{{[l|q]}}
  %1 = lshr <8 x i32> %a, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_lshr_v4i64_demandedelts(<4 x i64> %a0) {
; SSE2-LABEL: trunc_lshr_v4i64_demandedelts:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE4-LABEL: trunc_lshr_v4i64_demandedelts:
; SSE4: # %bb.0:
; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE4-NEXT: pmovsxbd {{.*#+}} xmm2 = [1,1,1,1]
; SSE4-NEXT: pand %xmm2, %xmm1
; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE4-NEXT: pand %xmm2, %xmm0
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: ret{{[l|q]}}
;
; X86-AVX1-LABEL: trunc_lshr_v4i64_demandedelts:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: trunc_lshr_v4i64_demandedelts:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_lshr_v4i64_demandedelts:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: ret{{[l|q]}}
  %1 = shl <4 x i64> %a0, <i64 63, i64 0, i64 63, i64 0>
  %2 = lshr <4 x i64> %1, <i64 63, i64 0, i64 63, i64 0>
  %3 = bitcast <4 x i64> %2 to <8 x i32>
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %5 = trunc <8 x i32> %4 to <8 x i16>
  ret <8 x i16> %5
}

define <16 x i8> @shuffle_lshr_2v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: shuffle_lshr_2v8i16:
; SSE: # %bb.0:
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: psrlw $15, %xmm1
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: shuffle_lshr_2v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $15, %xmm1, %xmm1
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %lshr0 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %lshr1 = lshr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %bc0 = bitcast <8 x i16> %lshr0 to <16 x i8>
  %bc1 = bitcast <8 x i16> %lshr1 to <16 x i8>
  %res = shufflevector <16 x i8> %bc0, <16 x i8> %bc1, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ret <16 x i8> %res
}

define <8 x i16> @shuffle_lshr_2v4i32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE2-LABEL: shuffle_lshr_2v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: psrld $31, %xmm0
; SSE2-NEXT: psrld $31, %xmm1
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE4-LABEL: shuffle_lshr_2v4i32:
; SSE4: # %bb.0:
; SSE4-NEXT: psrld $31, %xmm0
; SSE4-NEXT: psrld $31, %xmm1
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: shuffle_lshr_2v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %lshr0 = lshr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
  %lshr1 = lshr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
  %bc0 = bitcast <4 x i32> %lshr0 to <8 x i16>
  %bc1 = bitcast <4 x i32> %lshr1 to <8 x i16>
  %res = shufflevector <8 x i16> %bc0, <8 x i16> %bc1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ret <8 x i16> %res
}

define <4 x i32> @shuffle_lshr_2v2i64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE2-LABEL: shuffle_lshr_2v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $63, %xmm0
; SSE2-NEXT: psrlq $63, %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE4-LABEL: shuffle_lshr_2v2i64:
; SSE4: # %bb.0:
; SSE4-NEXT: psrlq $63, %xmm0
; SSE4-NEXT: psrlq $63, %xmm1
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: shuffle_lshr_2v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $63, %xmm1, %xmm1
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %lshr0 = lshr <2 x i64> %a0, <i64 63, i64 63>
  %lshr1 = lshr <2 x i64> %a1, <i64 63, i64 63>
  %bc0 = bitcast <2 x i64> %lshr0 to <4 x i32>
  %bc1 = bitcast <2 x i64> %lshr1 to <4 x i32>
  %res = shufflevector <4 x i32> %bc0, <4 x i32> %bc1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %res
}

define <4 x float> @shuffle_lshr_2v2i64_bitcast(<2 x i64> %a0, <2 x i64> %a1) {
; SSE2-LABEL: shuffle_lshr_2v2i64_bitcast:
; SSE2: # %bb.0:
; SSE2-NEXT: psrlq $63, %xmm0
; SSE2-NEXT: psrlq $63, %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE4-LABEL: shuffle_lshr_2v2i64_bitcast:
; SSE4: # %bb.0:
; SSE4-NEXT: psrlq $63, %xmm0
; SSE4-NEXT: psrlq $63, %xmm1
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: shuffle_lshr_2v2i64_bitcast:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $63, %xmm1, %xmm1
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %lshr0 = lshr <2 x i64> %a0, <i64 63, i64 63>
  %lshr1 = lshr <2 x i64> %a1, <i64 63, i64 63>
  %bc0 = bitcast <2 x i64> %lshr0 to <4 x float>
  %bc1 = bitcast <2 x i64> %lshr1 to <4 x float>
  %res = shufflevector <4 x float> %bc0, <4 x float> %bc1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x float> %res
}

define <16 x i8> @packuswb_icmp_zero_128(<8 x i16> %a0) {
; X86-SSE-LABEL: packuswb_icmp_zero_128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: pcmpeqw %xmm0, %xmm1
; X86-SSE-NEXT: packsswb %xmm1, %xmm1
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: packuswb_icmp_zero_128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm1, %xmm1
; X64-SSE-NEXT: pcmpeqw %xmm0, %xmm1
; X64-SSE-NEXT: packsswb %xmm1, %xmm1
; X64-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: packuswb_icmp_zero_128:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: packuswb_icmp_zero_128:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X64-AVX-NEXT: retq
  %1 = icmp eq <8 x i16> %a0, zeroinitializer
  %2 = zext <8 x i1> %1 to <8 x i8>
  %3 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %3
}

define <16 x i8> @packuswb_icmp_zero_trunc_128(<8 x i16> %a0) {
; SSE-LABEL: packuswb_icmp_zero_trunc_128:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqw %xmm1, %xmm0
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: packuswb_icmp_zero_trunc_128:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = icmp eq <8 x i16> %a0, zeroinitializer
  %2 = zext <8 x i1> %1 to <8 x i16>
  %3 = shufflevector <8 x i16> %2, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = trunc <16 x i16> %3 to <16 x i8>
  ret <16 x i8> %4
}

define <32 x i8> @packuswb_icmp_zero_256(<16 x i16> %a0) {
; SSE-LABEL: packuswb_icmp_zero_256:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqw %xmm2, %xmm1
; SSE-NEXT: psrlw $15, %xmm1
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: packuswb %xmm0, %xmm3
; SSE-NEXT: packuswb %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm3, %xmm0
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: ret{{[l|q]}}
;
; X86-AVX1-LABEL: packuswb_icmp_zero_256:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X86-AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; X86-AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: packuswb_icmp_zero_256:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; AVX2-LABEL: packuswb_icmp_zero_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
  %1 = icmp eq <16 x i16> %a0, zeroinitializer
  %2 = zext <16 x i1> %1 to <16 x i16>
  %3 = bitcast <16 x i16> %2 to <32 x i8>
  %4 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
  ret <32 x i8> %4
}

define <32 x i8> @packuswb_icmp_zero_trunc_256(<16 x i16> %a0) {
; SSE-LABEL: packuswb_icmp_zero_trunc_256:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqw %xmm2, %xmm1
; SSE-NEXT: psrlw $15, %xmm1
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: packuswb %xmm0, %xmm3
; SSE-NEXT: packuswb %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm3, %xmm0
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: packuswb_icmp_zero_trunc_256:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: packuswb_icmp_zero_trunc_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
  %1 = icmp eq <16 x i16> %a0, zeroinitializer
  %2 = zext <16 x i1> %1 to <16 x i16>
  %3 = shufflevector <16 x i16> zeroinitializer, <16 x i16> %2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = trunc <32 x i16> %3 to <32 x i8>
  ret <32 x i8> %4
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; X64-AVX2: {{.*}}
; X64-SSE2: {{.*}}
; X64-SSE4: {{.*}}
; X86-AVX2: {{.*}}
; X86-SSE2: {{.*}}
; X86-SSE4: {{.*}}