; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_8i16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}

; PR17654
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_16i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT: retq
entry:
  %B = zext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}

define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
; SSE2-LABEL: zext_32i8_to_32i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_32i8_to_32i16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm4, %xmm4
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_32i8_to_32i16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_32i8_to_32i16:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_32i8_to_32i16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: zext_32i8_to_32i16:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: zext_32i8_to_32i16:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: retq
entry:
  %B = zext <32 x i8> %A to <32 x i16>
  ret <32 x i16> %B
}

define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_4i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}

define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_8i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_8i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_8i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}

define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_16i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pxor %xmm4, %xmm4
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_16i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = zext <16 x i8> %A to <16 x i32>
  ret <16 x i32> %B
}

define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_2i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_2i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_2i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_2i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}

define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_4i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_4i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_4i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}

define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: movdqa %xmm3, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrlq $48, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_8i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_8i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_8i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i64>
  ret <8 x i64> %C
}

define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}

define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_8i16_to_8i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}

define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i16_to_16i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i16_to_16i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm4, %xmm4
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i16_to_16i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i16_to_16i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i16_to_16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i16_to_16i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT: retq
entry:
  %B = zext <16 x i16> %A to <16 x i32>
  ret <16 x i32> %B
}

define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_2i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_2i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 624; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 625; SSSE3-NEXT: retq 626; 627; SSE41-LABEL: zext_8i16_to_2i64: 628; SSE41: # %bb.0: # %entry 629; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 630; SSE41-NEXT: retq 631; 632; AVX-LABEL: zext_8i16_to_2i64: 633; AVX: # %bb.0: # %entry 634; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 635; AVX-NEXT: retq 636entry: 637 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 638 %C = zext <2 x i16> %B to <2 x i64> 639 ret <2 x i64> %C 640} 641 642define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 643; SSE2-LABEL: zext_8i16_to_4i64: 644; SSE2: # %bb.0: # %entry 645; SSE2-NEXT: movdqa %xmm0, %xmm1 646; SSE2-NEXT: pxor %xmm2, %xmm2 647; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 648; SSE2-NEXT: movdqa %xmm1, %xmm0 649; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 650; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 651; SSE2-NEXT: retq 652; 653; SSSE3-LABEL: zext_8i16_to_4i64: 654; SSSE3: # %bb.0: # %entry 655; SSSE3-NEXT: movdqa %xmm0, %xmm1 656; SSSE3-NEXT: pxor %xmm2, %xmm2 657; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 658; SSSE3-NEXT: movdqa %xmm1, %xmm0 659; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 660; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 661; SSSE3-NEXT: retq 662; 663; SSE41-LABEL: zext_8i16_to_4i64: 664; SSE41: # %bb.0: # %entry 665; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 666; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 667; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 668; 
SSE41-NEXT: movdqa %xmm2, %xmm0 669; SSE41-NEXT: retq 670; 671; AVX1-LABEL: zext_8i16_to_4i64: 672; AVX1: # %bb.0: # %entry 673; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 674; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 675; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 676; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 677; AVX1-NEXT: retq 678; 679; AVX2-LABEL: zext_8i16_to_4i64: 680; AVX2: # %bb.0: # %entry 681; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 682; AVX2-NEXT: retq 683; 684; AVX512-LABEL: zext_8i16_to_4i64: 685; AVX512: # %bb.0: # %entry 686; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 687; AVX512-NEXT: retq 688entry: 689 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 690 %C = zext <4 x i16> %B to <4 x i64> 691 ret <4 x i64> %C 692} 693 694define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 695; SSE2-LABEL: zext_8i16_to_8i64: 696; SSE2: # %bb.0: # %entry 697; SSE2-NEXT: movdqa %xmm0, %xmm3 698; SSE2-NEXT: pxor %xmm4, %xmm4 699; SSE2-NEXT: movdqa %xmm0, %xmm1 700; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 701; SSE2-NEXT: movdqa %xmm1, %xmm0 702; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 703; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 704; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 705; SSE2-NEXT: movdqa %xmm3, %xmm2 706; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 707; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 708; SSE2-NEXT: retq 709; 710; SSSE3-LABEL: zext_8i16_to_8i64: 711; SSSE3: # %bb.0: # %entry 712; 
SSSE3-NEXT: movdqa %xmm0, %xmm3 713; SSSE3-NEXT: pxor %xmm4, %xmm4 714; SSSE3-NEXT: movdqa %xmm0, %xmm1 715; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 716; SSSE3-NEXT: movdqa %xmm1, %xmm0 717; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 718; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 719; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 720; SSSE3-NEXT: movdqa %xmm3, %xmm2 721; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 722; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 723; SSSE3-NEXT: retq 724; 725; SSE41-LABEL: zext_8i16_to_8i64: 726; SSE41: # %bb.0: # %entry 727; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 728; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 729; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 730; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 731; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 732; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 733; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 734; SSE41-NEXT: movdqa %xmm4, %xmm0 735; SSE41-NEXT: retq 736; 737; AVX1-LABEL: zext_8i16_to_8i64: 738; AVX1: # %bb.0: # %entry 739; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 740; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 741; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 742; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 743; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 744; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 745; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 746; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 747; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 748; AVX1-NEXT: vmovaps %ymm2, %ymm0 749; AVX1-NEXT: retq 750; 751; AVX2-LABEL: zext_8i16_to_8i64: 752; AVX2: # %bb.0: # %entry 753; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 754; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 755; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 756; AVX2-NEXT: vmovdqa %ymm2, %ymm0 757; AVX2-NEXT: retq 758; 759; AVX512-LABEL: zext_8i16_to_8i64: 760; AVX512: # %bb.0: # %entry 761; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 762; AVX512-NEXT: retq 763entry: 764 %B = zext <8 x i16> %A to <8 x i64> 765 ret <8 x i64> %B 766} 767 768define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 769; SSE2-LABEL: zext_4i32_to_2i64: 770; SSE2: # %bb.0: # %entry 771; SSE2-NEXT: xorps %xmm1, %xmm1 772; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 773; SSE2-NEXT: retq 774; 775; SSSE3-LABEL: zext_4i32_to_2i64: 776; SSSE3: # %bb.0: # %entry 777; SSSE3-NEXT: xorps %xmm1, %xmm1 778; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 779; SSSE3-NEXT: retq 780; 781; SSE41-LABEL: zext_4i32_to_2i64: 782; SSE41: # %bb.0: # %entry 783; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 784; SSE41-NEXT: retq 785; 786; AVX-LABEL: zext_4i32_to_2i64: 787; AVX: # %bb.0: # %entry 788; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 789; AVX-NEXT: retq 790entry: 791 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 792 %C = zext <2 x i32> %B to <2 x i64> 793 ret <2 x i64> %C 794} 795 796define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone 
ssp { 797; SSE2-LABEL: zext_4i32_to_4i64: 798; SSE2: # %bb.0: # %entry 799; SSE2-NEXT: movaps %xmm0, %xmm1 800; SSE2-NEXT: xorps %xmm2, %xmm2 801; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 802; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 803; SSE2-NEXT: retq 804; 805; SSSE3-LABEL: zext_4i32_to_4i64: 806; SSSE3: # %bb.0: # %entry 807; SSSE3-NEXT: movaps %xmm0, %xmm1 808; SSSE3-NEXT: xorps %xmm2, %xmm2 809; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 810; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 811; SSSE3-NEXT: retq 812; 813; SSE41-LABEL: zext_4i32_to_4i64: 814; SSE41: # %bb.0: # %entry 815; SSE41-NEXT: movdqa %xmm0, %xmm1 816; SSE41-NEXT: pxor %xmm2, %xmm2 817; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 818; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 819; SSE41-NEXT: retq 820; 821; AVX1-LABEL: zext_4i32_to_4i64: 822; AVX1: # %bb.0: # %entry 823; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 824; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 825; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 826; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 827; AVX1-NEXT: retq 828; 829; AVX2-LABEL: zext_4i32_to_4i64: 830; AVX2: # %bb.0: # %entry 831; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 832; AVX2-NEXT: retq 833; 834; AVX512-LABEL: zext_4i32_to_4i64: 835; AVX512: # %bb.0: # %entry 836; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 837; AVX512-NEXT: retq 838entry: 839 %B = zext <4 x i32> %A to <4 x i64> 840 ret <4 x i64>%B 841} 842 843define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 844; SSE2-LABEL: zext_8i32_to_8i64: 845; SSE2: # %bb.0: # %entry 846; SSE2-NEXT: movaps %xmm1, %xmm3 847; SSE2-NEXT: movaps %xmm0, %xmm1 848; SSE2-NEXT: xorps %xmm4, %xmm4 849; SSE2-NEXT: unpcklps 
{{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 850; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 851; SSE2-NEXT: movaps %xmm3, %xmm2 852; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 853; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 854; SSE2-NEXT: retq 855; 856; SSSE3-LABEL: zext_8i32_to_8i64: 857; SSSE3: # %bb.0: # %entry 858; SSSE3-NEXT: movaps %xmm1, %xmm3 859; SSSE3-NEXT: movaps %xmm0, %xmm1 860; SSSE3-NEXT: xorps %xmm4, %xmm4 861; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 862; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 863; SSSE3-NEXT: movaps %xmm3, %xmm2 864; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 865; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 866; SSSE3-NEXT: retq 867; 868; SSE41-LABEL: zext_8i32_to_8i64: 869; SSE41: # %bb.0: # %entry 870; SSE41-NEXT: movdqa %xmm1, %xmm3 871; SSE41-NEXT: movdqa %xmm0, %xmm1 872; SSE41-NEXT: pxor %xmm4, %xmm4 873; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 874; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 875; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero 876; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 877; SSE41-NEXT: retq 878; 879; AVX1-LABEL: zext_8i32_to_8i64: 880; AVX1: # %bb.0: # %entry 881; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 882; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 883; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero 884; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 885; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 886; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 887; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 888; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 889; AVX1-NEXT: vmovaps %ymm2, %ymm0 890; AVX1-NEXT: retq 891; 892; AVX2-LABEL: zext_8i32_to_8i64: 
893; AVX2: # %bb.0: # %entry 894; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 895; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 896; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 897; AVX2-NEXT: vmovdqa %ymm2, %ymm0 898; AVX2-NEXT: retq 899; 900; AVX512-LABEL: zext_8i32_to_8i64: 901; AVX512: # %bb.0: # %entry 902; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 903; AVX512-NEXT: retq 904entry: 905 %B = zext <8 x i32> %A to <8 x i64> 906 ret <8 x i64>%B 907} 908 909define <2 x i64> @load_zext_2i8_to_2i64(ptr%ptr) { 910; SSE2-LABEL: load_zext_2i8_to_2i64: 911; SSE2: # %bb.0: # %entry 912; SSE2-NEXT: movzwl (%rdi), %eax 913; SSE2-NEXT: movd %eax, %xmm0 914; SSE2-NEXT: pxor %xmm1, %xmm1 915; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 916; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 917; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 918; SSE2-NEXT: retq 919; 920; SSSE3-LABEL: load_zext_2i8_to_2i64: 921; SSSE3: # %bb.0: # %entry 922; SSSE3-NEXT: movzwl (%rdi), %eax 923; SSSE3-NEXT: movd %eax, %xmm0 924; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 925; SSSE3-NEXT: retq 926; 927; SSE41-LABEL: load_zext_2i8_to_2i64: 928; SSE41: # %bb.0: # %entry 929; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 930; SSE41-NEXT: retq 931; 932; AVX-LABEL: load_zext_2i8_to_2i64: 933; AVX: # %bb.0: # %entry 934; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 935; AVX-NEXT: retq 936entry: 937 %X = 
load <2 x i8>, ptr %ptr 938 %Y = zext <2 x i8> %X to <2 x i64> 939 ret <2 x i64> %Y 940} 941 942define <4 x i32> @load_zext_4i8_to_4i32(ptr%ptr) { 943; SSE2-LABEL: load_zext_4i8_to_4i32: 944; SSE2: # %bb.0: # %entry 945; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 946; SSE2-NEXT: pxor %xmm1, %xmm1 947; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 948; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 949; SSE2-NEXT: retq 950; 951; SSSE3-LABEL: load_zext_4i8_to_4i32: 952; SSSE3: # %bb.0: # %entry 953; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 954; SSSE3-NEXT: pxor %xmm1, %xmm1 955; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 956; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 957; SSSE3-NEXT: retq 958; 959; SSE41-LABEL: load_zext_4i8_to_4i32: 960; SSE41: # %bb.0: # %entry 961; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 962; SSE41-NEXT: retq 963; 964; AVX-LABEL: load_zext_4i8_to_4i32: 965; AVX: # %bb.0: # %entry 966; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 967; AVX-NEXT: retq 968entry: 969 %X = load <4 x i8>, ptr %ptr 970 %Y = zext <4 x i8> %X to <4 x i32> 971 ret <4 x i32> %Y 972} 973 974define <4 x i64> @load_zext_4i8_to_4i64(ptr%ptr) { 975; SSE2-LABEL: load_zext_4i8_to_4i64: 976; SSE2: # %bb.0: # %entry 977; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 978; SSE2-NEXT: pxor %xmm2, %xmm2 979; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 980; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 981; SSE2-NEXT: movdqa %xmm1, %xmm0 982; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 983; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 984; SSE2-NEXT: retq 985; 986; SSSE3-LABEL: load_zext_4i8_to_4i64: 987; SSSE3: # %bb.0: # %entry 988; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 989; SSSE3-NEXT: movdqa %xmm1, %xmm0 990; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 991; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 992; SSSE3-NEXT: retq 993; 994; SSE41-LABEL: load_zext_4i8_to_4i64: 995; SSE41: # %bb.0: # %entry 996; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 997; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 998; SSE41-NEXT: retq 999; 1000; AVX1-LABEL: load_zext_4i8_to_4i64: 1001; AVX1: # %bb.0: # %entry 1002; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1003; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1004; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1005; AVX1-NEXT: retq 1006; 1007; AVX2-LABEL: load_zext_4i8_to_4i64: 1008; AVX2: # %bb.0: # %entry 1009; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1010; AVX2-NEXT: retq 1011; 1012; AVX512-LABEL: load_zext_4i8_to_4i64: 
1013; AVX512: # %bb.0: # %entry 1014; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1015; AVX512-NEXT: retq 1016entry: 1017 %X = load <4 x i8>, ptr %ptr 1018 %Y = zext <4 x i8> %X to <4 x i64> 1019 ret <4 x i64> %Y 1020} 1021 1022define <8 x i16> @load_zext_8i8_to_8i16(ptr%ptr) { 1023; SSE2-LABEL: load_zext_8i8_to_8i16: 1024; SSE2: # %bb.0: # %entry 1025; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1026; SSE2-NEXT: pxor %xmm1, %xmm1 1027; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1028; SSE2-NEXT: retq 1029; 1030; SSSE3-LABEL: load_zext_8i8_to_8i16: 1031; SSSE3: # %bb.0: # %entry 1032; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1033; SSSE3-NEXT: pxor %xmm1, %xmm1 1034; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1035; SSSE3-NEXT: retq 1036; 1037; SSE41-LABEL: load_zext_8i8_to_8i16: 1038; SSE41: # %bb.0: # %entry 1039; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1040; SSE41-NEXT: retq 1041; 1042; AVX-LABEL: load_zext_8i8_to_8i16: 1043; AVX: # %bb.0: # %entry 1044; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1045; AVX-NEXT: retq 1046entry: 1047 %X = load <8 x i8>, ptr %ptr 1048 %Y = zext <8 x i8> %X to <8 x i16> 1049 ret <8 x i16> %Y 1050} 1051 1052define <8 x i32> @load_zext_8i8_to_8i32(ptr%ptr) { 1053; SSE2-LABEL: load_zext_8i8_to_8i32: 1054; SSE2: # %bb.0: # %entry 1055; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1056; SSE2-NEXT: pxor %xmm2, %xmm2 1057; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1058; SSE2-NEXT: movdqa %xmm1, %xmm0 1059; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1060; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1061; SSE2-NEXT: retq 1062; 1063; SSSE3-LABEL: load_zext_8i8_to_8i32: 1064; SSSE3: # %bb.0: # %entry 1065; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1066; SSSE3-NEXT: pxor %xmm2, %xmm2 1067; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1068; SSSE3-NEXT: movdqa %xmm1, %xmm0 1069; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1070; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1071; SSSE3-NEXT: retq 1072; 1073; SSE41-LABEL: load_zext_8i8_to_8i32: 1074; SSE41: # %bb.0: # %entry 1075; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1076; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1077; SSE41-NEXT: retq 1078; 1079; AVX1-LABEL: load_zext_8i8_to_8i32: 1080; AVX1: # %bb.0: # %entry 1081; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1082; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1083; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1084; AVX1-NEXT: retq 1085; 1086; AVX2-LABEL: load_zext_8i8_to_8i32: 1087; AVX2: # %bb.0: # %entry 1088; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1089; AVX2-NEXT: retq 1090; 1091; AVX512-LABEL: load_zext_8i8_to_8i32: 1092; AVX512: # %bb.0: # %entry 1093; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1094; AVX512-NEXT: retq 1095entry: 1096 %X = load <8 x i8>, ptr %ptr 1097 %Y = zext <8 x i8> %X to <8 x i32> 1098 ret <8 x i32> %Y 1099} 1100 1101define <8 x i32> @load_zext_16i8_to_8i32(ptr%ptr) { 1102; SSE2-LABEL: load_zext_16i8_to_8i32: 1103; SSE2: # %bb.0: # %entry 1104; SSE2-NEXT: movdqa (%rdi), %xmm1 1105; SSE2-NEXT: pxor %xmm2, %xmm2 1106; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1107; SSE2-NEXT: movdqa %xmm1, %xmm0 1108; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1109; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1110; SSE2-NEXT: retq 1111; 1112; SSSE3-LABEL: load_zext_16i8_to_8i32: 1113; SSSE3: # %bb.0: # %entry 1114; SSSE3-NEXT: movdqa (%rdi), %xmm1 1115; SSSE3-NEXT: pxor %xmm2, %xmm2 1116; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1117; SSSE3-NEXT: movdqa %xmm1, %xmm0 1118; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1119; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1120; SSSE3-NEXT: retq 1121; 1122; SSE41-LABEL: load_zext_16i8_to_8i32: 1123; SSE41: # %bb.0: # %entry 1124; SSE41-NEXT: movdqa 
(%rdi), %xmm1 1125; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1126; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] 1127; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1128; SSE41-NEXT: retq 1129; 1130; AVX1-LABEL: load_zext_16i8_to_8i32: 1131; AVX1: # %bb.0: # %entry 1132; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1133; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1134; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1135; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1136; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1137; AVX1-NEXT: retq 1138; 1139; AVX2-LABEL: load_zext_16i8_to_8i32: 1140; AVX2: # %bb.0: # %entry 1141; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1142; AVX2-NEXT: retq 1143; 1144; AVX512-LABEL: load_zext_16i8_to_8i32: 1145; AVX512: # %bb.0: # %entry 1146; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1147; AVX512-NEXT: retq 1148entry: 1149 %X = load <16 x i8>, ptr %ptr 1150 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1151 %Z = zext <8 x i8> %Y to <8 x i32> 1152 ret <8 x i32> %Z 1153} 1154 1155define <8 x i64> @load_zext_8i8_to_8i64(ptr%ptr) { 1156; SSE2-LABEL: load_zext_8i8_to_8i64: 1157; SSE2: # %bb.0: # %entry 1158; SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero 1159; SSE2-NEXT: pxor %xmm4, %xmm4 1160; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1161; SSE2-NEXT: movdqa %xmm3, %xmm1 1162; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1163; SSE2-NEXT: movdqa %xmm1, %xmm0 1164; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 1165; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1166; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1167; SSE2-NEXT: movdqa %xmm3, %xmm2 1168; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1169; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 1170; SSE2-NEXT: retq 1171; 1172; SSSE3-LABEL: load_zext_8i8_to_8i64: 1173; SSSE3: # %bb.0: # %entry 1174; SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero 1175; SSSE3-NEXT: movdqa %xmm3, %xmm0 1176; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1177; SSSE3-NEXT: movdqa %xmm3, %xmm1 1178; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 1179; SSSE3-NEXT: movdqa %xmm3, %xmm2 1180; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero 1181; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero 1182; SSSE3-NEXT: retq 1183; 1184; SSE41-LABEL: load_zext_8i8_to_8i64: 1185; SSE41: # %bb.0: # %entry 1186; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1187; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1188; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1189; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1190; SSE41-NEXT: retq 1191; 1192; AVX1-LABEL: load_zext_8i8_to_8i64: 1193; AVX1: # %bb.0: # %entry 1194; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1195; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1196; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1197; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1198; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1199; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1200; AVX1-NEXT: retq 1201; 1202; AVX2-LABEL: load_zext_8i8_to_8i64: 1203; AVX2: # %bb.0: # %entry 1204; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1205; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1206; AVX2-NEXT: retq 1207; 1208; AVX512-LABEL: load_zext_8i8_to_8i64: 1209; AVX512: # %bb.0: # %entry 1210; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 1211; AVX512-NEXT: retq 1212entry: 1213 %X = load <8 x i8>, ptr %ptr 
1214 %Y = zext <8 x i8> %X to <8 x i64> 1215 ret <8 x i64> %Y 1216} 1217 1218define <16 x i16> @load_zext_16i8_to_16i16(ptr%ptr) { 1219; SSE2-LABEL: load_zext_16i8_to_16i16: 1220; SSE2: # %bb.0: # %entry 1221; SSE2-NEXT: movdqa (%rdi), %xmm1 1222; SSE2-NEXT: pxor %xmm2, %xmm2 1223; SSE2-NEXT: movdqa %xmm1, %xmm0 1224; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1225; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1226; SSE2-NEXT: retq 1227; 1228; SSSE3-LABEL: load_zext_16i8_to_16i16: 1229; SSSE3: # %bb.0: # %entry 1230; SSSE3-NEXT: movdqa (%rdi), %xmm1 1231; SSSE3-NEXT: pxor %xmm2, %xmm2 1232; SSSE3-NEXT: movdqa %xmm1, %xmm0 1233; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1234; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1235; SSSE3-NEXT: retq 1236; 1237; SSE41-LABEL: load_zext_16i8_to_16i16: 1238; SSE41: # %bb.0: # %entry 1239; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1240; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1241; SSE41-NEXT: retq 1242; 1243; AVX1-LABEL: load_zext_16i8_to_16i16: 1244; AVX1: # %bb.0: # %entry 1245; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1246; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1247; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1248; AVX1-NEXT: retq 1249; 1250; AVX2-LABEL: load_zext_16i8_to_16i16: 1251; AVX2: # %bb.0: # %entry 1252; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1253; AVX2-NEXT: retq 1254; 1255; AVX512-LABEL: load_zext_16i8_to_16i16: 1256; AVX512: # %bb.0: # %entry 1257; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1258; AVX512-NEXT: retq 1259entry: 1260 %X = load <16 x i8>, ptr %ptr 1261 %Y = zext <16 x i8> %X to <16 x i16> 1262 ret <16 x i16> %Y 1263} 1264 1265define <2 x i64> @load_zext_2i16_to_2i64(ptr%ptr) { 1266; SSE2-LABEL: load_zext_2i16_to_2i64: 1267; SSE2: # %bb.0: # %entry 1268; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1269; SSE2-NEXT: pxor %xmm1, %xmm1 1270; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1271; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1272; SSE2-NEXT: retq 1273; 1274; SSSE3-LABEL: load_zext_2i16_to_2i64: 1275; SSSE3: # %bb.0: # %entry 1276; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1277; SSSE3-NEXT: pxor %xmm1, %xmm1 1278; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1279; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1280; SSSE3-NEXT: retq 1281; 1282; SSE41-LABEL: load_zext_2i16_to_2i64: 1283; SSE41: # %bb.0: # %entry 1284; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1285; SSE41-NEXT: retq 1286; 1287; 
AVX-LABEL: load_zext_2i16_to_2i64: 1288; AVX: # %bb.0: # %entry 1289; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1290; AVX-NEXT: retq 1291entry: 1292 %X = load <2 x i16>, ptr %ptr 1293 %Y = zext <2 x i16> %X to <2 x i64> 1294 ret <2 x i64> %Y 1295} 1296 1297define <4 x i32> @load_zext_4i16_to_4i32(ptr%ptr) { 1298; SSE2-LABEL: load_zext_4i16_to_4i32: 1299; SSE2: # %bb.0: # %entry 1300; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1301; SSE2-NEXT: pxor %xmm1, %xmm1 1302; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1303; SSE2-NEXT: retq 1304; 1305; SSSE3-LABEL: load_zext_4i16_to_4i32: 1306; SSSE3: # %bb.0: # %entry 1307; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1308; SSSE3-NEXT: pxor %xmm1, %xmm1 1309; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1310; SSSE3-NEXT: retq 1311; 1312; SSE41-LABEL: load_zext_4i16_to_4i32: 1313; SSE41: # %bb.0: # %entry 1314; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1315; SSE41-NEXT: retq 1316; 1317; AVX-LABEL: load_zext_4i16_to_4i32: 1318; AVX: # %bb.0: # %entry 1319; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1320; AVX-NEXT: retq 1321entry: 1322 %X = load <4 x i16>, ptr %ptr 1323 %Y = zext <4 x i16> %X to <4 x i32> 1324 ret <4 x i32> %Y 1325} 1326 1327define <4 x i64> @load_zext_4i16_to_4i64(ptr%ptr) { 1328; SSE2-LABEL: load_zext_4i16_to_4i64: 1329; SSE2: # %bb.0: # %entry 1330; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1331; SSE2-NEXT: pxor %xmm2, %xmm2 1332; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1333; SSE2-NEXT: movdqa %xmm1, %xmm0 1334; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1335; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1336; SSE2-NEXT: retq 1337; 1338; SSSE3-LABEL: 
load_zext_4i16_to_4i64: 1339; SSSE3: # %bb.0: # %entry 1340; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1341; SSSE3-NEXT: pxor %xmm2, %xmm2 1342; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1343; SSSE3-NEXT: movdqa %xmm1, %xmm0 1344; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1345; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1346; SSSE3-NEXT: retq 1347; 1348; SSE41-LABEL: load_zext_4i16_to_4i64: 1349; SSE41: # %bb.0: # %entry 1350; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1351; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1352; SSE41-NEXT: retq 1353; 1354; AVX1-LABEL: load_zext_4i16_to_4i64: 1355; AVX1: # %bb.0: # %entry 1356; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1357; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1358; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1359; AVX1-NEXT: retq 1360; 1361; AVX2-LABEL: load_zext_4i16_to_4i64: 1362; AVX2: # %bb.0: # %entry 1363; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1364; AVX2-NEXT: retq 1365; 1366; AVX512-LABEL: load_zext_4i16_to_4i64: 1367; AVX512: # %bb.0: # %entry 1368; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1369; AVX512-NEXT: retq 1370entry: 1371 %X = load <4 x i16>, ptr %ptr 1372 %Y = zext <4 x i16> %X to <4 x i64> 1373 ret <4 x i64> %Y 1374} 1375 1376define <8 x i32> @load_zext_8i16_to_8i32(ptr%ptr) { 1377; SSE2-LABEL: load_zext_8i16_to_8i32: 1378; SSE2: # %bb.0: # %entry 1379; SSE2-NEXT: movdqa (%rdi), %xmm1 1380; SSE2-NEXT: pxor %xmm2, %xmm2 1381; SSE2-NEXT: movdqa %xmm1, %xmm0 1382; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1383; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1384; SSE2-NEXT: retq 1385; 1386; SSSE3-LABEL: load_zext_8i16_to_8i32: 1387; SSSE3: # %bb.0: # %entry 1388; SSSE3-NEXT: movdqa (%rdi), %xmm1 1389; SSSE3-NEXT: pxor %xmm2, %xmm2 1390; SSSE3-NEXT: movdqa %xmm1, %xmm0 1391; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1392; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1393; SSSE3-NEXT: retq 1394; 1395; SSE41-LABEL: load_zext_8i16_to_8i32: 1396; SSE41: # %bb.0: # %entry 1397; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1398; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1399; SSE41-NEXT: retq 1400; 1401; AVX1-LABEL: load_zext_8i16_to_8i32: 1402; AVX1: # %bb.0: # %entry 1403; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1404; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1405; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1406; AVX1-NEXT: retq 1407; 1408; AVX2-LABEL: load_zext_8i16_to_8i32: 1409; AVX2: # %bb.0: # %entry 1410; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1411; AVX2-NEXT: retq 1412; 1413; AVX512-LABEL: load_zext_8i16_to_8i32: 1414; AVX512: # %bb.0: # %entry 1415; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1416; AVX512-NEXT: retq 1417entry: 1418 %X = load <8 x i16>, ptr %ptr 1419 %Y = zext <8 x i16> %X to <8 x i32> 1420 ret <8 x i32> %Y 1421} 1422 1423define <2 x i64> @load_zext_2i32_to_2i64(ptr%ptr) { 1424; SSE2-LABEL: load_zext_2i32_to_2i64: 1425; SSE2: # %bb.0: # %entry 1426; SSE2-NEXT: 
movsd {{.*#+}} xmm0 = mem[0],zero 1427; SSE2-NEXT: xorps %xmm1, %xmm1 1428; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1429; SSE2-NEXT: retq 1430; 1431; SSSE3-LABEL: load_zext_2i32_to_2i64: 1432; SSSE3: # %bb.0: # %entry 1433; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1434; SSSE3-NEXT: xorps %xmm1, %xmm1 1435; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1436; SSSE3-NEXT: retq 1437; 1438; SSE41-LABEL: load_zext_2i32_to_2i64: 1439; SSE41: # %bb.0: # %entry 1440; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1441; SSE41-NEXT: retq 1442; 1443; AVX-LABEL: load_zext_2i32_to_2i64: 1444; AVX: # %bb.0: # %entry 1445; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1446; AVX-NEXT: retq 1447entry: 1448 %X = load <2 x i32>, ptr %ptr 1449 %Y = zext <2 x i32> %X to <2 x i64> 1450 ret <2 x i64> %Y 1451} 1452 1453define <4 x i64> @load_zext_4i32_to_4i64(ptr%ptr) { 1454; SSE2-LABEL: load_zext_4i32_to_4i64: 1455; SSE2: # %bb.0: # %entry 1456; SSE2-NEXT: movaps (%rdi), %xmm1 1457; SSE2-NEXT: xorps %xmm2, %xmm2 1458; SSE2-NEXT: movaps %xmm1, %xmm0 1459; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1460; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1461; SSE2-NEXT: retq 1462; 1463; SSSE3-LABEL: load_zext_4i32_to_4i64: 1464; SSSE3: # %bb.0: # %entry 1465; SSSE3-NEXT: movaps (%rdi), %xmm1 1466; SSSE3-NEXT: xorps %xmm2, %xmm2 1467; SSSE3-NEXT: movaps %xmm1, %xmm0 1468; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1469; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1470; SSSE3-NEXT: retq 1471; 1472; SSE41-LABEL: load_zext_4i32_to_4i64: 1473; SSE41: # %bb.0: # %entry 1474; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1475; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1476; SSE41-NEXT: retq 1477; 1478; AVX1-LABEL: load_zext_4i32_to_4i64: 1479; AVX1: # %bb.0: # %entry 1480; AVX1-NEXT: 
vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1481; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1482; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1483; AVX1-NEXT: retq 1484; 1485; AVX2-LABEL: load_zext_4i32_to_4i64: 1486; AVX2: # %bb.0: # %entry 1487; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1488; AVX2-NEXT: retq 1489; 1490; AVX512-LABEL: load_zext_4i32_to_4i64: 1491; AVX512: # %bb.0: # %entry 1492; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1493; AVX512-NEXT: retq 1494entry: 1495 %X = load <4 x i32>, ptr %ptr 1496 %Y = zext <4 x i32> %X to <4 x i64> 1497 ret <4 x i64> %Y 1498} 1499 1500define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { 1501; SSE2-LABEL: zext_8i8_to_8i32: 1502; SSE2: # %bb.0: # %entry 1503; SSE2-NEXT: movdqa %xmm0, %xmm1 1504; SSE2-NEXT: pxor %xmm2, %xmm2 1505; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1506; SSE2-NEXT: movdqa %xmm1, %xmm0 1507; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1508; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1509; SSE2-NEXT: retq 1510; 1511; SSSE3-LABEL: zext_8i8_to_8i32: 1512; SSSE3: # %bb.0: # %entry 1513; SSSE3-NEXT: movdqa %xmm0, %xmm1 1514; SSSE3-NEXT: pxor %xmm2, %xmm2 1515; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1516; SSSE3-NEXT: movdqa %xmm1, %xmm0 1517; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1518; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1519; SSSE3-NEXT: retq 1520; 1521; SSE41-LABEL: zext_8i8_to_8i32: 1522; SSE41: # %bb.0: 
# %entry 1523; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1524; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1525; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1526; SSE41-NEXT: movdqa %xmm2, %xmm0 1527; SSE41-NEXT: retq 1528; 1529; AVX1-LABEL: zext_8i8_to_8i32: 1530; AVX1: # %bb.0: # %entry 1531; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1532; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1533; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1534; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1535; AVX1-NEXT: retq 1536; 1537; AVX2-LABEL: zext_8i8_to_8i32: 1538; AVX2: # %bb.0: # %entry 1539; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1540; AVX2-NEXT: retq 1541; 1542; AVX512-LABEL: zext_8i8_to_8i32: 1543; AVX512: # %bb.0: # %entry 1544; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1545; AVX512-NEXT: retq 1546entry: 1547 %t = zext <8 x i8> %z to <8 x i32> 1548 ret <8 x i32> %t 1549} 1550 1551define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 1552; SSE2-LABEL: shuf_zext_8i16_to_8i32: 1553; SSE2: # %bb.0: # %entry 1554; SSE2-NEXT: movdqa %xmm0, %xmm1 1555; SSE2-NEXT: pxor %xmm2, %xmm2 1556; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1557; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = 
xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1558; SSE2-NEXT: retq 1559; 1560; SSSE3-LABEL: shuf_zext_8i16_to_8i32: 1561; SSSE3: # %bb.0: # %entry 1562; SSSE3-NEXT: movdqa %xmm0, %xmm1 1563; SSSE3-NEXT: pxor %xmm2, %xmm2 1564; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1565; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1566; SSSE3-NEXT: retq 1567; 1568; SSE41-LABEL: shuf_zext_8i16_to_8i32: 1569; SSE41: # %bb.0: # %entry 1570; SSE41-NEXT: movdqa %xmm0, %xmm1 1571; SSE41-NEXT: pxor %xmm2, %xmm2 1572; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1573; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1574; SSE41-NEXT: retq 1575; 1576; AVX1-LABEL: shuf_zext_8i16_to_8i32: 1577; AVX1: # %bb.0: # %entry 1578; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1579; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1580; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1581; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1582; AVX1-NEXT: retq 1583; 1584; AVX2-LABEL: shuf_zext_8i16_to_8i32: 1585; AVX2: # %bb.0: # %entry 1586; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1587; AVX2-NEXT: retq 1588; 1589; AVX512-LABEL: shuf_zext_8i16_to_8i32: 1590; AVX512: # %bb.0: # %entry 1591; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1592; AVX512-NEXT: retq 1593entry: 1594 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8> 1595 %Z = bitcast <16 x i16> %B to <8 x 
i32> 1596 ret <8 x i32> %Z 1597} 1598 1599define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1600; SSE2-LABEL: shuf_zext_4i32_to_4i64: 1601; SSE2: # %bb.0: # %entry 1602; SSE2-NEXT: movaps %xmm0, %xmm1 1603; SSE2-NEXT: xorps %xmm2, %xmm2 1604; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1605; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1606; SSE2-NEXT: retq 1607; 1608; SSSE3-LABEL: shuf_zext_4i32_to_4i64: 1609; SSSE3: # %bb.0: # %entry 1610; SSSE3-NEXT: movaps %xmm0, %xmm1 1611; SSSE3-NEXT: xorps %xmm2, %xmm2 1612; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1613; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1614; SSSE3-NEXT: retq 1615; 1616; SSE41-LABEL: shuf_zext_4i32_to_4i64: 1617; SSE41: # %bb.0: # %entry 1618; SSE41-NEXT: movdqa %xmm0, %xmm1 1619; SSE41-NEXT: pxor %xmm2, %xmm2 1620; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1621; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1622; SSE41-NEXT: retq 1623; 1624; AVX1-LABEL: shuf_zext_4i32_to_4i64: 1625; AVX1: # %bb.0: # %entry 1626; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1627; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1628; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1629; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1630; AVX1-NEXT: retq 1631; 1632; AVX2-LABEL: shuf_zext_4i32_to_4i64: 1633; AVX2: # %bb.0: # %entry 1634; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1635; AVX2-NEXT: retq 1636; 1637; AVX512-LABEL: shuf_zext_4i32_to_4i64: 1638; AVX512: # %bb.0: # %entry 1639; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1640; AVX512-NEXT: retq 1641entry: 1642 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4> 1643 %Z = bitcast <8 x 
i32> %B to <4 x i64> 1644 ret <4 x i64> %Z 1645} 1646 1647define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) { 1648; SSE2-LABEL: shuf_zext_8i8_to_8i32: 1649; SSE2: # %bb.0: # %entry 1650; SSE2-NEXT: movdqa %xmm0, %xmm1 1651; SSE2-NEXT: pxor %xmm2, %xmm2 1652; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1653; SSE2-NEXT: movdqa %xmm1, %xmm0 1654; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1655; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1656; SSE2-NEXT: retq 1657; 1658; SSSE3-LABEL: shuf_zext_8i8_to_8i32: 1659; SSSE3: # %bb.0: # %entry 1660; SSSE3-NEXT: movdqa %xmm0, %xmm1 1661; SSSE3-NEXT: pxor %xmm2, %xmm2 1662; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1663; SSSE3-NEXT: movdqa %xmm1, %xmm0 1664; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1665; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1666; SSSE3-NEXT: retq 1667; 1668; SSE41-LABEL: shuf_zext_8i8_to_8i32: 1669; SSE41: # %bb.0: # %entry 1670; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1671; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1672; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1673; SSE41-NEXT: movdqa %xmm2, %xmm0 1674; SSE41-NEXT: retq 1675; 1676; AVX1-LABEL: shuf_zext_8i8_to_8i32: 1677; AVX1: # %bb.0: # %entry 1678; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1679; AVX1-NEXT: 
vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1680; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1681; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1682; AVX1-NEXT: retq 1683; 1684; AVX2-LABEL: shuf_zext_8i8_to_8i32: 1685; AVX2: # %bb.0: # %entry 1686; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1687; AVX2-NEXT: retq 1688; 1689; AVX512-LABEL: shuf_zext_8i8_to_8i32: 1690; AVX512: # %bb.0: # %entry 1691; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1692; AVX512-NEXT: retq 1693entry: 1694 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8> 1695 %Z = bitcast <32 x i8> %B to <8 x i32> 1696 ret <8 x i32> %Z 1697} 1698 1699define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp { 1700; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6: 1701; SSE2: # %bb.0: # %entry 1702; SSE2-NEXT: pxor %xmm1, %xmm1 1703; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1704; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1705; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1706; SSE2-NEXT: retq 1707; 1708; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6: 1709; SSSE3: # %bb.0: # %entry 1710; SSSE3-NEXT: pshufb 
{{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1711; SSSE3-NEXT: retq 1712; 1713; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6: 1714; SSE41: # %bb.0: # %entry 1715; SSE41-NEXT: psrlq $48, %xmm0 1716; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1717; SSE41-NEXT: retq 1718; 1719; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6: 1720; AVX1: # %bb.0: # %entry 1721; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 1722; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1723; AVX1-NEXT: retq 1724; 1725; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6: 1726; AVX2-SLOW: # %bb.0: # %entry 1727; AVX2-SLOW-NEXT: vpsrlq $48, %xmm0, %xmm0 1728; AVX2-SLOW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1729; AVX2-SLOW-NEXT: retq 1730; 1731; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6: 1732; AVX2-FAST: # %bb.0: # %entry 1733; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1734; AVX2-FAST-NEXT: retq 1735; 1736; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6: 1737; AVX512F: # %bb.0: # %entry 1738; AVX512F-NEXT: vpsrlq $48, %xmm0, %xmm0 1739; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1740; AVX512F-NEXT: retq 1741; 1742; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6: 1743; AVX512BW: # %bb.0: # %entry 1744; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1745; AVX512BW-NEXT: retq 1746entry: 1747 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 
1748 %Z = bitcast <16 x i8> %B to <2 x i64> 1749 ret <2 x i64> %Z 1750} 1751 1752define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp { 1753; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1754; SSE2: # %bb.0: # %entry 1755; SSE2-NEXT: movdqa %xmm0, %xmm1 1756; SSE2-NEXT: psrlq $8, %xmm1 1757; SSE2-NEXT: pxor %xmm2, %xmm2 1758; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1759; SSE2-NEXT: movdqa %xmm1, %xmm0 1760; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1761; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1762; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1763; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1764; SSE2-NEXT: retq 1765; 1766; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11: 1767; SSSE3: # %bb.0: # %entry 1768; SSSE3-NEXT: movdqa %xmm0, %xmm1 1769; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero 1770; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero 1771; SSSE3-NEXT: retq 1772; 1773; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11: 1774; SSE41: # %bb.0: # %entry 1775; SSE41-NEXT: movdqa %xmm0, %xmm1 1776; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1777; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1778; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1779; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 
1780; SSE41-NEXT: movdqa %xmm2, %xmm0 1781; SSE41-NEXT: retq 1782; 1783; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11: 1784; AVX1: # %bb.0: # %entry 1785; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1786; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1787; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1788; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1789; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1790; AVX1-NEXT: retq 1791; 1792; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1793; AVX2: # %bb.0: # %entry 1794; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1795; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1796; AVX2-NEXT: retq 1797; 1798; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11: 1799; AVX512: # %bb.0: # %entry 1800; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1801; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1802; AVX512-NEXT: retq 1803entry: 1804 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1805 %Z = bitcast <32 x i8> %B to <4 x i64> 
1806 ret <4 x i64> %Z 1807} 1808 1809define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp { 1810; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6: 1811; SSE2: # %bb.0: # %entry 1812; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1813; SSE2-NEXT: pxor %xmm1, %xmm1 1814; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1815; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1816; SSE2-NEXT: retq 1817; 1818; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6: 1819; SSSE3: # %bb.0: # %entry 1820; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1821; SSSE3-NEXT: retq 1822; 1823; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6: 1824; SSE41: # %bb.0: # %entry 1825; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1826; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1827; SSE41-NEXT: retq 1828; 1829; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6: 1830; AVX1: # %bb.0: # %entry 1831; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1832; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1833; AVX1-NEXT: retq 1834; 1835; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6: 1836; AVX2-SLOW: # %bb.0: # %entry 1837; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1838; AVX2-SLOW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1839; AVX2-SLOW-NEXT: retq 1840; 1841; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6: 1842; AVX2-FAST: # %bb.0: # %entry 1843; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1844; AVX2-FAST-NEXT: retq 1845; 1846; AVX512F-LABEL: 
shuf_zext_8i16_to_2i64_offset6: 1847; AVX512F: # %bb.0: # %entry 1848; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1849; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1850; AVX512F-NEXT: retq 1851; 1852; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6: 1853; AVX512BW: # %bb.0: # %entry 1854; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1855; AVX512BW-NEXT: retq 1856entry: 1857 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8> 1858 %Z = bitcast <8 x i16> %B to <2 x i64> 1859 ret <2 x i64> %Z 1860} 1861 1862define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp { 1863; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1864; SSE2: # %bb.0: # %entry 1865; SSE2-NEXT: movdqa %xmm0, %xmm1 1866; SSE2-NEXT: pxor %xmm2, %xmm2 1867; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1868; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1869; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1870; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1871; SSE2-NEXT: retq 1872; 1873; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2: 1874; SSSE3: # %bb.0: # %entry 1875; SSSE3-NEXT: movdqa %xmm0, %xmm1 1876; SSSE3-NEXT: pxor %xmm2, %xmm2 1877; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1878; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1879; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1880; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1881; SSSE3-NEXT: retq 1882; 1883; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2: 
1884; SSE41: # %bb.0: # %entry 1885; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1886; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1887; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1888; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1889; SSE41-NEXT: movdqa %xmm2, %xmm0 1890; SSE41-NEXT: retq 1891; 1892; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2: 1893; AVX1: # %bb.0: # %entry 1894; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1895; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1896; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1897; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1898; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1899; AVX1-NEXT: retq 1900; 1901; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1902; AVX2: # %bb.0: # %entry 1903; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1904; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1905; AVX2-NEXT: retq 1906; 1907; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: 1908; AVX512: # %bb.0: # %entry 1909; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1910; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1911; AVX512-NEXT: retq 1912entry: 1913 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8> 1914 %Z = bitcast <16 x i16> %B to <4 x i64> 1915 ret <4 x i64> %Z 1916} 1917 1918define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp { 1919; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1: 1920; SSE2: # %bb.0: # %entry 1921; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1922; SSE2-NEXT: 
pxor %xmm1, %xmm1 1923; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1924; SSE2-NEXT: retq 1925; 1926; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1: 1927; SSSE3: # %bb.0: # %entry 1928; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1929; SSSE3-NEXT: pxor %xmm1, %xmm1 1930; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1931; SSSE3-NEXT: retq 1932; 1933; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1: 1934; SSE41: # %bb.0: # %entry 1935; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1936; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1937; SSE41-NEXT: retq 1938; 1939; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1: 1940; AVX1: # %bb.0: # %entry 1941; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1942; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1943; AVX1-NEXT: retq 1944; 1945; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1: 1946; AVX2-SLOW: # %bb.0: # %entry 1947; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1948; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1949; AVX2-SLOW-NEXT: retq 1950; 1951; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1: 1952; AVX2-FAST: # %bb.0: # %entry 1953; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero 1954; AVX2-FAST-NEXT: retq 1955; 1956; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1: 1957; AVX512F: # %bb.0: # %entry 1958; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1959; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1960; AVX512F-NEXT: retq 1961; 1962; AVX512BW-LABEL: 
shuf_zext_8i16_to_4i32_offset1: 1963; AVX512BW: # %bb.0: # %entry 1964; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero 1965; AVX512BW-NEXT: retq 1966entry: 1967 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8> 1968 %Z = bitcast <8 x i16> %B to <4 x i32> 1969 ret <4 x i32> %Z 1970} 1971 1972define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp { 1973; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3: 1974; SSE2: # %bb.0: # %entry 1975; SSE2-NEXT: movdqa %xmm0, %xmm1 1976; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1977; SSE2-NEXT: pxor %xmm2, %xmm2 1978; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1979; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1980; SSE2-NEXT: retq 1981; 1982; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3: 1983; SSSE3: # %bb.0: # %entry 1984; SSSE3-NEXT: movdqa %xmm0, %xmm1 1985; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1986; SSSE3-NEXT: pxor %xmm2, %xmm2 1987; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1988; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1989; SSSE3-NEXT: retq 1990; 1991; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3: 1992; SSE41: # %bb.0: # %entry 1993; SSE41-NEXT: movdqa %xmm0, %xmm1 1994; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1995; SSE41-NEXT: pxor %xmm2, %xmm2 1996; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1997; SSE41-NEXT: psrldq {{.*#+}} xmm1 = 
xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1998; SSE41-NEXT: retq 1999; 2000; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3: 2001; AVX1: # %bb.0: # %entry 2002; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2003; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2004; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2005; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2006; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2007; AVX1-NEXT: retq 2008; 2009; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3: 2010; AVX2: # %bb.0: # %entry 2011; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 2012; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2013; AVX2-NEXT: retq 2014; 2015; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3: 2016; AVX512: # %bb.0: # %entry 2017; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 2018; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2019; AVX512-NEXT: retq 2020entry: 2021 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8> 2022 %Z = bitcast <16 x i16> %B to <8 x i32> 2023 ret <8 x i32> %Z 2024} 2025 2026define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp { 2027; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8: 2028; SSE2: # %bb.0: # %entry 2029; SSE2-NEXT: pxor %xmm2, %xmm2 2030; SSE2-NEXT: movdqa %xmm1, %xmm0 2031; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2032; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2033; SSE2-NEXT: retq 2034; 2035; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8: 2036; SSSE3: # %bb.0: # %entry 2037; SSSE3-NEXT: pxor %xmm2, %xmm2 2038; SSSE3-NEXT: movdqa %xmm1, %xmm0 2039; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2040; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2041; SSSE3-NEXT: retq 2042; 2043; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8: 2044; SSE41: # %bb.0: # %entry 2045; SSE41-NEXT: pxor %xmm2, %xmm2 2046; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 2047; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2048; SSE41-NEXT: retq 2049; 2050; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8: 2051; AVX1: # %bb.0: # %entry 2052; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2053; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2054; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2055; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2056; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2057; AVX1-NEXT: retq 2058; 2059; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8: 2060; AVX2: # %bb.0: # %entry 2061; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2062; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2063; AVX2-NEXT: retq 2064; 2065; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8: 2066; AVX512: # %bb.0: # %entry 2067; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2068; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2069; 
AVX512-NEXT: retq 2070entry: 2071 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16> 2072 %Z = bitcast <16 x i16> %B to <8 x i32> 2073 ret <8 x i32> %Z 2074} 2075 2076define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp { 2077; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2: 2078; SSE: # %bb.0: # %entry 2079; SSE-NEXT: xorps %xmm1, %xmm1 2080; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2081; SSE-NEXT: retq 2082; 2083; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2: 2084; AVX: # %bb.0: # %entry 2085; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 2086; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2087; AVX-NEXT: retq 2088entry: 2089 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4> 2090 %Z = bitcast <4 x i32> %B to <2 x i64> 2091 ret <2 x i64> %Z 2092} 2093 2094define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp { 2095; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1: 2096; SSE2: # %bb.0: # %entry 2097; SSE2-NEXT: movdqa %xmm0, %xmm1 2098; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 2099; SSE2-NEXT: pand %xmm1, %xmm0 2100; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2101; SSE2-NEXT: retq 2102; 2103; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1: 2104; SSSE3: # %bb.0: # %entry 2105; SSSE3-NEXT: movdqa %xmm0, %xmm1 2106; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 2107; SSSE3-NEXT: pand %xmm1, %xmm0 2108; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2109; SSSE3-NEXT: retq 2110; 2111; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1: 2112; SSE41: # %bb.0: # %entry 2113; SSE41-NEXT: movdqa %xmm0, %xmm1 2114; SSE41-NEXT: 
pxor %xmm0, %xmm0 2115; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 2116; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2117; SSE41-NEXT: retq 2118; 2119; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1: 2120; AVX1: # %bb.0: # %entry 2121; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2122; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2123; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2124; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2125; AVX1-NEXT: retq 2126; 2127; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1: 2128; AVX2: # %bb.0: # %entry 2129; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 2130; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2131; AVX2-NEXT: retq 2132; 2133; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1: 2134; AVX512: # %bb.0: # %entry 2135; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 2136; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2137; AVX512-NEXT: retq 2138entry: 2139 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4> 2140 %Z = bitcast <8 x i32> %B to <4 x i64> 2141 ret <4 x i64> %Z 2142} 2143 2144define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) { 2145; SSE2-LABEL: zext_32i8_to_32i32: 2146; SSE2: # %bb.0: 2147; SSE2-NEXT: movq %rdi, %rax 2148; SSE2-NEXT: pxor %xmm2, %xmm2 2149; SSE2-NEXT: movdqa %xmm0, %xmm3 2150; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2151; SSE2-NEXT: movdqa %xmm3, %xmm4 2152; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2153; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = 
xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2154; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 2155; SSE2-NEXT: movdqa %xmm0, %xmm5 2156; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] 2157; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2158; SSE2-NEXT: movdqa %xmm1, %xmm6 2159; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2160; SSE2-NEXT: movdqa %xmm6, %xmm7 2161; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3] 2162; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2163; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 2164; SSE2-NEXT: movdqa %xmm1, %xmm8 2165; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3] 2166; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2167; SSE2-NEXT: movdqa %xmm1, 112(%rdi) 2168; SSE2-NEXT: movdqa %xmm8, 96(%rdi) 2169; SSE2-NEXT: movdqa %xmm6, 80(%rdi) 2170; SSE2-NEXT: movdqa %xmm7, 64(%rdi) 2171; SSE2-NEXT: movdqa %xmm0, 48(%rdi) 2172; SSE2-NEXT: movdqa %xmm5, 32(%rdi) 2173; SSE2-NEXT: movdqa %xmm3, 16(%rdi) 2174; SSE2-NEXT: movdqa %xmm4, (%rdi) 2175; SSE2-NEXT: retq 2176; 2177; SSSE3-LABEL: zext_32i8_to_32i32: 2178; SSSE3: # %bb.0: 2179; SSSE3-NEXT: movq %rdi, %rax 2180; SSSE3-NEXT: pxor %xmm2, %xmm2 2181; SSSE3-NEXT: movdqa %xmm0, %xmm3 2182; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2183; SSSE3-NEXT: movdqa %xmm3, %xmm4 2184; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2185; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2186; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 2187; SSSE3-NEXT: movdqa %xmm0, %xmm5 2188; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] 2189; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2190; SSSE3-NEXT: movdqa %xmm1, %xmm6 2191; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2192; SSSE3-NEXT: movdqa %xmm6, %xmm7 2193; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3] 2194; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2195; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 2196; SSSE3-NEXT: movdqa %xmm1, %xmm8 2197; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3] 2198; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2199; SSSE3-NEXT: movdqa %xmm1, 112(%rdi) 2200; SSSE3-NEXT: movdqa %xmm8, 96(%rdi) 2201; SSSE3-NEXT: movdqa %xmm6, 80(%rdi) 2202; SSSE3-NEXT: movdqa %xmm7, 64(%rdi) 2203; SSSE3-NEXT: movdqa %xmm0, 48(%rdi) 2204; SSSE3-NEXT: movdqa %xmm5, 32(%rdi) 2205; SSSE3-NEXT: movdqa 
%xmm3, 16(%rdi) 2206; SSSE3-NEXT: movdqa %xmm4, (%rdi) 2207; SSSE3-NEXT: retq 2208; 2209; SSE41-LABEL: zext_32i8_to_32i32: 2210; SSE41: # %bb.0: 2211; SSE41-NEXT: movq %rdi, %rax 2212; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2213; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] 2214; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero 2215; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] 2216; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero 2217; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 2218; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2219; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2220; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,1,1] 2221; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero 2222; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] 2223; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero 2224; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 2225; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2226; SSE41-NEXT: movdqa %xmm1, 112(%rdi) 2227; SSE41-NEXT: movdqa %xmm7, 96(%rdi) 2228; SSE41-NEXT: movdqa %xmm6, 80(%rdi) 2229; SSE41-NEXT: movdqa %xmm5, 64(%rdi) 2230; SSE41-NEXT: movdqa %xmm0, 48(%rdi) 2231; SSE41-NEXT: movdqa %xmm4, 32(%rdi) 2232; SSE41-NEXT: movdqa %xmm3, 16(%rdi) 2233; SSE41-NEXT: movdqa %xmm2, (%rdi) 2234; SSE41-NEXT: retq 2235; 2236; AVX1-LABEL: zext_32i8_to_32i32: 2237; AVX1: # 
%bb.0: 2238; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2239; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 2240; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 2241; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 2242; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2243; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero 2244; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,1,1] 2245; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 2246; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 2247; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2248; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2249; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 2250; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2251; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 2252; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 2253; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2254; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2255; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero 2256; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3 2257; AVX1-NEXT: vmovaps %ymm4, %ymm0 2258; AVX1-NEXT: retq 2259; 2260; AVX2-LABEL: zext_32i8_to_32i32: 2261; AVX2: # %bb.0: 2262; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2263; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 2264; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero 2265; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2266; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2267; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 2268; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2269; AVX2-NEXT: vmovdqa %ymm4, %ymm0 2270; AVX2-NEXT: retq 2271; 2272; AVX512-LABEL: zext_32i8_to_32i32: 2273; AVX512: # %bb.0: 2274; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2275; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2276; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2277; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0 2278; AVX512-NEXT: retq 2279 %res = zext <32 x i8>%x to <32 x i32> 2280 ret <32 x i32> %res 2281} 2282 2283define <2 x i32> @zext_2i8_to_2i32(ptr %addr) { 2284; SSE2-LABEL: zext_2i8_to_2i32: 2285; SSE2: # %bb.0: 2286; SSE2-NEXT: movzwl (%rdi), %eax 2287; SSE2-NEXT: movd %eax, %xmm0 2288; SSE2-NEXT: pxor %xmm1, %xmm1 2289; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2290; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2291; SSE2-NEXT: paddd %xmm0, %xmm0 2292; SSE2-NEXT: retq 2293; 2294; SSSE3-LABEL: zext_2i8_to_2i32: 2295; SSSE3: # %bb.0: 2296; SSSE3-NEXT: movzwl (%rdi), %eax 2297; SSSE3-NEXT: movd %eax, %xmm0 2298; SSSE3-NEXT: pxor %xmm1, %xmm1 2299; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2300; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2301; SSSE3-NEXT: paddd %xmm0, %xmm0 2302; SSSE3-NEXT: retq 2303; 2304; SSE41-LABEL: zext_2i8_to_2i32: 2305; SSE41: # %bb.0: 2306; SSE41-NEXT: movzwl (%rdi), %eax 2307; SSE41-NEXT: movd %eax, %xmm0 2308; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2309; SSE41-NEXT: paddd %xmm0, %xmm0 2310; SSE41-NEXT: retq 2311; 2312; AVX-LABEL: zext_2i8_to_2i32: 2313; AVX: # %bb.0: 2314; AVX-NEXT: 
movzwl (%rdi), %eax 2315; AVX-NEXT: vmovd %eax, %xmm0 2316; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2317; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 2318; AVX-NEXT: retq 2319 %x = load <2 x i8>, ptr %addr, align 1 2320 %y = zext <2 x i8> %x to <2 x i32> 2321 %z = add <2 x i32>%y, %y 2322 ret <2 x i32>%z 2323} 2324 2325define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) { 2326; SSE2-LABEL: zext_4i17_to_4i32: 2327; SSE2: # %bb.0: 2328; SSE2-NEXT: movq (%rdi), %rax 2329; SSE2-NEXT: movd %eax, %xmm0 2330; SSE2-NEXT: movq %rax, %rcx 2331; SSE2-NEXT: shrq $17, %rcx 2332; SSE2-NEXT: movd %ecx, %xmm1 2333; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2334; SSE2-NEXT: movl 8(%rdi), %ecx 2335; SSE2-NEXT: shll $13, %ecx 2336; SSE2-NEXT: movq %rax, %rdx 2337; SSE2-NEXT: shrq $51, %rdx 2338; SSE2-NEXT: orl %ecx, %edx 2339; SSE2-NEXT: movd %edx, %xmm1 2340; SSE2-NEXT: shrq $34, %rax 2341; SSE2-NEXT: movd %eax, %xmm2 2342; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2343; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2344; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2345; SSE2-NEXT: retq 2346; 2347; SSSE3-LABEL: zext_4i17_to_4i32: 2348; SSSE3: # %bb.0: 2349; SSSE3-NEXT: movq (%rdi), %rax 2350; SSSE3-NEXT: movd %eax, %xmm0 2351; SSSE3-NEXT: movq %rax, %rcx 2352; SSSE3-NEXT: shrq $17, %rcx 2353; SSSE3-NEXT: movd %ecx, %xmm1 2354; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2355; SSSE3-NEXT: movl 8(%rdi), %ecx 2356; SSSE3-NEXT: shll $13, %ecx 2357; SSSE3-NEXT: movq %rax, %rdx 2358; SSSE3-NEXT: shrq $51, %rdx 2359; SSSE3-NEXT: orl %ecx, %edx 2360; SSSE3-NEXT: movd %edx, %xmm1 2361; SSSE3-NEXT: shrq $34, %rax 2362; SSSE3-NEXT: movd %eax, %xmm2 2363; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2364; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2365; SSSE3-NEXT: pand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2366; SSSE3-NEXT: retq 2367; 2368; SSE41-LABEL: zext_4i17_to_4i32: 2369; SSE41: # %bb.0: 2370; SSE41-NEXT: movl 8(%rdi), %eax 2371; SSE41-NEXT: shll $13, %eax 2372; SSE41-NEXT: movq (%rdi), %rcx 2373; SSE41-NEXT: movq %rcx, %rdx 2374; SSE41-NEXT: shrq $51, %rdx 2375; SSE41-NEXT: orl %eax, %edx 2376; SSE41-NEXT: movq %rcx, %rax 2377; SSE41-NEXT: shrq $17, %rax 2378; SSE41-NEXT: movd %ecx, %xmm0 2379; SSE41-NEXT: pinsrd $1, %eax, %xmm0 2380; SSE41-NEXT: shrq $34, %rcx 2381; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 2382; SSE41-NEXT: pinsrd $3, %edx, %xmm0 2383; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2384; SSE41-NEXT: retq 2385; 2386; AVX1-LABEL: zext_4i17_to_4i32: 2387; AVX1: # %bb.0: 2388; AVX1-NEXT: movl 8(%rdi), %eax 2389; AVX1-NEXT: shll $13, %eax 2390; AVX1-NEXT: movq (%rdi), %rcx 2391; AVX1-NEXT: movq %rcx, %rdx 2392; AVX1-NEXT: shrq $51, %rdx 2393; AVX1-NEXT: orl %eax, %edx 2394; AVX1-NEXT: movq %rcx, %rax 2395; AVX1-NEXT: shrq $17, %rax 2396; AVX1-NEXT: vmovd %ecx, %xmm0 2397; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 2398; AVX1-NEXT: shrq $34, %rcx 2399; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2400; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 2401; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2402; AVX1-NEXT: retq 2403; 2404; AVX2-LABEL: zext_4i17_to_4i32: 2405; AVX2: # %bb.0: 2406; AVX2-NEXT: movl 8(%rdi), %eax 2407; AVX2-NEXT: shll $13, %eax 2408; AVX2-NEXT: movq (%rdi), %rcx 2409; AVX2-NEXT: movq %rcx, %rdx 2410; AVX2-NEXT: shrq $51, %rdx 2411; AVX2-NEXT: orl %eax, %edx 2412; AVX2-NEXT: movq %rcx, %rax 2413; AVX2-NEXT: shrq $17, %rax 2414; AVX2-NEXT: vmovd %ecx, %xmm0 2415; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 2416; AVX2-NEXT: shrq $34, %rcx 2417; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2418; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 2419; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071] 2420; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2421; AVX2-NEXT: retq 2422; 2423; 
AVX512-LABEL: zext_4i17_to_4i32: 2424; AVX512: # %bb.0: 2425; AVX512-NEXT: movl 8(%rdi), %eax 2426; AVX512-NEXT: shll $13, %eax 2427; AVX512-NEXT: movq (%rdi), %rcx 2428; AVX512-NEXT: movq %rcx, %rdx 2429; AVX512-NEXT: shrq $51, %rdx 2430; AVX512-NEXT: orl %eax, %edx 2431; AVX512-NEXT: movq %rcx, %rax 2432; AVX512-NEXT: shrq $17, %rax 2433; AVX512-NEXT: vmovd %ecx, %xmm0 2434; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 2435; AVX512-NEXT: shrq $34, %rcx 2436; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2437; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 2438; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071] 2439; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 2440; AVX512-NEXT: retq 2441 %a = load <4 x i17>, ptr %ptr 2442 %b = zext <4 x i17> %a to <4 x i32> 2443 ret <4 x i32> %b 2444} 2445 2446define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp { 2447; SSE2-LABEL: zext_8i6_to_8i64: 2448; SSE2: # %bb.0: # %entry 2449; SSE2-NEXT: movd %edi, %xmm0 2450; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2451; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 2452; SSE2-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 2453; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0] 2454; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2455; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,63] 2456; SSE2-NEXT: pand %xmm4, %xmm0 2457; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 2458; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5] 2459; SSE2-NEXT: pand %xmm4, %xmm1 2460; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2] 2461; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5] 2462; SSE2-NEXT: pand %xmm4, %xmm2 2463; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2464; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5] 2465; SSE2-NEXT: pand %xmm4, %xmm3 2466; SSE2-NEXT: retq 2467; 2468; SSSE3-LABEL: zext_8i6_to_8i64: 2469; SSSE3: # %bb.0: # %entry 2470; SSSE3-NEXT: movd %edi, %xmm0 2471; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm0[0,0,0,0,4,5,6,7] 2472; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 2473; SSSE3-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 2474; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0] 2475; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2476; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [63,63] 2477; SSSE3-NEXT: pand %xmm4, %xmm0 2478; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 2479; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5] 2480; SSSE3-NEXT: pand %xmm4, %xmm1 2481; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2] 2482; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5] 2483; SSSE3-NEXT: pand %xmm4, %xmm2 2484; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2485; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5] 2486; SSSE3-NEXT: pand %xmm4, %xmm3 2487; SSSE3-NEXT: retq 2488; 2489; SSE41-LABEL: zext_8i6_to_8i64: 2490; SSE41: # %bb.0: # %entry 2491; SSE41-NEXT: movd %edi, %xmm0 2492; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2493; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 2494; SSE41-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 2495; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 2496; SSE41-NEXT: pmovsxbq {{.*#+}} xmm4 = [63,63] 2497; SSE41-NEXT: pand %xmm4, %xmm0 2498; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 2499; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 2500; SSE41-NEXT: pand %xmm4, %xmm1 2501; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] 2502; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 2503; SSE41-NEXT: pand %xmm4, %xmm2 2504; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2505; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 2506; SSE41-NEXT: pand %xmm4, %xmm3 2507; SSE41-NEXT: retq 2508; 2509; AVX1-LABEL: zext_8i6_to_8i64: 2510; AVX1: # %bb.0: # %entry 2511; AVX1-NEXT: vmovd %edi, %xmm0 2512; AVX1-NEXT: vpshuflw 
{{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2513; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2514; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2515; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2516; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 2517; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 2518; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 2519; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2520; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 2521; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 2522; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 2523; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 2524; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2525; AVX1-NEXT: retq 2526; 2527; AVX2-LABEL: zext_8i6_to_8i64: 2528; AVX2: # %bb.0: # %entry 2529; AVX2-NEXT: vmovd %edi, %xmm0 2530; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2531; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2532; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2533; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2534; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 2535; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2536; AVX2-NEXT: retq 2537; 2538; AVX512-LABEL: zext_8i6_to_8i64: 2539; AVX512: # %bb.0: # %entry 2540; AVX512-NEXT: vmovd %edi, %xmm0 2541; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 2542; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2543; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2544; AVX512-NEXT: vpandq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 2545; AVX512-NEXT: retq 2546entry: 2547 %a = trunc i32 %x to i6 2548 %b = insertelement <8 x i6> undef, i6 %a, i32 0 2549 %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer 2550 %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7> 2551 %e = zext <8 x i6> %d to <8 x i64> 2552 ret <8 x i64> %e 2553} 2554 2555define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { 2556; SSE2-LABEL: splatshuf_zext_v4i64: 2557; SSE2: # %bb.0: 2558; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2559; SSE2-NEXT: pxor %xmm1, %xmm1 2560; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2561; SSE2-NEXT: movdqa %xmm0, %xmm1 2562; SSE2-NEXT: retq 2563; 2564; SSSE3-LABEL: splatshuf_zext_v4i64: 2565; SSSE3: # %bb.0: 2566; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2567; SSSE3-NEXT: pxor %xmm1, %xmm1 2568; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2569; SSSE3-NEXT: movdqa %xmm0, %xmm1 2570; SSSE3-NEXT: retq 2571; 2572; SSE41-LABEL: splatshuf_zext_v4i64: 2573; SSE41: # %bb.0: 2574; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2575; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 2576; SSE41-NEXT: movdqa %xmm0, %xmm1 2577; SSE41-NEXT: retq 2578; 2579; AVX1-LABEL: splatshuf_zext_v4i64: 2580; AVX1: # %bb.0: 2581; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2582; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 2583; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2584; AVX1-NEXT: retq 2585; 2586; AVX2-LABEL: splatshuf_zext_v4i64: 2587; AVX2: # %bb.0: 2588; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0 2589; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2590; AVX2-NEXT: retq 2591; 2592; AVX512-LABEL: splatshuf_zext_v4i64: 2593; AVX512: # %bb.0: 2594; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0 2595; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 
2596; AVX512-NEXT: retq 2597 %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer 2598 %ext = zext <4 x i32> %shuf to <4 x i64> 2599 ret <4 x i64> %ext 2600} 2601 2602define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) { 2603; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs: 2604; SSE2: # %bb.0: 2605; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 2606; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7] 2607; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2608; SSE2-NEXT: movdqa %xmm0, %xmm1 2609; SSE2-NEXT: retq 2610; 2611; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs: 2612; SSSE3: # %bb.0: 2613; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2614; SSSE3-NEXT: movdqa %xmm0, %xmm1 2615; SSSE3-NEXT: retq 2616; 2617; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs: 2618; SSE41: # %bb.0: 2619; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2620; SSE41-NEXT: movdqa %xmm0, %xmm1 2621; SSE41-NEXT: retq 2622; 2623; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs: 2624; AVX1: # %bb.0: 2625; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2626; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2627; AVX1-NEXT: retq 2628; 2629; AVX2-LABEL: splatshuf_zext_v8i32_matching_undefs: 2630; AVX2: # %bb.0: 2631; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15] 2632; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2633; AVX2-NEXT: retq 2634; 2635; AVX512-LABEL: splatshuf_zext_v8i32_matching_undefs: 2636; AVX512: # %bb.0: 2637; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15] 2638; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2639; AVX512-NEXT: retq 2640 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7> 2641 %ext = zext <8 x i16> %shuf to <8 x i32> 2642 ret <8 x i32> %ext 2643} 2644 2645define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) { 2646; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2647; SSE2: # %bb.0: 2648; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2649; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7] 2650; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2651; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7] 2652; SSE2-NEXT: pxor %xmm1, %xmm1 2653; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2654; SSE2-NEXT: movdqa %xmm0, %xmm1 2655; SSE2-NEXT: retq 2656; 2657; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2658; SSSE3: # %bb.0: 2659; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2660; SSSE3-NEXT: movdqa %xmm0, %xmm1 2661; SSSE3-NEXT: retq 2662; 2663; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2664; SSE41: # %bb.0: 2665; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2666; SSE41-NEXT: movdqa %xmm0, %xmm1 2667; SSE41-NEXT: retq 2668; 2669; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2670; AVX1: # %bb.0: 2671; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] 2672; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2673; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2674; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2675; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2676; AVX1-NEXT: retq 2677; 2678; AVX2-LABEL: 
splatshuf_zext_v8i32_unmatched_undef: 2679; AVX2: # %bb.0: 2680; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] 2681; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2682; AVX2-NEXT: retq 2683; 2684; AVX512-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2685; AVX512: # %bb.0: 2686; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] 2687; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2688; AVX512-NEXT: retq 2689 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7> 2690 %ext = zext <8 x i16> %shuf to <8 x i32> 2691 ret <8 x i32> %ext 2692} 2693 2694define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) { 2695; SSE2-LABEL: splatshuf_zext_v16i16: 2696; SSE2: # %bb.0: 2697; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2698; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6] 2699; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 2700; SSE2-NEXT: pxor %xmm1, %xmm1 2701; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2702; SSE2-NEXT: movdqa %xmm0, %xmm1 2703; SSE2-NEXT: retq 2704; 2705; SSSE3-LABEL: splatshuf_zext_v16i16: 2706; SSSE3: # %bb.0: 2707; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero 2708; SSSE3-NEXT: movdqa %xmm0, %xmm1 2709; SSSE3-NEXT: retq 2710; 2711; SSE41-LABEL: splatshuf_zext_v16i16: 2712; SSE41: # %bb.0: 2713; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero 2714; SSE41-NEXT: movdqa 
%xmm0, %xmm1 2715; SSE41-NEXT: retq 2716; 2717; AVX1-LABEL: splatshuf_zext_v16i16: 2718; AVX1: # %bb.0: 2719; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero 2720; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2721; AVX1-NEXT: retq 2722; 2723; AVX2-LABEL: splatshuf_zext_v16i16: 2724; AVX2: # %bb.0: 2725; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] 2726; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2727; AVX2-NEXT: retq 2728; 2729; AVX512-LABEL: splatshuf_zext_v16i16: 2730; AVX512: # %bb.0: 2731; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] 2732; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2733; AVX512-NEXT: retq 2734 %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14> 2735 %ext = zext <16 x i8> %shuf to <16 x i16> 2736 ret <16 x i16> %ext 2737} 2738