; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VLBW

;
; 128-bit vectors
;

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k0
; AVX512VLBW-NEXT: vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrld $31, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k0
; AVX512VLBW-NEXT: vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*#+}} ymm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VLBW-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT: psrlq $63, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psrlq $63, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
; AVX1-NEXT: vpcmpeqq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,1,1,1]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [16,32,64,128]
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1
; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512VLBW-NEXT: vpsrlq $63, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrld $31, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: psrld $31, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psrld $31, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm1
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512VLBW-NEXT: vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k2} {z} = -1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k0
; AVX512VLBW-NEXT: vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT: vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,13,13,13,13,13,13,13,13,22,22,22,22,22,22,22,22,31,31,31,31,31,31,31,31]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i64_64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: movl %edi, %ecx
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
; AVX512F-NEXT: kmovw %edi, %k4
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k4} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i64_64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovq %rdi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*#+}} zmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VLBW-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = zext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}