1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW 5 6define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone { 7; KNL-LABEL: zext_8x8mem_to_8x16: 8; KNL: # %bb.0: 9; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 10; KNL-NEXT: vpsllw $15, %xmm0, %xmm0 11; KNL-NEXT: vpsraw $15, %xmm0, %xmm0 12; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 13; KNL-NEXT: retq 14; 15; SKX-LABEL: zext_8x8mem_to_8x16: 16; SKX: # %bb.0: 17; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 18; SKX-NEXT: vpmovw2m %xmm0, %k1 19; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 20; SKX-NEXT: retq 21; 22; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16: 23; AVX512DQNOBW: # %bb.0: 24; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 25; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0 26; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0 27; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0 28; AVX512DQNOBW-NEXT: retq 29 %a = load <8 x i8>,ptr%i,align 1 30 %x = zext <8 x i8> %a to <8 x i16> 31 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer 32 ret <8 x i16> %ret 33} 34 35define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone { 36; KNL-LABEL: sext_8x8mem_to_8x16: 37; KNL: # %bb.0: 38; KNL-NEXT: vpmovsxbw (%rdi), %xmm1 39; 
KNL-NEXT: vpsllw $15, %xmm0, %xmm0 40; KNL-NEXT: vpsraw $15, %xmm0, %xmm0 41; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 42; KNL-NEXT: retq 43; 44; SKX-LABEL: sext_8x8mem_to_8x16: 45; SKX: # %bb.0: 46; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 47; SKX-NEXT: vpmovw2m %xmm0, %k1 48; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} 49; SKX-NEXT: retq 50; 51; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16: 52; AVX512DQNOBW: # %bb.0: 53; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1 54; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0 55; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0 56; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0 57; AVX512DQNOBW-NEXT: retq 58 %a = load <8 x i8>,ptr%i,align 1 59 %x = sext <8 x i8> %a to <8 x i16> 60 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer 61 ret <8 x i16> %ret 62} 63 64 65define <16 x i16> @zext_16x8mem_to_16x16(ptr%i , <16 x i1> %mask) nounwind readnone { 66; KNL-LABEL: zext_16x8mem_to_16x16: 67; KNL: # %bb.0: 68; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 69; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 70; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 71; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 72; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 73; KNL-NEXT: retq 74; 75; SKX-LABEL: zext_16x8mem_to_16x16: 76; SKX: # %bb.0: 77; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 78; SKX-NEXT: vpmovb2m %xmm0, %k1 79; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 80; SKX-NEXT: retq 81; 82; 
AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16: 83; AVX512DQNOBW: # %bb.0: 84; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 85; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 86; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0 87; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0 88; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 89; AVX512DQNOBW-NEXT: retq 90 %a = load <16 x i8>,ptr%i,align 1 91 %x = zext <16 x i8> %a to <16 x i16> 92 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 93 ret <16 x i16> %ret 94} 95 96define <16 x i16> @sext_16x8mem_to_16x16(ptr%i , <16 x i1> %mask) nounwind readnone { 97; KNL-LABEL: sext_16x8mem_to_16x16: 98; KNL: # %bb.0: 99; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 100; KNL-NEXT: vpmovsxbw (%rdi), %ymm1 101; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 102; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 103; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 104; KNL-NEXT: retq 105; 106; SKX-LABEL: sext_16x8mem_to_16x16: 107; SKX: # %bb.0: 108; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 109; SKX-NEXT: vpmovb2m %xmm0, %k1 110; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} 111; SKX-NEXT: retq 112; 113; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16: 114; AVX512DQNOBW: # %bb.0: 115; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 116; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm1 117; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0 118; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0 119; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 120; AVX512DQNOBW-NEXT: retq 121 %a = load <16 x i8>,ptr%i,align 1 122 %x = sext <16 x i8> %a to <16 x i16> 123 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 124 ret <16 x i16> %ret 125} 126 127define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { 128; ALL-LABEL: zext_16x8_to_16x16: 129; ALL: # %bb.0: 130; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 131; ALL-NEXT: retq 132 %x = zext <16 x i8> %a to <16 x i16> 133 ret <16 x i16> %x 134} 135 136define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { 137; KNL-LABEL: zext_16x8_to_16x16_mask: 138; KNL: # %bb.0: 139; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 140; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 141; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 142; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 143; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 144; KNL-NEXT: retq 145; 146; SKX-LABEL: zext_16x8_to_16x16_mask: 147; SKX: # %bb.0: 148; SKX-NEXT: 
vpsllw $7, %xmm1, %xmm1 149; SKX-NEXT: vpmovb2m %xmm1, %k1 150; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 151; SKX-NEXT: retq 152; 153; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask: 154; AVX512DQNOBW: # %bb.0: 155; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 156; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 157; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 158; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 159; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 160; AVX512DQNOBW-NEXT: retq 161 %x = zext <16 x i8> %a to <16 x i16> 162 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 163 ret <16 x i16> %ret 164} 165 166define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { 167; ALL-LABEL: sext_16x8_to_16x16: 168; ALL: # %bb.0: 169; ALL-NEXT: vpmovsxbw %xmm0, %ymm0 170; ALL-NEXT: retq 171 %x = sext <16 x i8> %a to <16 x i16> 172 ret <16 x i16> %x 173} 174 175define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { 176; KNL-LABEL: sext_16x8_to_16x16_mask: 177; KNL: # %bb.0: 178; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 179; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 180; 
KNL-NEXT: vpsllw $15, %ymm1, %ymm1 181; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 182; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 183; KNL-NEXT: retq 184; 185; SKX-LABEL: sext_16x8_to_16x16_mask: 186; SKX: # %bb.0: 187; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 188; SKX-NEXT: vpmovb2m %xmm1, %k1 189; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} 190; SKX-NEXT: retq 191; 192; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask: 193; AVX512DQNOBW: # %bb.0: 194; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 195; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0 196; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 197; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 198; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 199; AVX512DQNOBW-NEXT: retq 200 %x = sext <16 x i8> %a to <16 x i16> 201 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 202 ret <16 x i16> %ret 203} 204 205define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readnone { 206; KNL-LABEL: zext_32x8mem_to_32x16: 207; KNL: # %bb.0: 208; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 209; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 210; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 211; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 212; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 213; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 214; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 215; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 216; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 217; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 218; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 219; KNL-NEXT: vpandq %zmm2, %zmm0, %zmm0 220; KNL-NEXT: retq 221; 222; SKX-LABEL: zext_32x8mem_to_32x16: 223; SKX: # %bb.0: 224; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 225; SKX-NEXT: vpmovb2m %ymm0, %k1 226; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero 227; SKX-NEXT: retq 228; 229; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16: 230; AVX512DQNOBW: # %bb.0: 231; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1 232; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 233; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 234; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 235; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 236; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 237; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0 238; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0 239; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 240; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 241; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 242; AVX512DQNOBW-NEXT: vpandq %zmm2, %zmm0, %zmm0 243; AVX512DQNOBW-NEXT: retq 244 %a = load <32 x i8>,ptr%i,align 1 245 %x = zext <32 x i8> %a to <32 x i16> 246 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 247 ret <32 x i16> %ret 248} 249 250define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readnone { 251; KNL-LABEL: sext_32x8mem_to_32x16: 252; KNL: # %bb.0: 253; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 254; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 255; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 
256; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2 257; KNL-NEXT: vpmovsxbw (%rdi), %ymm3 258; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 259; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 260; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 261; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 262; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 263; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 264; KNL-NEXT: vpandq %zmm2, %zmm0, %zmm0 265; KNL-NEXT: retq 266; 267; SKX-LABEL: sext_32x8mem_to_32x16: 268; SKX: # %bb.0: 269; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 270; SKX-NEXT: vpmovb2m %ymm0, %k1 271; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} 272; SKX-NEXT: retq 273; 274; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16: 275; AVX512DQNOBW: # %bb.0: 276; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1 277; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 278; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 279; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2 280; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3 281; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 282; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0 283; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0 284; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 285; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 286; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 287; AVX512DQNOBW-NEXT: vpandq %zmm2, %zmm0, %zmm0 288; AVX512DQNOBW-NEXT: retq 289 %a = load <32 x i8>,ptr%i,align 1 290 %x = sext <32 x i8> %a to <32 x i16> 291 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 292 ret <32 x i16> %ret 293} 294 295define <32 x i16> 
@zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { 296; KNL-LABEL: zext_32x8_to_32x16: 297; KNL: # %bb.0: 298; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 299; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 300; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 301; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 302; KNL-NEXT: retq 303; 304; SKX-LABEL: zext_32x8_to_32x16: 305; SKX: # %bb.0: 306; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 307; SKX-NEXT: retq 308; 309; AVX512DQNOBW-LABEL: zext_32x8_to_32x16: 310; AVX512DQNOBW: # %bb.0: 311; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 312; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0 313; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 314; AVX512DQNOBW-NEXT: vinserti64x4 $1, 
%ymm0, %zmm1, %zmm0 315; AVX512DQNOBW-NEXT: retq 316 %x = zext <32 x i8> %a to <32 x i16> 317 ret <32 x i16> %x 318} 319 320define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { 321; KNL-LABEL: zext_32x8_to_32x16_mask: 322; KNL: # %bb.0: 323; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 324; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 325; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 326; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 327; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 328; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 329; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 330; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 331; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 332; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 333; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 334; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 335; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0 336; KNL-NEXT: retq 337; 338; SKX-LABEL: zext_32x8_to_32x16_mask: 339; SKX: # %bb.0: 340; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 341; SKX-NEXT: vpmovb2m %ymm1, %k1 342; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 343; SKX-NEXT: retq 344; 345; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask: 346; AVX512DQNOBW: # %bb.0: 347; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 348; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 349; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 350; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 351; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0 352; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 353; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 354; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 355; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 356; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm2 357; AVX512DQNOBW-NEXT: vpsraw $15, %ymm2, %ymm2 358; AVX512DQNOBW-NEXT: vinserti64x4 
$1, %ymm2, %zmm1, %zmm1 359; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0 360; AVX512DQNOBW-NEXT: retq 361 %x = zext <32 x i8> %a to <32 x i16> 362 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 363 ret <32 x i16> %ret 364} 365 366define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { 367; KNL-LABEL: sext_32x8_to_32x16: 368; KNL: # %bb.0: 369; KNL-NEXT: vpmovsxbw %xmm0, %ymm1 370; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 371; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 372; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 373; KNL-NEXT: retq 374; 375; SKX-LABEL: sext_32x8_to_32x16: 376; SKX: # %bb.0: 377; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 378; SKX-NEXT: retq 379; 380; AVX512DQNOBW-LABEL: sext_32x8_to_32x16: 381; AVX512DQNOBW: # %bb.0: 382; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm1 383; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0 384; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0 385; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 386; AVX512DQNOBW-NEXT: retq 387 %x = sext <32 x i8> %a to <32 x i16> 388 ret <32 x i16> %x 389} 390 391define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { 392; KNL-LABEL: sext_32x8_to_32x16_mask: 393; KNL: # %bb.0: 394; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 395; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 396; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 397; KNL-NEXT: vpmovsxbw %xmm0, %ymm3 398; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 399; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 400; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 401; KNL-NEXT: vpsllw $15, 
%ymm1, %ymm1 402; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 403; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 404; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 405; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 406; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0 407; KNL-NEXT: retq 408; 409; SKX-LABEL: sext_32x8_to_32x16_mask: 410; SKX: # %bb.0: 411; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 412; SKX-NEXT: vpmovb2m %ymm1, %k1 413; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} 414; SKX-NEXT: retq 415; 416; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask: 417; AVX512DQNOBW: # %bb.0: 418; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 419; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 420; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 421; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm3 422; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0 423; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0 424; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 425; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 426; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 427; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm2 428; AVX512DQNOBW-NEXT: vpsraw $15, %ymm2, %ymm2 429; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 430; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0 431; AVX512DQNOBW-NEXT: retq 432 %x = sext <32 x i8> %a to <32 x i16> 433 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 434 ret <32 x i16> %ret 435} 436 437define <4 x i32> @zext_4x8mem_to_4x32(ptr%i , <4 x i1> %mask) nounwind readnone { 438; KNL-LABEL: zext_4x8mem_to_4x32: 439; KNL: # %bb.0: 440; KNL-NEXT: vpslld $31, %xmm0, %xmm0 
441; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 442; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 443; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 444; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 445; KNL-NEXT: vzeroupper 446; KNL-NEXT: retq 447; 448; AVX512DQ-LABEL: zext_4x8mem_to_4x32: 449; AVX512DQ: # %bb.0: 450; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 451; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 452; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 453; AVX512DQ-NEXT: retq 454 %a = load <4 x i8>,ptr%i,align 1 455 %x = zext <4 x i8> %a to <4 x i32> 456 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 457 ret <4 x i32> %ret 458} 459 460define <4 x i32> @sext_4x8mem_to_4x32(ptr%i , <4 x i1> %mask) nounwind readnone { 461; KNL-LABEL: sext_4x8mem_to_4x32: 462; KNL: # %bb.0: 463; KNL-NEXT: vpslld $31, %xmm0, %xmm0 464; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 465; KNL-NEXT: vpmovsxbd (%rdi), %xmm0 466; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 467; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 468; KNL-NEXT: vzeroupper 469; KNL-NEXT: retq 470; 471; AVX512DQ-LABEL: sext_4x8mem_to_4x32: 472; AVX512DQ: # %bb.0: 473; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 474; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 475; AVX512DQ-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} 476; AVX512DQ-NEXT: retq 477 %a = load <4 x i8>,ptr%i,align 1 478 %x = sext <4 x i8> %a to <4 x i32> 479 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 480 ret <4 x i32> %ret 481} 482 483define <8 x i32> @zext_8x8mem_to_8x32(ptr%i , <8 x i1> %mask) nounwind readnone { 484; KNL-LABEL: zext_8x8mem_to_8x32: 485; KNL: # %bb.0: 486; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 487; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 488; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 489; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 490; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 491; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 492; KNL-NEXT: retq 493; 494; SKX-LABEL: zext_8x8mem_to_8x32: 495; SKX: # %bb.0: 496; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 497; SKX-NEXT: vpmovw2m %xmm0, %k1 498; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 499; SKX-NEXT: retq 500; 501; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32: 502; AVX512DQNOBW: # %bb.0: 503; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 504; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 505; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 506; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 507; AVX512DQNOBW-NEXT: retq 508 %a = load <8 x i8>,ptr%i,align 1 509 %x = zext <8 x i8> %a to <8 x i32> 510 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 511 ret <8 x i32> %ret 512} 513 514define <8 x i32> @sext_8x8mem_to_8x32(ptr%i , <8 x i1> %mask) nounwind readnone { 515; KNL-LABEL: sext_8x8mem_to_8x32: 516; KNL: # %bb.0: 517; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 518; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 519; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 520; KNL-NEXT: vpmovsxbd (%rdi), %ymm0 521; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 522; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 523; KNL-NEXT: retq 524; 525; SKX-LABEL: sext_8x8mem_to_8x32: 526; SKX: # %bb.0: 527; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 528; SKX-NEXT: vpmovw2m %xmm0, %k1 529; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} 530; SKX-NEXT: retq 531; 532; 
AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32: 533; AVX512DQNOBW: # %bb.0: 534; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 535; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 536; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 537; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} 538; AVX512DQNOBW-NEXT: retq 539 %a = load <8 x i8>,ptr%i,align 1 540 %x = sext <8 x i8> %a to <8 x i32> 541 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 542 ret <8 x i32> %ret 543} 544 545define <16 x i32> @zext_16x8mem_to_16x32(ptr%i , <16 x i1> %mask) nounwind readnone { 546; KNL-LABEL: zext_16x8mem_to_16x32: 547; KNL: # %bb.0: 548; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 549; KNL-NEXT: vpslld $31, %zmm0, %zmm0 550; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 551; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero 552; KNL-NEXT: retq 553; 554; SKX-LABEL: zext_16x8mem_to_16x32: 555; SKX: # %bb.0: 556; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 557; SKX-NEXT: vpmovb2m %xmm0, %k1 558; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero 559; SKX-NEXT: retq 560; 561; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32: 562; AVX512DQNOBW: # %bb.0: 563; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 564; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 565; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 566; AVX512DQNOBW-NEXT: vpmovzxbd 
{{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero 567; AVX512DQNOBW-NEXT: retq 568 %a = load <16 x i8>,ptr%i,align 1 569 %x = zext <16 x i8> %a to <16 x i32> 570 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 571 ret <16 x i32> %ret 572} 573 574define <16 x i32> @sext_16x8mem_to_16x32(ptr%i , <16 x i1> %mask) nounwind readnone { 575; KNL-LABEL: sext_16x8mem_to_16x32: 576; KNL: # %bb.0: 577; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 578; KNL-NEXT: vpslld $31, %zmm0, %zmm0 579; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 580; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} 581; KNL-NEXT: retq 582; 583; SKX-LABEL: sext_16x8mem_to_16x32: 584; SKX: # %bb.0: 585; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 586; SKX-NEXT: vpmovb2m %xmm0, %k1 587; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} 588; SKX-NEXT: retq 589; 590; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32: 591; AVX512DQNOBW: # %bb.0: 592; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 593; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 594; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 595; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} 596; AVX512DQNOBW-NEXT: retq 597 %a = load <16 x i8>,ptr%i,align 1 598 %x = sext <16 x i8> %a to <16 x i32> 599 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 600 ret <16 x i32> %ret 601} 602 603define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 604; KNL-LABEL: zext_16x8_to_16x32_mask: 605; KNL: # %bb.0: 606; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 607; KNL-NEXT: vpslld $31, %zmm1, %zmm1 608; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 609; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 610; KNL-NEXT: retq 611; 612; SKX-LABEL: zext_16x8_to_16x32_mask: 613; SKX: # %bb.0: 614; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 615; SKX-NEXT: vpmovb2m %xmm1, %k1 616; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 617; SKX-NEXT: retq 618; 619; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask: 620; AVX512DQNOBW: # %bb.0: 621; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 622; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 623; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 624; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 625; AVX512DQNOBW-NEXT: retq 626 %x = zext <16 x i8> %a to <16 x i32> 627 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 628 ret <16 x i32> %ret 629} 630 631define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 632; KNL-LABEL: sext_16x8_to_16x32_mask: 633; KNL: # %bb.0: 634; KNL-NEXT: vpmovsxbd 
%xmm1, %zmm1 635; KNL-NEXT: vpslld $31, %zmm1, %zmm1 636; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 637; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 638; KNL-NEXT: retq 639; 640; SKX-LABEL: sext_16x8_to_16x32_mask: 641; SKX: # %bb.0: 642; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 643; SKX-NEXT: vpmovb2m %xmm1, %k1 644; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 645; SKX-NEXT: retq 646; 647; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask: 648; AVX512DQNOBW: # %bb.0: 649; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 650; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 651; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 652; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 653; AVX512DQNOBW-NEXT: retq 654 %x = sext <16 x i8> %a to <16 x i32> 655 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 656 ret <16 x i32> %ret 657} 658 659define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 660; ALL-LABEL: zext_16x8_to_16x32: 661; ALL: # %bb.0: 662; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 663; ALL-NEXT: retq 664 %x = zext <16 x i8> %i to <16 x i32> 665 ret <16 x i32> %x 666} 667 668define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 669; ALL-LABEL: sext_16x8_to_16x32: 670; ALL: # %bb.0: 671; ALL-NEXT: vpmovsxbd %xmm0, %zmm0 672; ALL-NEXT: retq 673 %x = sext <16 x i8> %i to <16 x i32> 674 ret <16 x i32> %x 675} 676 677define <2 x i64> @zext_2x8mem_to_2x64(ptr%i , <2 x i1> %mask) nounwind readnone { 678; KNL-LABEL: zext_2x8mem_to_2x64: 679; KNL: # %bb.0: 680; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 681; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 682; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 683; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 684; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 685; KNL-NEXT: vzeroupper 686; KNL-NEXT: retq 687; 688; AVX512DQ-LABEL: zext_2x8mem_to_2x64: 689; AVX512DQ: # %bb.0: 690; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 691; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 692; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 693; AVX512DQ-NEXT: retq 694 %a = load <2 x i8>,ptr%i,align 1 695 %x = zext <2 x i8> %a to <2 x i64> 696 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 697 ret <2 x i64> %ret 698} 699define <2 x i64> @sext_2x8mem_to_2x64mask(ptr%i , <2 x i1> %mask) nounwind readnone { 700; KNL-LABEL: sext_2x8mem_to_2x64mask: 701; KNL: # %bb.0: 702; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 703; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 704; KNL-NEXT: vpmovsxbq (%rdi), %xmm0 705; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 706; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 707; KNL-NEXT: vzeroupper 708; KNL-NEXT: retq 709; 710; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask: 711; AVX512DQ: # %bb.0: 712; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 713; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 714; AVX512DQ-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} 715; AVX512DQ-NEXT: retq 716 %a = load <2 x i8>,ptr%i,align 1 717 %x = sext <2 x i8> %a to <2 x i64> 718 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 719 ret <2 x i64> %ret 720} 721define <2 x i64> @sext_2x8mem_to_2x64(ptr%i) nounwind readnone { 722; ALL-LABEL: sext_2x8mem_to_2x64: 723; ALL: # %bb.0: 724; ALL-NEXT: vpmovsxbq (%rdi), %xmm0 725; ALL-NEXT: retq 726 %a = load <2 x i8>,ptr%i,align 1 727 %x = sext <2 x i8> %a to <2 x i64> 728 ret <2 x i64> %x 729} 730 731define <4 x i64> @zext_4x8mem_to_4x64(ptr%i , <4 x i1> %mask) nounwind readnone { 732; KNL-LABEL: zext_4x8mem_to_4x64: 733; KNL: # 
%bb.0: 734; KNL-NEXT: vpslld $31, %xmm0, %xmm0 735; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 736; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 737; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 738; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 739; KNL-NEXT: retq 740; 741; AVX512DQ-LABEL: zext_4x8mem_to_4x64: 742; AVX512DQ: # %bb.0: 743; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 744; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 745; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 746; AVX512DQ-NEXT: retq 747 %a = load <4 x i8>,ptr%i,align 1 748 %x = zext <4 x i8> %a to <4 x i64> 749 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 750 ret <4 x i64> %ret 751} 752 753define <4 x i64> @sext_4x8mem_to_4x64mask(ptr%i , <4 x i1> %mask) nounwind readnone { 754; KNL-LABEL: sext_4x8mem_to_4x64mask: 755; KNL: # %bb.0: 756; KNL-NEXT: vpslld $31, %xmm0, %xmm0 757; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 758; KNL-NEXT: vpmovsxbq (%rdi), %ymm0 759; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 760; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 761; KNL-NEXT: retq 762; 763; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask: 764; AVX512DQ: # %bb.0: 765; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 766; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 767; AVX512DQ-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} 768; AVX512DQ-NEXT: retq 769 %a = load <4 x i8>,ptr%i,align 1 770 %x = sext <4 x i8> %a to <4 x i64> 771 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 772 ret <4 x i64> %ret 773} 774 775define <4 x i64> @sext_4x8mem_to_4x64(ptr%i) nounwind readnone { 776; ALL-LABEL: sext_4x8mem_to_4x64: 777; ALL: # %bb.0: 778; ALL-NEXT: vpmovsxbq (%rdi), %ymm0 779; 
ALL-NEXT: retq 780 %a = load <4 x i8>,ptr%i,align 1 781 %x = sext <4 x i8> %a to <4 x i64> 782 ret <4 x i64> %x 783} 784 785define <8 x i64> @zext_8x8mem_to_8x64(ptr%i , <8 x i1> %mask) nounwind readnone { 786; KNL-LABEL: zext_8x8mem_to_8x64: 787; KNL: # %bb.0: 788; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 789; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 790; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 791; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 792; KNL-NEXT: retq 793; 794; SKX-LABEL: zext_8x8mem_to_8x64: 795; SKX: # %bb.0: 796; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 797; SKX-NEXT: vpmovw2m %xmm0, %k1 798; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 799; SKX-NEXT: retq 800; 801; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64: 802; AVX512DQNOBW: # %bb.0: 803; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 804; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 805; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 806; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 807; AVX512DQNOBW-NEXT: retq 808 %a = load <8 x 
i8>,ptr%i,align 1 809 %x = zext <8 x i8> %a to <8 x i64> 810 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 811 ret <8 x i64> %ret 812} 813 814define <8 x i64> @sext_8x8mem_to_8x64mask(ptr%i , <8 x i1> %mask) nounwind readnone { 815; KNL-LABEL: sext_8x8mem_to_8x64mask: 816; KNL: # %bb.0: 817; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 818; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 819; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 820; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} 821; KNL-NEXT: retq 822; 823; SKX-LABEL: sext_8x8mem_to_8x64mask: 824; SKX: # %bb.0: 825; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 826; SKX-NEXT: vpmovw2m %xmm0, %k1 827; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} 828; SKX-NEXT: retq 829; 830; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask: 831; AVX512DQNOBW: # %bb.0: 832; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 833; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 834; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 835; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} 836; AVX512DQNOBW-NEXT: retq 837 %a = load <8 x i8>,ptr%i,align 1 838 %x = sext <8 x i8> %a to <8 x i64> 839 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 840 ret <8 x i64> %ret 841} 842 843define <8 x i64> @sext_8x8mem_to_8x64(ptr%i) nounwind readnone { 844; ALL-LABEL: sext_8x8mem_to_8x64: 845; ALL: # %bb.0: 846; ALL-NEXT: vpmovsxbq (%rdi), %zmm0 847; ALL-NEXT: retq 848 %a = load <8 x i8>,ptr%i,align 1 849 %x = sext <8 x i8> %a to <8 x i64> 850 ret <8 x i64> %x 851} 852 853define <4 x i32> @zext_4x16mem_to_4x32(ptr%i , <4 x i1> %mask) nounwind readnone { 854; KNL-LABEL: zext_4x16mem_to_4x32: 855; KNL: # %bb.0: 856; KNL-NEXT: vpslld $31, %xmm0, %xmm0 857; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 858; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 859; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 860; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 861; KNL-NEXT: vzeroupper 862; KNL-NEXT: retq 863; 864; AVX512DQ-LABEL: 
zext_4x16mem_to_4x32: 865; AVX512DQ: # %bb.0: 866; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 867; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 868; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 869; AVX512DQ-NEXT: retq 870 %a = load <4 x i16>,ptr%i,align 1 871 %x = zext <4 x i16> %a to <4 x i32> 872 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 873 ret <4 x i32> %ret 874} 875 876define <4 x i32> @sext_4x16mem_to_4x32mask(ptr%i , <4 x i1> %mask) nounwind readnone { 877; KNL-LABEL: sext_4x16mem_to_4x32mask: 878; KNL: # %bb.0: 879; KNL-NEXT: vpslld $31, %xmm0, %xmm0 880; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 881; KNL-NEXT: vpmovsxwd (%rdi), %xmm0 882; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 883; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 884; KNL-NEXT: vzeroupper 885; KNL-NEXT: retq 886; 887; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask: 888; AVX512DQ: # %bb.0: 889; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 890; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 891; AVX512DQ-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} 892; AVX512DQ-NEXT: retq 893 %a = load <4 x i16>,ptr%i,align 1 894 %x = sext <4 x i16> %a to <4 x i32> 895 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 896 ret <4 x i32> %ret 897} 898 899define <4 x i32> @sext_4x16mem_to_4x32(ptr%i) nounwind readnone { 900; ALL-LABEL: sext_4x16mem_to_4x32: 901; ALL: # %bb.0: 902; ALL-NEXT: vpmovsxwd (%rdi), %xmm0 903; ALL-NEXT: retq 904 %a = load <4 x i16>,ptr%i,align 1 905 %x = sext <4 x i16> %a to <4 x i32> 906 ret <4 x i32> %x 907} 908 909 910define <8 x i32> @zext_8x16mem_to_8x32(ptr%i , <8 x i1> %mask) nounwind readnone { 911; KNL-LABEL: zext_8x16mem_to_8x32: 912; KNL: # %bb.0: 913; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 914; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 915; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 916; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 917; 
KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 918; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 919; KNL-NEXT: retq 920; 921; SKX-LABEL: zext_8x16mem_to_8x32: 922; SKX: # %bb.0: 923; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 924; SKX-NEXT: vpmovw2m %xmm0, %k1 925; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 926; SKX-NEXT: retq 927; 928; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32: 929; AVX512DQNOBW: # %bb.0: 930; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 931; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 932; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 933; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 934; AVX512DQNOBW-NEXT: retq 935 %a = load <8 x i16>,ptr%i,align 1 936 %x = zext <8 x i16> %a to <8 x i32> 937 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 938 ret <8 x i32> %ret 939} 940 941define <8 x i32> @sext_8x16mem_to_8x32mask(ptr%i , <8 x i1> %mask) nounwind readnone { 942; KNL-LABEL: sext_8x16mem_to_8x32mask: 943; KNL: # %bb.0: 944; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 945; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 946; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 947; KNL-NEXT: vpmovsxwd (%rdi), %ymm0 948; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 949; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 950; KNL-NEXT: retq 951; 952; SKX-LABEL: sext_8x16mem_to_8x32mask: 953; SKX: # %bb.0: 954; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 955; SKX-NEXT: vpmovw2m %xmm0, %k1 956; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} 957; SKX-NEXT: retq 958; 959; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask: 960; AVX512DQNOBW: # %bb.0: 961; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 962; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 963; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 964; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} 965; AVX512DQNOBW-NEXT: retq 966 %a = load <8 x 
i16>,ptr%i,align 1 967 %x = sext <8 x i16> %a to <8 x i32> 968 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 969 ret <8 x i32> %ret 970} 971 972define <8 x i32> @sext_8x16mem_to_8x32(ptr%i) nounwind readnone { 973; ALL-LABEL: sext_8x16mem_to_8x32: 974; ALL: # %bb.0: 975; ALL-NEXT: vpmovsxwd (%rdi), %ymm0 976; ALL-NEXT: retq 977 %a = load <8 x i16>,ptr%i,align 1 978 %x = sext <8 x i16> %a to <8 x i32> 979 ret <8 x i32> %x 980} 981 982define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 983; KNL-LABEL: zext_8x16_to_8x32mask: 984; KNL: # %bb.0: 985; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 986; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 987; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 988; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 989; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 990; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 991; KNL-NEXT: retq 992; 993; SKX-LABEL: zext_8x16_to_8x32mask: 994; SKX: # %bb.0: 995; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 996; SKX-NEXT: vpmovw2m %xmm1, %k1 997; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 998; SKX-NEXT: retq 999; 1000; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask: 1001; AVX512DQNOBW: # %bb.0: 1002; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 1003; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 1004; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 1005; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1006; AVX512DQNOBW-NEXT: retq 1007 %x = zext <8 x i16> %a to <8 x i32> 1008 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 1009 ret <8 x i32> %ret 1010} 1011 1012define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { 1013; ALL-LABEL: zext_8x16_to_8x32: 
1014; ALL: # %bb.0: 1015; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1016; ALL-NEXT: retq 1017 %x = zext <8 x i16> %a to <8 x i32> 1018 ret <8 x i32> %x 1019} 1020 1021define <16 x i32> @zext_16x16mem_to_16x32(ptr%i , <16 x i1> %mask) nounwind readnone { 1022; KNL-LABEL: zext_16x16mem_to_16x32: 1023; KNL: # %bb.0: 1024; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1025; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1026; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1027; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1028; KNL-NEXT: retq 1029; 1030; SKX-LABEL: zext_16x16mem_to_16x32: 1031; SKX: # %bb.0: 1032; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 1033; SKX-NEXT: vpmovb2m %xmm0, %k1 1034; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1035; SKX-NEXT: retq 1036; 1037; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32: 1038; AVX512DQNOBW: # %bb.0: 1039; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 1040; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 1041; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 1042; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1043; AVX512DQNOBW-NEXT: retq 1044 %a = load <16 x i16>,ptr%i,align 1 1045 %x = zext <16 x i16> %a to <16 x i32> 1046 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 1047 ret <16 x i32> %ret 1048} 1049 1050define <16 x i32> @sext_16x16mem_to_16x32mask(ptr%i , <16 x 
i1> %mask) nounwind readnone { 1051; KNL-LABEL: sext_16x16mem_to_16x32mask: 1052; KNL: # %bb.0: 1053; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1054; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1055; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1056; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 1057; KNL-NEXT: retq 1058; 1059; SKX-LABEL: sext_16x16mem_to_16x32mask: 1060; SKX: # %bb.0: 1061; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 1062; SKX-NEXT: vpmovb2m %xmm0, %k1 1063; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 1064; SKX-NEXT: retq 1065; 1066; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask: 1067; AVX512DQNOBW: # %bb.0: 1068; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 1069; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 1070; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 1071; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 1072; AVX512DQNOBW-NEXT: retq 1073 %a = load <16 x i16>,ptr%i,align 1 1074 %x = sext <16 x i16> %a to <16 x i32> 1075 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 1076 ret <16 x i32> %ret 1077} 1078 1079define <16 x i32> @sext_16x16mem_to_16x32(ptr%i) nounwind readnone { 1080; ALL-LABEL: sext_16x16mem_to_16x32: 1081; ALL: # %bb.0: 1082; ALL-NEXT: vpmovsxwd (%rdi), %zmm0 1083; ALL-NEXT: retq 1084 %a = load <16 x i16>,ptr%i,align 1 1085 %x = sext <16 x i16> %a to <16 x i32> 1086 ret <16 x i32> %x 1087} 1088define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { 1089; KNL-LABEL: zext_16x16_to_16x32mask: 1090; KNL: # %bb.0: 1091; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1092; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1093; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 1094; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1095; KNL-NEXT: retq 1096; 1097; SKX-LABEL: zext_16x16_to_16x32mask: 1098; SKX: # %bb.0: 1099; SKX-NEXT: vpsllw $7, 
%xmm1, %xmm1 1100; SKX-NEXT: vpmovb2m %xmm1, %k1 1101; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1102; SKX-NEXT: retq 1103; 1104; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask: 1105; AVX512DQNOBW: # %bb.0: 1106; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 1107; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 1108; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 1109; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1110; AVX512DQNOBW-NEXT: retq 1111 %x = zext <16 x i16> %a to <16 x i32> 1112 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 1113 ret <16 x i32> %ret 1114} 1115 1116define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { 1117; ALL-LABEL: zext_16x16_to_16x32: 1118; ALL: # %bb.0: 1119; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1120; ALL-NEXT: retq 1121 %x = zext <16 x i16> %a to <16 x i32> 1122 ret <16 x i32> %x 1123} 1124 1125define <2 x i64> @zext_2x16mem_to_2x64(ptr%i , <2 x i1> %mask) nounwind readnone { 1126; KNL-LABEL: zext_2x16mem_to_2x64: 1127; KNL: # %bb.0: 1128; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1129; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1130; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1131; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1132; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1133; KNL-NEXT: vzeroupper 1134; KNL-NEXT: retq 1135; 
1136; AVX512DQ-LABEL: zext_2x16mem_to_2x64: 1137; AVX512DQ: # %bb.0: 1138; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1139; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1140; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1141; AVX512DQ-NEXT: retq 1142 %a = load <2 x i16>,ptr%i,align 1 1143 %x = zext <2 x i16> %a to <2 x i64> 1144 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1145 ret <2 x i64> %ret 1146} 1147 1148define <2 x i64> @sext_2x16mem_to_2x64mask(ptr%i , <2 x i1> %mask) nounwind readnone { 1149; KNL-LABEL: sext_2x16mem_to_2x64mask: 1150; KNL: # %bb.0: 1151; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1152; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1153; KNL-NEXT: vpmovsxwq (%rdi), %xmm0 1154; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1155; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1156; KNL-NEXT: vzeroupper 1157; KNL-NEXT: retq 1158; 1159; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask: 1160; AVX512DQ: # %bb.0: 1161; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1162; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1163; AVX512DQ-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} 1164; AVX512DQ-NEXT: retq 1165 %a = load <2 x i16>,ptr%i,align 1 1166 %x = sext <2 x i16> %a to <2 x i64> 1167 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1168 ret <2 x i64> %ret 1169} 1170 1171define <2 x i64> @sext_2x16mem_to_2x64(ptr%i) nounwind readnone { 1172; ALL-LABEL: sext_2x16mem_to_2x64: 1173; ALL: # %bb.0: 1174; ALL-NEXT: vpmovsxwq (%rdi), %xmm0 1175; ALL-NEXT: retq 1176 %a = load <2 x i16>,ptr%i,align 1 1177 %x = sext <2 x i16> %a to <2 x i64> 1178 ret <2 x i64> %x 1179} 1180 1181define <4 x i64> @zext_4x16mem_to_4x64(ptr%i , <4 x i1> %mask) nounwind readnone { 1182; KNL-LABEL: zext_4x16mem_to_4x64: 1183; KNL: # %bb.0: 1184; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1185; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1186; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1187; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1188; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1189; KNL-NEXT: retq 1190; 1191; AVX512DQ-LABEL: zext_4x16mem_to_4x64: 1192; AVX512DQ: # %bb.0: 1193; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1194; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1195; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1196; AVX512DQ-NEXT: retq 1197 %a = load <4 x i16>,ptr%i,align 1 1198 %x = zext <4 x i16> %a to <4 x i64> 1199 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1200 ret <4 x i64> %ret 1201} 1202 1203define <4 x i64> @sext_4x16mem_to_4x64mask(ptr%i , <4 x i1> %mask) nounwind readnone { 1204; KNL-LABEL: sext_4x16mem_to_4x64mask: 1205; KNL: # %bb.0: 1206; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1207; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1208; KNL-NEXT: vpmovsxwq (%rdi), %ymm0 1209; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1210; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1211; KNL-NEXT: retq 1212; 1213; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask: 1214; AVX512DQ: # %bb.0: 1215; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1216; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1217; AVX512DQ-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} 1218; AVX512DQ-NEXT: retq 1219 %a = load <4 x i16>,ptr%i,align 1 1220 %x = sext <4 x i16> %a to <4 x i64> 1221 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1222 ret <4 x i64> %ret 1223} 1224 1225define <4 x i64> @sext_4x16mem_to_4x64(ptr%i) nounwind readnone { 1226; ALL-LABEL: sext_4x16mem_to_4x64: 1227; ALL: # %bb.0: 1228; ALL-NEXT: vpmovsxwq (%rdi), %ymm0 1229; ALL-NEXT: retq 1230 %a = load <4 x i16>,ptr%i,align 1 1231 %x = sext <4 x i16> %a to <4 x i64> 1232 ret <4 x i64> %x 1233} 1234 1235define <8 x i64> @zext_8x16mem_to_8x64(ptr%i , <8 x i1> %mask) nounwind readnone { 1236; KNL-LABEL: 
zext_8x16mem_to_8x64: 1237; KNL: # %bb.0: 1238; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1239; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1240; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1241; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1242; KNL-NEXT: retq 1243; 1244; SKX-LABEL: zext_8x16mem_to_8x64: 1245; SKX: # %bb.0: 1246; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1247; SKX-NEXT: vpmovw2m %xmm0, %k1 1248; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1249; SKX-NEXT: retq 1250; 1251; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64: 1252; AVX512DQNOBW: # %bb.0: 1253; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1254; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1255; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1256; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1257; AVX512DQNOBW-NEXT: retq 1258 %a = load <8 x i16>,ptr%i,align 1 1259 %x = zext <8 x i16> %a to <8 x i64> 1260 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1261 ret <8 x i64> %ret 1262} 1263 1264define <8 x i64> @sext_8x16mem_to_8x64mask(ptr%i , <8 x i1> %mask) nounwind readnone { 1265; KNL-LABEL: sext_8x16mem_to_8x64mask: 1266; KNL: # %bb.0: 1267; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1268; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1269; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1270; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} 1271; KNL-NEXT: retq 1272; 1273; SKX-LABEL: sext_8x16mem_to_8x64mask: 1274; SKX: # %bb.0: 1275; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1276; SKX-NEXT: vpmovw2m %xmm0, %k1 1277; SKX-NEXT: 
vpmovsxwq (%rdi), %zmm0 {%k1} {z} 1278; SKX-NEXT: retq 1279; 1280; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask: 1281; AVX512DQNOBW: # %bb.0: 1282; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1283; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1284; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1285; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} 1286; AVX512DQNOBW-NEXT: retq 1287 %a = load <8 x i16>,ptr%i,align 1 1288 %x = sext <8 x i16> %a to <8 x i64> 1289 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1290 ret <8 x i64> %ret 1291} 1292 1293define <8 x i64> @sext_8x16mem_to_8x64(ptr%i) nounwind readnone { 1294; ALL-LABEL: sext_8x16mem_to_8x64: 1295; ALL: # %bb.0: 1296; ALL-NEXT: vpmovsxwq (%rdi), %zmm0 1297; ALL-NEXT: retq 1298 %a = load <8 x i16>,ptr%i,align 1 1299 %x = sext <8 x i16> %a to <8 x i64> 1300 ret <8 x i64> %x 1301} 1302 1303define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 1304; KNL-LABEL: zext_8x16_to_8x64mask: 1305; KNL: # %bb.0: 1306; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 1307; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 1308; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 1309; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1310; KNL-NEXT: retq 1311; 1312; SKX-LABEL: zext_8x16_to_8x64mask: 1313; SKX: # %bb.0: 1314; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 1315; SKX-NEXT: vpmovw2m %xmm1, %k1 1316; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1317; SKX-NEXT: retq 1318; 1319; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask: 1320; AVX512DQNOBW: # %bb.0: 1321; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 1322; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 1323; 
AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 1324; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1325; AVX512DQNOBW-NEXT: retq 1326 %x = zext <8 x i16> %a to <8 x i64> 1327 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1328 ret <8 x i64> %ret 1329} 1330 1331define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { 1332; ALL-LABEL: zext_8x16_to_8x64: 1333; ALL: # %bb.0: 1334; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1335; ALL-NEXT: retq 1336 %ret = zext <8 x i16> %a to <8 x i64> 1337 ret <8 x i64> %ret 1338} 1339 1340define <2 x i64> @zext_2x32mem_to_2x64(ptr%i , <2 x i1> %mask) nounwind readnone { 1341; KNL-LABEL: zext_2x32mem_to_2x64: 1342; KNL: # %bb.0: 1343; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1344; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1345; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1346; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1347; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1348; KNL-NEXT: vzeroupper 1349; KNL-NEXT: retq 1350; 1351; AVX512DQ-LABEL: zext_2x32mem_to_2x64: 1352; AVX512DQ: # %bb.0: 1353; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1354; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1355; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero 1356; AVX512DQ-NEXT: retq 1357 %a = load <2 x i32>,ptr%i,align 1 1358 %x = zext <2 x i32> %a to <2 x i64> 1359 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1360 ret <2 x i64> %ret 1361} 1362 1363define <2 x i64> @sext_2x32mem_to_2x64mask(ptr%i , <2 x i1> %mask) nounwind readnone { 1364; KNL-LABEL: sext_2x32mem_to_2x64mask: 1365; KNL: # %bb.0: 1366; 
KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1367; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1368; KNL-NEXT: vpmovsxdq (%rdi), %xmm0 1369; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1370; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1371; KNL-NEXT: vzeroupper 1372; KNL-NEXT: retq 1373; 1374; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask: 1375; AVX512DQ: # %bb.0: 1376; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1377; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1378; AVX512DQ-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} 1379; AVX512DQ-NEXT: retq 1380 %a = load <2 x i32>,ptr%i,align 1 1381 %x = sext <2 x i32> %a to <2 x i64> 1382 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1383 ret <2 x i64> %ret 1384} 1385 1386define <2 x i64> @sext_2x32mem_to_2x64(ptr%i) nounwind readnone { 1387; ALL-LABEL: sext_2x32mem_to_2x64: 1388; ALL: # %bb.0: 1389; ALL-NEXT: vpmovsxdq (%rdi), %xmm0 1390; ALL-NEXT: retq 1391 %a = load <2 x i32>,ptr%i,align 1 1392 %x = sext <2 x i32> %a to <2 x i64> 1393 ret <2 x i64> %x 1394} 1395 1396define <4 x i64> @zext_4x32mem_to_4x64(ptr%i , <4 x i1> %mask) nounwind readnone { 1397; KNL-LABEL: zext_4x32mem_to_4x64: 1398; KNL: # %bb.0: 1399; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1400; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1401; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1402; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1403; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1404; KNL-NEXT: retq 1405; 1406; AVX512DQ-LABEL: zext_4x32mem_to_4x64: 1407; AVX512DQ: # %bb.0: 1408; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1409; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1410; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1411; AVX512DQ-NEXT: retq 1412 %a = load <4 x i32>,ptr%i,align 1 1413 %x = zext <4 x i32> %a to <4 x i64> 1414 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1415 ret <4 x i64> %ret 1416} 1417 1418define <4 x i64> 
@sext_4x32mem_to_4x64mask(ptr%i , <4 x i1> %mask) nounwind readnone { 1419; KNL-LABEL: sext_4x32mem_to_4x64mask: 1420; KNL: # %bb.0: 1421; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1422; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1423; KNL-NEXT: vpmovsxdq (%rdi), %ymm0 1424; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1425; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1426; KNL-NEXT: retq 1427; 1428; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask: 1429; AVX512DQ: # %bb.0: 1430; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1431; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1432; AVX512DQ-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} 1433; AVX512DQ-NEXT: retq 1434 %a = load <4 x i32>,ptr%i,align 1 1435 %x = sext <4 x i32> %a to <4 x i64> 1436 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1437 ret <4 x i64> %ret 1438} 1439 1440define <4 x i64> @sext_4x32mem_to_4x64(ptr%i) nounwind readnone { 1441; ALL-LABEL: sext_4x32mem_to_4x64: 1442; ALL: # %bb.0: 1443; ALL-NEXT: vpmovsxdq (%rdi), %ymm0 1444; ALL-NEXT: retq 1445 %a = load <4 x i32>,ptr%i,align 1 1446 %x = sext <4 x i32> %a to <4 x i64> 1447 ret <4 x i64> %x 1448} 1449 1450define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { 1451; ALL-LABEL: sext_4x32_to_4x64: 1452; ALL: # %bb.0: 1453; ALL-NEXT: vpmovsxdq %xmm0, %ymm0 1454; ALL-NEXT: retq 1455 %x = sext <4 x i32> %a to <4 x i64> 1456 ret <4 x i64> %x 1457} 1458 1459define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { 1460; KNL-LABEL: zext_4x32_to_4x64mask: 1461; KNL: # %bb.0: 1462; KNL-NEXT: vpslld $31, %xmm1, %xmm1 1463; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 1464; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1465; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1466; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1467; KNL-NEXT: retq 1468; 1469; AVX512DQ-LABEL: zext_4x32_to_4x64mask: 1470; AVX512DQ: # %bb.0: 1471; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1 1472; AVX512DQ-NEXT: 
vpmovd2m %xmm1, %k1 1473; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1474; AVX512DQ-NEXT: retq 1475 %x = zext <4 x i32> %a to <4 x i64> 1476 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1477 ret <4 x i64> %ret 1478} 1479 1480define <8 x i64> @zext_8x32mem_to_8x64(ptr%i , <8 x i1> %mask) nounwind readnone { 1481; KNL-LABEL: zext_8x32mem_to_8x64: 1482; KNL: # %bb.0: 1483; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1484; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1485; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1486; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1487; KNL-NEXT: retq 1488; 1489; SKX-LABEL: zext_8x32mem_to_8x64: 1490; SKX: # %bb.0: 1491; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1492; SKX-NEXT: vpmovw2m %xmm0, %k1 1493; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1494; SKX-NEXT: retq 1495; 1496; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64: 1497; AVX512DQNOBW: # %bb.0: 1498; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1499; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1500; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1501; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1502; AVX512DQNOBW-NEXT: retq 1503 %a = load <8 x i32>,ptr%i,align 1 1504 %x = zext <8 x i32> %a to <8 x i64> 1505 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1506 ret <8 x i64> %ret 1507} 1508 1509define <8 x i64> @sext_8x32mem_to_8x64mask(ptr%i , <8 x i1> %mask) nounwind readnone { 1510; KNL-LABEL: sext_8x32mem_to_8x64mask: 1511; KNL: # %bb.0: 1512; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1513; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1514; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1515; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1516; 
KNL-NEXT: retq 1517; 1518; SKX-LABEL: sext_8x32mem_to_8x64mask: 1519; SKX: # %bb.0: 1520; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1521; SKX-NEXT: vpmovw2m %xmm0, %k1 1522; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1523; SKX-NEXT: retq 1524; 1525; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask: 1526; AVX512DQNOBW: # %bb.0: 1527; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1528; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1529; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1530; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1531; AVX512DQNOBW-NEXT: retq 1532 %a = load <8 x i32>,ptr%i,align 1 1533 %x = sext <8 x i32> %a to <8 x i64> 1534 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1535 ret <8 x i64> %ret 1536} 1537 1538define <8 x i64> @sext_8x32mem_to_8x64(ptr%i) nounwind readnone { 1539; ALL-LABEL: sext_8x32mem_to_8x64: 1540; ALL: # %bb.0: 1541; ALL-NEXT: vpmovsxdq (%rdi), %zmm0 1542; ALL-NEXT: retq 1543 %a = load <8 x i32>,ptr%i,align 1 1544 %x = sext <8 x i32> %a to <8 x i64> 1545 ret <8 x i64> %x 1546} 1547 1548define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { 1549; ALL-LABEL: sext_8x32_to_8x64: 1550; ALL: # %bb.0: 1551; ALL-NEXT: vpmovsxdq %ymm0, %zmm0 1552; ALL-NEXT: retq 1553 %x = sext <8 x i32> %a to <8 x i64> 1554 ret <8 x i64> %x 1555} 1556 1557define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { 1558; KNL-LABEL: zext_8x32_to_8x64mask: 1559; KNL: # %bb.0: 1560; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 1561; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 1562; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 1563; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 1564; KNL-NEXT: retq 1565; 1566; SKX-LABEL: zext_8x32_to_8x64mask: 1567; SKX: # %bb.0: 1568; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 1569; SKX-NEXT: vpmovw2m %xmm1, %k1 1570; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 1571; SKX-NEXT: retq 1572; 1573; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask: 1574; AVX512DQNOBW: # %bb.0: 1575; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 1576; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 1577; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 1578; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 1579; AVX512DQNOBW-NEXT: retq 1580 %x = zext <8 x i32> %a to <8 x i64> 1581 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1582 ret <8 x i64> %ret 1583} 1584define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { 1585; ALL-LABEL: fptrunc_test: 1586; ALL: # %bb.0: 1587; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0 1588; ALL-NEXT: retq 1589 %b = fptrunc <8 x double> %a to <8 x float> 1590 ret <8 x float> %b 1591} 1592 1593define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { 1594; ALL-LABEL: fpext_test: 1595; ALL: # %bb.0: 1596; ALL-NEXT: vcvtps2pd %ymm0, %zmm0 1597; ALL-NEXT: retq 1598 %b = fpext <8 x float> %a to <8 x double> 1599 ret <8 x double> %b 1600} 1601 1602define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { 1603; KNL-LABEL: zext_16i1_to_16xi32: 1604; KNL: # %bb.0: 1605; KNL-NEXT: kmovw %edi, %k1 1606; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1607; KNL-NEXT: vpsrld $31, %zmm0, %zmm0 1608; KNL-NEXT: retq 1609; 1610; SKX-LABEL: zext_16i1_to_16xi32: 1611; SKX: # %bb.0: 1612; SKX-NEXT: kmovd %edi, %k0 1613; SKX-NEXT: vpmovm2d %k0, %zmm0 1614; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 1615; SKX-NEXT: retq 1616; 1617; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32: 1618; AVX512DQNOBW: # %bb.0: 1619; AVX512DQNOBW-NEXT: kmovw %edi, %k0 1620; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm0 1621; AVX512DQNOBW-NEXT: vpsrld $31, %zmm0, %zmm0 1622; AVX512DQNOBW-NEXT: retq 1623 %a = bitcast i16 %b to <16 x i1> 1624 %c = zext <16 x i1> 
%a to <16 x i32> 1625 ret <16 x i32> %c 1626} 1627 1628define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { 1629; KNL-LABEL: zext_8i1_to_8xi64: 1630; KNL: # %bb.0: 1631; KNL-NEXT: kmovw %edi, %k1 1632; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1633; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0 1634; KNL-NEXT: retq 1635; 1636; SKX-LABEL: zext_8i1_to_8xi64: 1637; SKX: # %bb.0: 1638; SKX-NEXT: kmovd %edi, %k0 1639; SKX-NEXT: vpmovm2q %k0, %zmm0 1640; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 1641; SKX-NEXT: retq 1642; 1643; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64: 1644; AVX512DQNOBW: # %bb.0: 1645; AVX512DQNOBW-NEXT: kmovw %edi, %k0 1646; AVX512DQNOBW-NEXT: vpmovm2q %k0, %zmm0 1647; AVX512DQNOBW-NEXT: vpsrlq $63, %zmm0, %zmm0 1648; AVX512DQNOBW-NEXT: retq 1649 %a = bitcast i8 %b to <8 x i1> 1650 %c = zext <8 x i1> %a to <8 x i64> 1651 ret <8 x i64> %c 1652} 1653 1654define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { 1655; ALL-LABEL: trunc_16i8_to_16i1: 1656; ALL: # %bb.0: 1657; ALL-NEXT: vpsllw $7, %xmm0, %xmm0 1658; ALL-NEXT: vpmovmskb %xmm0, %eax 1659; ALL-NEXT: # kill: def $ax killed $ax killed $eax 1660; ALL-NEXT: retq 1661 %mask_b = trunc <16 x i8>%a to <16 x i1> 1662 %mask = bitcast <16 x i1> %mask_b to i16 1663 ret i16 %mask 1664} 1665 1666define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { 1667; KNL-LABEL: trunc_16i32_to_16i1: 1668; KNL: # %bb.0: 1669; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1670; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1671; KNL-NEXT: kmovw %k0, %eax 1672; KNL-NEXT: # kill: def $ax killed $ax killed $eax 1673; KNL-NEXT: vzeroupper 1674; KNL-NEXT: retq 1675; 1676; SKX-LABEL: trunc_16i32_to_16i1: 1677; SKX: # %bb.0: 1678; SKX-NEXT: vpslld $31, %zmm0, %zmm0 1679; SKX-NEXT: vpmovd2m %zmm0, %k0 1680; SKX-NEXT: kmovd %k0, %eax 1681; SKX-NEXT: # kill: def $ax killed $ax killed $eax 1682; SKX-NEXT: vzeroupper 1683; SKX-NEXT: retq 1684; 1685; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1: 1686; AVX512DQNOBW: # %bb.0: 1687; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 1688; 
AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0 1689; AVX512DQNOBW-NEXT: kmovw %k0, %eax 1690; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax 1691; AVX512DQNOBW-NEXT: vzeroupper 1692; AVX512DQNOBW-NEXT: retq 1693 %mask_b = trunc <16 x i32>%a to <16 x i1> 1694 %mask = bitcast <16 x i1> %mask_b to i16 1695 ret i16 %mask 1696} 1697 1698define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { 1699; ALL-LABEL: trunc_4i32_to_4i1: 1700; ALL: # %bb.0: 1701; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0 1702; ALL-NEXT: vpslld $31, %xmm0, %xmm0 1703; ALL-NEXT: vpsrad $31, %xmm0, %xmm0 1704; ALL-NEXT: retq 1705 %mask_a = trunc <4 x i32>%a to <4 x i1> 1706 %mask_b = trunc <4 x i32>%b to <4 x i1> 1707 %a_and_b = and <4 x i1>%mask_a, %mask_b 1708 %res = sext <4 x i1>%a_and_b to <4 x i32> 1709 ret <4 x i32>%res 1710} 1711 1712 1713define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { 1714; KNL-LABEL: trunc_8i16_to_8i1: 1715; KNL: # %bb.0: 1716; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1717; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1718; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 1719; KNL-NEXT: kmovw %k0, %eax 1720; KNL-NEXT: # kill: def $al killed $al killed $eax 1721; KNL-NEXT: vzeroupper 1722; KNL-NEXT: retq 1723; 1724; SKX-LABEL: trunc_8i16_to_8i1: 1725; SKX: # %bb.0: 1726; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1727; SKX-NEXT: vpmovw2m %xmm0, %k0 1728; SKX-NEXT: kmovd %k0, %eax 1729; SKX-NEXT: # kill: def $al killed $al killed $eax 1730; SKX-NEXT: retq 1731; 1732; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1: 1733; AVX512DQNOBW: # %bb.0: 1734; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1735; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1736; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0 1737; AVX512DQNOBW-NEXT: kmovw %k0, %eax 1738; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax 1739; AVX512DQNOBW-NEXT: vzeroupper 1740; AVX512DQNOBW-NEXT: retq 1741 %mask_b = trunc <8 x i16>%a to <8 x i1> 1742 %mask = bitcast <8 x i1> %mask_b to i8 1743 ret i8 %mask 1744} 1745 1746define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, 
<8 x i32> %a2) nounwind { 1747; KNL-LABEL: sext_8i1_8i32: 1748; KNL: # %bb.0: 1749; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1750; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 1751; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1752; KNL-NEXT: retq 1753; 1754; AVX512DQ-LABEL: sext_8i1_8i32: 1755; AVX512DQ: # %bb.0: 1756; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1757; AVX512DQ-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 1758; AVX512DQ-NEXT: retq 1759 %x = icmp slt <8 x i32> %a1, %a2 1760 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 1761 %y = sext <8 x i1> %x1 to <8 x i32> 1762 ret <8 x i32> %y 1763} 1764 1765 1766define i16 @trunc_i32_to_i1(i32 %a) { 1767; KNL-LABEL: trunc_i32_to_i1: 1768; KNL: # %bb.0: 1769; KNL-NEXT: andl $1, %edi 1770; KNL-NEXT: kmovw %edi, %k0 1771; KNL-NEXT: movw $-4, %ax 1772; KNL-NEXT: kmovw %eax, %k1 1773; KNL-NEXT: kshiftrw $1, %k1, %k1 1774; KNL-NEXT: kshiftlw $1, %k1, %k1 1775; KNL-NEXT: korw %k0, %k1, %k0 1776; KNL-NEXT: kmovw %k0, %eax 1777; KNL-NEXT: # kill: def $ax killed $ax killed $eax 1778; KNL-NEXT: retq 1779; 1780; SKX-LABEL: trunc_i32_to_i1: 1781; SKX: # %bb.0: 1782; SKX-NEXT: andl $1, %edi 1783; SKX-NEXT: kmovw %edi, %k0 1784; SKX-NEXT: movw $-4, %ax 1785; SKX-NEXT: kmovd %eax, %k1 1786; SKX-NEXT: kshiftrw $1, %k1, %k1 1787; SKX-NEXT: kshiftlw $1, %k1, %k1 1788; SKX-NEXT: korw %k0, %k1, %k0 1789; SKX-NEXT: kmovd %k0, %eax 1790; SKX-NEXT: # kill: def $ax killed $ax killed $eax 1791; SKX-NEXT: retq 1792; 1793; AVX512DQNOBW-LABEL: trunc_i32_to_i1: 1794; AVX512DQNOBW: # %bb.0: 1795; AVX512DQNOBW-NEXT: andl $1, %edi 1796; AVX512DQNOBW-NEXT: kmovw %edi, %k0 1797; AVX512DQNOBW-NEXT: movw $-4, %ax 1798; AVX512DQNOBW-NEXT: kmovw %eax, %k1 1799; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 1800; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 1801; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0 1802; AVX512DQNOBW-NEXT: kmovw %k0, %eax 1803; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax 
1804; AVX512DQNOBW-NEXT: retq 1805 %a_i = trunc i32 %a to i1 1806 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0 1807 %res = bitcast <16 x i1> %maskv to i16 1808 ret i16 %res 1809} 1810 1811define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { 1812; KNL-LABEL: sext_8i1_8i16: 1813; KNL: # %bb.0: 1814; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1815; KNL-NEXT: vpmovdw %zmm0, %ymm0 1816; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1817; KNL-NEXT: vzeroupper 1818; KNL-NEXT: retq 1819; 1820; SKX-LABEL: sext_8i1_8i16: 1821; SKX: # %bb.0: 1822; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1823; SKX-NEXT: vpmovm2w %k0, %xmm0 1824; SKX-NEXT: vzeroupper 1825; SKX-NEXT: retq 1826; 1827; AVX512DQNOBW-LABEL: sext_8i1_8i16: 1828; AVX512DQNOBW: # %bb.0: 1829; AVX512DQNOBW-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1830; AVX512DQNOBW-NEXT: vpmovm2d %k0, %ymm0 1831; AVX512DQNOBW-NEXT: vpmovdw %ymm0, %xmm0 1832; AVX512DQNOBW-NEXT: vzeroupper 1833; AVX512DQNOBW-NEXT: retq 1834 %x = icmp slt <8 x i32> %a1, %a2 1835 %y = sext <8 x i1> %x to <8 x i16> 1836 ret <8 x i16> %y 1837} 1838 1839define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { 1840; KNL-LABEL: sext_16i1_16i32: 1841; KNL: # %bb.0: 1842; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 1843; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1844; KNL-NEXT: retq 1845; 1846; AVX512DQ-LABEL: sext_16i1_16i32: 1847; AVX512DQ: # %bb.0: 1848; AVX512DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 1849; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1850; AVX512DQ-NEXT: retq 1851 %x = icmp slt <16 x i32> %a1, %a2 1852 %y = sext <16 x i1> %x to <16 x i32> 1853 ret <16 x i32> %y 1854} 1855 1856define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { 1857; KNL-LABEL: sext_8i1_8i64: 1858; KNL: # %bb.0: 1859; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1860; KNL-NEXT: vpmovsxdq 
%ymm0, %zmm0 1861; KNL-NEXT: retq 1862; 1863; AVX512DQ-LABEL: sext_8i1_8i64: 1864; AVX512DQ: # %bb.0: 1865; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1866; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 1867; AVX512DQ-NEXT: retq 1868 %x = icmp slt <8 x i32> %a1, %a2 1869 %y = sext <8 x i1> %x to <8 x i64> 1870 ret <8 x i64> %y 1871} 1872 1873define void @extload_v8i64(ptr %a, ptr %res) { 1874; ALL-LABEL: extload_v8i64: 1875; ALL: # %bb.0: 1876; ALL-NEXT: vpmovsxbq (%rdi), %zmm0 1877; ALL-NEXT: vmovdqa64 %zmm0, (%rsi) 1878; ALL-NEXT: vzeroupper 1879; ALL-NEXT: retq 1880 %sign_load = load <8 x i8>, ptr %a 1881 %c = sext <8 x i8> %sign_load to <8 x i64> 1882 store <8 x i64> %c, ptr %res 1883 ret void 1884} 1885 1886define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { 1887; KNL-LABEL: test21: 1888; KNL: # %bb.0: 1889; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1890; KNL-NEXT: andl $1, %eax 1891; KNL-NEXT: kmovw %eax, %k0 1892; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1893; KNL-NEXT: kmovw %eax, %k1 1894; KNL-NEXT: kshiftlw $15, %k1, %k1 1895; KNL-NEXT: kshiftrw $14, %k1, %k1 1896; KNL-NEXT: korw %k1, %k0, %k0 1897; KNL-NEXT: movw $-5, %ax 1898; KNL-NEXT: kmovw %eax, %k1 1899; KNL-NEXT: kandw %k1, %k0, %k0 1900; KNL-NEXT: kmovw %k1, %k7 1901; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1902; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1903; KNL-NEXT: kmovw %eax, %k1 1904; KNL-NEXT: kshiftlw $15, %k1, %k1 1905; KNL-NEXT: kshiftrw $13, %k1, %k1 1906; KNL-NEXT: korw %k1, %k0, %k0 1907; KNL-NEXT: movw $-9, %ax 1908; KNL-NEXT: kmovw %eax, %k1 1909; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1910; KNL-NEXT: kandw %k1, %k0, %k0 1911; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1912; KNL-NEXT: kmovw %eax, %k1 1913; KNL-NEXT: kshiftlw $15, %k1, %k1 1914; KNL-NEXT: kshiftrw $12, %k1, %k1 1915; KNL-NEXT: korw %k1, %k0, %k0 1916; KNL-NEXT: movw $-17, %ax 1917; KNL-NEXT: kmovw %eax, %k1 1918; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte 
Spill 1919; KNL-NEXT: kandw %k1, %k0, %k0 1920; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1921; KNL-NEXT: kmovw %eax, %k1 1922; KNL-NEXT: kshiftlw $15, %k1, %k1 1923; KNL-NEXT: kshiftrw $11, %k1, %k1 1924; KNL-NEXT: korw %k1, %k0, %k0 1925; KNL-NEXT: movw $-33, %ax 1926; KNL-NEXT: kmovw %eax, %k1 1927; KNL-NEXT: kandw %k1, %k0, %k0 1928; KNL-NEXT: kmovw %k1, %k2 1929; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1930; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1931; KNL-NEXT: kmovw %eax, %k1 1932; KNL-NEXT: kshiftlw $15, %k1, %k1 1933; KNL-NEXT: kshiftrw $10, %k1, %k1 1934; KNL-NEXT: korw %k1, %k0, %k0 1935; KNL-NEXT: movw $-65, %ax 1936; KNL-NEXT: kmovw %eax, %k1 1937; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1938; KNL-NEXT: kandw %k1, %k0, %k0 1939; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1940; KNL-NEXT: kmovw %eax, %k1 1941; KNL-NEXT: kshiftlw $15, %k1, %k1 1942; KNL-NEXT: kshiftrw $9, %k1, %k1 1943; KNL-NEXT: korw %k1, %k0, %k0 1944; KNL-NEXT: movw $-129, %ax 1945; KNL-NEXT: kmovw %eax, %k1 1946; KNL-NEXT: kandw %k1, %k0, %k0 1947; KNL-NEXT: kmovw %k1, %k3 1948; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1949; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1950; KNL-NEXT: kmovw %eax, %k1 1951; KNL-NEXT: kshiftlw $15, %k1, %k1 1952; KNL-NEXT: kshiftrw $8, %k1, %k1 1953; KNL-NEXT: korw %k1, %k0, %k0 1954; KNL-NEXT: movw $-257, %ax # imm = 0xFEFF 1955; KNL-NEXT: kmovw %eax, %k1 1956; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1957; KNL-NEXT: kandw %k1, %k0, %k0 1958; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1959; KNL-NEXT: kmovw %eax, %k1 1960; KNL-NEXT: kshiftlw $15, %k1, %k1 1961; KNL-NEXT: kshiftrw $7, %k1, %k1 1962; KNL-NEXT: korw %k1, %k0, %k0 1963; KNL-NEXT: movw $-513, %ax # imm = 0xFDFF 1964; KNL-NEXT: kmovw %eax, %k1 1965; KNL-NEXT: kandw %k1, %k0, %k0 1966; KNL-NEXT: kmovw %k1, %k4 1967; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1968; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 
1969; KNL-NEXT: kmovw %eax, %k1 1970; KNL-NEXT: kshiftlw $15, %k1, %k1 1971; KNL-NEXT: kshiftrw $6, %k1, %k1 1972; KNL-NEXT: korw %k1, %k0, %k0 1973; KNL-NEXT: movw $-1025, %ax # imm = 0xFBFF 1974; KNL-NEXT: kmovw %eax, %k1 1975; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1976; KNL-NEXT: kandw %k1, %k0, %k0 1977; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1978; KNL-NEXT: kmovw %eax, %k1 1979; KNL-NEXT: kshiftlw $15, %k1, %k1 1980; KNL-NEXT: kshiftrw $5, %k1, %k1 1981; KNL-NEXT: korw %k1, %k0, %k0 1982; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF 1983; KNL-NEXT: kmovw %eax, %k5 1984; KNL-NEXT: kandw %k5, %k0, %k0 1985; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1986; KNL-NEXT: kmovw %eax, %k1 1987; KNL-NEXT: kshiftlw $15, %k1, %k1 1988; KNL-NEXT: kshiftrw $4, %k1, %k1 1989; KNL-NEXT: korw %k1, %k0, %k0 1990; KNL-NEXT: movw $-4097, %ax # imm = 0xEFFF 1991; KNL-NEXT: kmovw %eax, %k1 1992; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1993; KNL-NEXT: kandw %k1, %k0, %k0 1994; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1995; KNL-NEXT: kmovw %eax, %k1 1996; KNL-NEXT: kshiftlw $15, %k1, %k1 1997; KNL-NEXT: kshiftrw $3, %k1, %k1 1998; KNL-NEXT: korw %k1, %k0, %k0 1999; KNL-NEXT: movw $-8193, %ax # imm = 0xDFFF 2000; KNL-NEXT: kmovw %eax, %k1 2001; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2002; KNL-NEXT: kandw %k1, %k0, %k0 2003; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2004; KNL-NEXT: kmovw %eax, %k1 2005; KNL-NEXT: kshiftlw $15, %k1, %k1 2006; KNL-NEXT: kshiftrw $2, %k1, %k1 2007; KNL-NEXT: korw %k1, %k0, %k1 2008; KNL-NEXT: movw $-16385, %ax # imm = 0xBFFF 2009; KNL-NEXT: kmovw %eax, %k0 2010; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2011; KNL-NEXT: kandw %k0, %k1, %k1 2012; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2013; KNL-NEXT: kmovw %eax, %k6 2014; KNL-NEXT: kshiftlw $14, %k6, %k6 2015; KNL-NEXT: korw %k6, %k1, %k1 2016; KNL-NEXT: kshiftlw $1, %k1, %k1 2017; KNL-NEXT: kshiftrw $1, %k1, %k1 2018; KNL-NEXT: 
movzbl {{[0-9]+}}(%rsp), %eax 2019; KNL-NEXT: kmovw %eax, %k6 2020; KNL-NEXT: kshiftlw $15, %k6, %k6 2021; KNL-NEXT: korw %k6, %k1, %k1 2022; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2023; KNL-NEXT: andl $1, %edi 2024; KNL-NEXT: kmovw %esi, %k1 2025; KNL-NEXT: kshiftlw $15, %k1, %k1 2026; KNL-NEXT: kshiftrw $14, %k1, %k1 2027; KNL-NEXT: kmovw %edi, %k6 2028; KNL-NEXT: korw %k1, %k6, %k1 2029; KNL-NEXT: kandw %k7, %k1, %k1 2030; KNL-NEXT: kmovw %edx, %k6 2031; KNL-NEXT: kshiftlw $15, %k6, %k6 2032; KNL-NEXT: kshiftrw $13, %k6, %k6 2033; KNL-NEXT: korw %k6, %k1, %k1 2034; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2035; KNL-NEXT: kandw %k7, %k1, %k1 2036; KNL-NEXT: kmovw %ecx, %k6 2037; KNL-NEXT: kshiftlw $15, %k6, %k6 2038; KNL-NEXT: kshiftrw $12, %k6, %k6 2039; KNL-NEXT: korw %k6, %k1, %k1 2040; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2041; KNL-NEXT: kandw %k0, %k1, %k1 2042; KNL-NEXT: kmovw %r8d, %k6 2043; KNL-NEXT: kshiftlw $15, %k6, %k6 2044; KNL-NEXT: kshiftrw $11, %k6, %k6 2045; KNL-NEXT: korw %k6, %k1, %k1 2046; KNL-NEXT: kandw %k2, %k1, %k1 2047; KNL-NEXT: kmovw %r9d, %k6 2048; KNL-NEXT: kshiftlw $15, %k6, %k6 2049; KNL-NEXT: kshiftrw $10, %k6, %k6 2050; KNL-NEXT: korw %k6, %k1, %k1 2051; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2052; KNL-NEXT: kandw %k2, %k1, %k1 2053; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2054; KNL-NEXT: kmovw %eax, %k6 2055; KNL-NEXT: kshiftlw $15, %k6, %k6 2056; KNL-NEXT: kshiftrw $9, %k6, %k6 2057; KNL-NEXT: korw %k6, %k1, %k1 2058; KNL-NEXT: kandw %k3, %k1, %k1 2059; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2060; KNL-NEXT: kmovw %eax, %k6 2061; KNL-NEXT: kshiftlw $15, %k6, %k6 2062; KNL-NEXT: kshiftrw $8, %k6, %k6 2063; KNL-NEXT: korw %k6, %k1, %k1 2064; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2065; KNL-NEXT: kandw %k3, %k1, %k1 2066; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2067; KNL-NEXT: kmovw %eax, %k6 2068; KNL-NEXT: kshiftlw 
$15, %k6, %k6 2069; KNL-NEXT: kshiftrw $7, %k6, %k6 2070; KNL-NEXT: korw %k6, %k1, %k1 2071; KNL-NEXT: kandw %k4, %k1, %k1 2072; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2073; KNL-NEXT: kmovw %eax, %k6 2074; KNL-NEXT: kshiftlw $15, %k6, %k6 2075; KNL-NEXT: kshiftrw $6, %k6, %k6 2076; KNL-NEXT: korw %k6, %k1, %k1 2077; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload 2078; KNL-NEXT: kandw %k4, %k1, %k1 2079; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2080; KNL-NEXT: kmovw %eax, %k6 2081; KNL-NEXT: kshiftlw $15, %k6, %k6 2082; KNL-NEXT: kshiftrw $5, %k6, %k6 2083; KNL-NEXT: korw %k6, %k1, %k1 2084; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2085; KNL-NEXT: kandw %k5, %k1, %k1 2086; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2087; KNL-NEXT: kmovw %eax, %k6 2088; KNL-NEXT: kshiftlw $15, %k6, %k6 2089; KNL-NEXT: kshiftrw $4, %k6, %k6 2090; KNL-NEXT: korw %k6, %k1, %k1 2091; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2092; KNL-NEXT: kandw %k6, %k1, %k1 2093; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2094; KNL-NEXT: kmovw %eax, %k6 2095; KNL-NEXT: kshiftlw $15, %k6, %k6 2096; KNL-NEXT: kshiftrw $3, %k6, %k6 2097; KNL-NEXT: korw %k6, %k1, %k1 2098; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2099; KNL-NEXT: kandw %k6, %k1, %k1 2100; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2101; KNL-NEXT: kmovw %eax, %k6 2102; KNL-NEXT: kshiftlw $15, %k6, %k6 2103; KNL-NEXT: kshiftrw $2, %k6, %k6 2104; KNL-NEXT: korw %k6, %k1, %k1 2105; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2106; KNL-NEXT: kandw %k6, %k1, %k1 2107; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2108; KNL-NEXT: kmovw %eax, %k6 2109; KNL-NEXT: kshiftlw $14, %k6, %k6 2110; KNL-NEXT: korw %k6, %k1, %k1 2111; KNL-NEXT: kshiftlw $1, %k1, %k1 2112; KNL-NEXT: kshiftrw $1, %k1, %k1 2113; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2114; KNL-NEXT: kmovw %eax, %k6 2115; KNL-NEXT: kshiftlw $15, %k6, %k6 2116; KNL-NEXT: korw %k6, %k1, %k1 2117; KNL-NEXT: kmovw %k1, 
{{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2118; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2119; KNL-NEXT: andl $1, %eax 2120; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx 2121; KNL-NEXT: kmovw %ecx, %k1 2122; KNL-NEXT: kshiftlw $15, %k1, %k1 2123; KNL-NEXT: kshiftrw $14, %k1, %k1 2124; KNL-NEXT: kmovw %eax, %k6 2125; KNL-NEXT: korw %k1, %k6, %k1 2126; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2127; KNL-NEXT: kandw %k6, %k1, %k1 2128; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2129; KNL-NEXT: kmovw %eax, %k6 2130; KNL-NEXT: kshiftlw $15, %k6, %k6 2131; KNL-NEXT: kshiftrw $13, %k6, %k6 2132; KNL-NEXT: korw %k6, %k1, %k1 2133; KNL-NEXT: kandw %k7, %k1, %k1 2134; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2135; KNL-NEXT: kmovw %eax, %k6 2136; KNL-NEXT: kshiftlw $15, %k6, %k6 2137; KNL-NEXT: kshiftrw $12, %k6, %k6 2138; KNL-NEXT: korw %k6, %k1, %k1 2139; KNL-NEXT: kandw %k0, %k1, %k1 2140; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2141; KNL-NEXT: kmovw %eax, %k6 2142; KNL-NEXT: kshiftlw $15, %k6, %k6 2143; KNL-NEXT: kshiftrw $11, %k6, %k6 2144; KNL-NEXT: korw %k6, %k1, %k1 2145; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2146; KNL-NEXT: kandw %k0, %k1, %k1 2147; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2148; KNL-NEXT: kmovw %eax, %k6 2149; KNL-NEXT: kshiftlw $15, %k6, %k6 2150; KNL-NEXT: kshiftrw $10, %k6, %k6 2151; KNL-NEXT: korw %k6, %k1, %k1 2152; KNL-NEXT: kandw %k2, %k1, %k1 2153; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2154; KNL-NEXT: kmovw %eax, %k6 2155; KNL-NEXT: kshiftlw $15, %k6, %k6 2156; KNL-NEXT: kshiftrw $9, %k6, %k6 2157; KNL-NEXT: korw %k6, %k1, %k1 2158; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2159; KNL-NEXT: kandw %k0, %k1, %k1 2160; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2161; KNL-NEXT: kmovw %eax, %k6 2162; KNL-NEXT: kshiftlw $15, %k6, %k6 2163; KNL-NEXT: kshiftrw $8, %k6, %k6 2164; KNL-NEXT: korw %k6, %k1, %k1 2165; KNL-NEXT: kandw %k3, %k1, %k1 2166; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2167; 
KNL-NEXT: kmovw %eax, %k6 2168; KNL-NEXT: kshiftlw $15, %k6, %k6 2169; KNL-NEXT: kshiftrw $7, %k6, %k6 2170; KNL-NEXT: korw %k6, %k1, %k1 2171; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2172; KNL-NEXT: kandw %k3, %k1, %k1 2173; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2174; KNL-NEXT: kmovw %eax, %k6 2175; KNL-NEXT: kshiftlw $15, %k6, %k6 2176; KNL-NEXT: kshiftrw $6, %k6, %k6 2177; KNL-NEXT: korw %k6, %k1, %k1 2178; KNL-NEXT: kandw %k4, %k1, %k1 2179; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2180; KNL-NEXT: kmovw %eax, %k6 2181; KNL-NEXT: kshiftlw $15, %k6, %k6 2182; KNL-NEXT: kshiftrw $5, %k6, %k6 2183; KNL-NEXT: korw %k6, %k1, %k1 2184; KNL-NEXT: kandw %k5, %k1, %k1 2185; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2186; KNL-NEXT: kmovw %eax, %k6 2187; KNL-NEXT: kshiftlw $15, %k6, %k6 2188; KNL-NEXT: kshiftrw $4, %k6, %k6 2189; KNL-NEXT: korw %k6, %k1, %k1 2190; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2191; KNL-NEXT: kandw %k0, %k1, %k1 2192; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2193; KNL-NEXT: kmovw %eax, %k6 2194; KNL-NEXT: kshiftlw $15, %k6, %k6 2195; KNL-NEXT: kshiftrw $3, %k6, %k6 2196; KNL-NEXT: korw %k6, %k1, %k1 2197; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2198; KNL-NEXT: kandw %k2, %k1, %k1 2199; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2200; KNL-NEXT: kmovw %eax, %k6 2201; KNL-NEXT: kshiftlw $15, %k6, %k6 2202; KNL-NEXT: kshiftrw $2, %k6, %k6 2203; KNL-NEXT: korw %k6, %k1, %k1 2204; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2205; KNL-NEXT: kandw %k5, %k1, %k1 2206; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2207; KNL-NEXT: kmovw %eax, %k6 2208; KNL-NEXT: kshiftlw $14, %k6, %k6 2209; KNL-NEXT: korw %k6, %k1, %k1 2210; KNL-NEXT: kshiftlw $1, %k1, %k1 2211; KNL-NEXT: kshiftrw $1, %k1, %k1 2212; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2213; KNL-NEXT: kmovw %eax, %k6 2214; KNL-NEXT: kshiftlw $15, %k6, %k6 2215; KNL-NEXT: korw %k6, %k1, %k1 2216; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), 
%eax 2217; KNL-NEXT: andl $1, %eax 2218; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx 2219; KNL-NEXT: kmovw %ecx, %k6 2220; KNL-NEXT: kshiftlw $15, %k6, %k6 2221; KNL-NEXT: kshiftrw $14, %k6, %k6 2222; KNL-NEXT: kmovw %eax, %k7 2223; KNL-NEXT: korw %k6, %k7, %k6 2224; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2225; KNL-NEXT: kandw %k5, %k6, %k6 2226; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2227; KNL-NEXT: kmovw %eax, %k7 2228; KNL-NEXT: kshiftlw $15, %k7, %k7 2229; KNL-NEXT: kshiftrw $13, %k7, %k7 2230; KNL-NEXT: korw %k7, %k6, %k6 2231; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2232; KNL-NEXT: kandw %k5, %k6, %k6 2233; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2234; KNL-NEXT: kmovw %eax, %k7 2235; KNL-NEXT: kshiftlw $15, %k7, %k7 2236; KNL-NEXT: kshiftrw $12, %k7, %k7 2237; KNL-NEXT: korw %k7, %k6, %k6 2238; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2239; KNL-NEXT: kandw %k5, %k6, %k6 2240; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2241; KNL-NEXT: kmovw %eax, %k7 2242; KNL-NEXT: kshiftlw $15, %k7, %k7 2243; KNL-NEXT: kshiftrw $11, %k7, %k7 2244; KNL-NEXT: korw %k7, %k6, %k6 2245; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2246; KNL-NEXT: kandw %k5, %k6, %k6 2247; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2248; KNL-NEXT: kmovw %eax, %k7 2249; KNL-NEXT: kshiftlw $15, %k7, %k7 2250; KNL-NEXT: kshiftrw $10, %k7, %k7 2251; KNL-NEXT: korw %k7, %k6, %k6 2252; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2253; KNL-NEXT: kandw %k5, %k6, %k6 2254; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2255; KNL-NEXT: kmovw %eax, %k7 2256; KNL-NEXT: kshiftlw $15, %k7, %k7 2257; KNL-NEXT: kshiftrw $9, %k7, %k7 2258; KNL-NEXT: korw %k7, %k6, %k6 2259; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2260; KNL-NEXT: kandw %k5, %k6, %k6 2261; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2262; KNL-NEXT: kmovw %eax, %k7 2263; KNL-NEXT: kshiftlw $15, %k7, %k7 2264; KNL-NEXT: kshiftrw $8, %k7, %k7 2265; 
KNL-NEXT: korw %k7, %k6, %k6 2266; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2267; KNL-NEXT: kandw %k5, %k6, %k6 2268; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2269; KNL-NEXT: kmovw %eax, %k7 2270; KNL-NEXT: kshiftlw $15, %k7, %k7 2271; KNL-NEXT: kshiftrw $7, %k7, %k7 2272; KNL-NEXT: korw %k7, %k6, %k6 2273; KNL-NEXT: kandw %k3, %k6, %k6 2274; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2275; KNL-NEXT: kmovw %eax, %k7 2276; KNL-NEXT: kshiftlw $15, %k7, %k7 2277; KNL-NEXT: kshiftrw $6, %k7, %k7 2278; KNL-NEXT: korw %k7, %k6, %k6 2279; KNL-NEXT: kandw %k4, %k6, %k5 2280; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2281; KNL-NEXT: kmovw %eax, %k6 2282; KNL-NEXT: kshiftlw $15, %k6, %k6 2283; KNL-NEXT: kshiftrw $5, %k6, %k6 2284; KNL-NEXT: korw %k6, %k5, %k5 2285; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2286; KNL-NEXT: kandw %k3, %k5, %k4 2287; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2288; KNL-NEXT: kmovw %eax, %k5 2289; KNL-NEXT: kshiftlw $15, %k5, %k5 2290; KNL-NEXT: kshiftrw $4, %k5, %k5 2291; KNL-NEXT: korw %k5, %k4, %k4 2292; KNL-NEXT: kandw %k0, %k4, %k3 2293; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2294; KNL-NEXT: kmovw %eax, %k4 2295; KNL-NEXT: kshiftlw $15, %k4, %k4 2296; KNL-NEXT: kshiftrw $3, %k4, %k4 2297; KNL-NEXT: korw %k4, %k3, %k3 2298; KNL-NEXT: kandw %k2, %k3, %k2 2299; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2300; KNL-NEXT: kmovw %eax, %k3 2301; KNL-NEXT: kshiftlw $15, %k3, %k3 2302; KNL-NEXT: kshiftrw $2, %k3, %k3 2303; KNL-NEXT: korw %k3, %k2, %k2 2304; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2305; KNL-NEXT: kandw %k0, %k2, %k0 2306; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2307; KNL-NEXT: kmovw %eax, %k2 2308; KNL-NEXT: kshiftlw $14, %k2, %k2 2309; KNL-NEXT: korw %k2, %k0, %k0 2310; KNL-NEXT: kshiftlw $1, %k0, %k0 2311; KNL-NEXT: kshiftrw $1, %k0, %k0 2312; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2313; KNL-NEXT: kmovw %eax, %k2 2314; KNL-NEXT: kshiftlw $15, %k2, %k2 2315; KNL-NEXT: korw %k2, 
%k0, %k2 2316; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} 2317; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} 2318; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2319; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} 2320; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2321; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} 2322; KNL-NEXT: vpmovdw %zmm2, %ymm2 2323; KNL-NEXT: vpmovdw %zmm3, %ymm3 2324; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2325; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1 2326; KNL-NEXT: vpmovdw %zmm4, %ymm2 2327; KNL-NEXT: vpmovdw %zmm5, %ymm3 2328; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2329; KNL-NEXT: vpandq %zmm0, %zmm2, %zmm0 2330; KNL-NEXT: retq 2331; 2332; SKX-LABEL: test21: 2333; SKX: # %bb.0: 2334; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 2335; SKX-NEXT: vpmovb2m %zmm2, %k1 2336; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 2337; SKX-NEXT: kshiftrq $32, %k1, %k1 2338; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} 2339; SKX-NEXT: retq 2340; 2341; AVX512DQNOBW-LABEL: test21: 2342; AVX512DQNOBW: # %bb.0: 2343; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2344; AVX512DQNOBW-NEXT: andl $1, %eax 2345; AVX512DQNOBW-NEXT: kmovw %eax, %k0 2346; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2347; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2348; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2349; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1 2350; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2351; AVX512DQNOBW-NEXT: movw $-5, %ax 2352; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2353; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2354; AVX512DQNOBW-NEXT: kmovw %k1, %k7 2355; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2356; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2357; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2358; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2359; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1 2360; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2361; AVX512DQNOBW-NEXT: movw $-9, %ax 
2362; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2363; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2364; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2365; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2366; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2367; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2368; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1 2369; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2370; AVX512DQNOBW-NEXT: movw $-17, %ax 2371; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2372; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2373; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2374; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2375; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2376; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2377; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1 2378; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2379; AVX512DQNOBW-NEXT: movw $-33, %ax 2380; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2381; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2382; AVX512DQNOBW-NEXT: kmovw %k1, %k2 2383; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2384; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2385; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2386; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2387; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1 2388; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2389; AVX512DQNOBW-NEXT: movw $-65, %ax 2390; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2391; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2392; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2393; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2394; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2395; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2396; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1 2397; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2398; AVX512DQNOBW-NEXT: movw $-129, %ax 2399; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2400; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2401; AVX512DQNOBW-NEXT: kmovw %k1, %k3 2402; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2403; 
AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2404; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2405; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2406; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1 2407; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2408; AVX512DQNOBW-NEXT: movw $-257, %ax # imm = 0xFEFF 2409; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2410; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2411; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2412; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2413; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2414; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2415; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1 2416; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2417; AVX512DQNOBW-NEXT: movw $-513, %ax # imm = 0xFDFF 2418; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2419; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2420; AVX512DQNOBW-NEXT: kmovw %k1, %k4 2421; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2422; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2423; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2424; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2425; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1 2426; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2427; AVX512DQNOBW-NEXT: movw $-1025, %ax # imm = 0xFBFF 2428; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2429; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2430; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2431; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2432; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2433; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2434; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1 2435; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2436; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF 2437; AVX512DQNOBW-NEXT: kmovw %eax, %k5 2438; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2439; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2440; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2441; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2442; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1 2443; AVX512DQNOBW-NEXT: korw %k1, 
%k0, %k0 2444; AVX512DQNOBW-NEXT: movw $-4097, %ax # imm = 0xEFFF 2445; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2446; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2447; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2448; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2449; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2450; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2451; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1 2452; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2453; AVX512DQNOBW-NEXT: movw $-8193, %ax # imm = 0xDFFF 2454; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2455; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2456; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2457; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2458; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2459; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2460; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1 2461; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2462; AVX512DQNOBW-NEXT: movw $-16385, %ax # imm = 0xBFFF 2463; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2464; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2465; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2466; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2467; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2468; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 2469; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2470; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 2471; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 2472; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2473; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2474; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2475; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2476; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2477; AVX512DQNOBW-NEXT: andl $1, %edi 2478; AVX512DQNOBW-NEXT: kmovw %esi, %k0 2479; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 2480; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 2481; AVX512DQNOBW-NEXT: kmovw %edi, %k6 2482; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0 2483; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 2484; 
AVX512DQNOBW-NEXT: kmovw %edx, %k6 2485; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2486; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6 2487; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2488; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2489; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 2490; AVX512DQNOBW-NEXT: kmovw %ecx, %k6 2491; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2492; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6 2493; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2494; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2495; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2496; AVX512DQNOBW-NEXT: kmovw %r8d, %k6 2497; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2498; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6 2499; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2500; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2501; AVX512DQNOBW-NEXT: kmovw %r9d, %k6 2502; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2503; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6 2504; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2505; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2506; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2507; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2508; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2509; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2510; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 2511; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2512; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2513; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2514; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2515; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2516; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 2517; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2518; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2519; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2520; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2521; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2522; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2523; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 2524; AVX512DQNOBW-NEXT: korw %k6, %k0, 
%k0 2525; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 2526; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2527; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2528; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2529; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 2530; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2531; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload 2532; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 2533; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2534; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2535; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2536; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 2537; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2538; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2539; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2540; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2541; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2542; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2543; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 2544; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2545; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2546; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2547; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2548; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2549; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2550; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 2551; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2552; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2553; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2554; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2555; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2556; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2557; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 2558; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2559; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2560; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2561; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2562; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2563; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 2564; 
AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2565; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 2566; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 2567; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2568; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2569; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2570; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2571; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2572; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2573; AVX512DQNOBW-NEXT: andl $1, %eax 2574; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx 2575; AVX512DQNOBW-NEXT: kmovw %ecx, %k0 2576; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 2577; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 2578; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2579; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0 2580; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2581; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2582; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2583; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2584; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2585; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6 2586; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2587; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 2588; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2589; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2590; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2591; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6 2592; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2593; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2594; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2595; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2596; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2597; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6 2598; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2599; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2600; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2601; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2602; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2603; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2604; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, 
%k6 2605; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2606; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2607; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2608; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2609; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2610; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 2611; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2612; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2613; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2614; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2615; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2616; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2617; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 2618; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2619; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2620; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2621; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2622; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2623; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 2624; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2625; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2626; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2627; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2628; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2629; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2630; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 2631; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2632; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 2633; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2634; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2635; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2636; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 2637; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2638; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2639; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2640; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2641; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2642; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 2643; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2644; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2645; AVX512DQNOBW-NEXT: kandw %k1, %k0, 
%k0 2646; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2647; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2648; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2649; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 2650; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2651; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2652; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2653; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2654; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2655; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2656; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 2657; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2658; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2659; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2660; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2661; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2662; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 2663; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2664; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 2665; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 2666; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2667; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2668; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2669; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 2670; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2671; AVX512DQNOBW-NEXT: andl $1, %eax 2672; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx 2673; AVX512DQNOBW-NEXT: kmovw %ecx, %k6 2674; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2675; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6 2676; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2677; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6 2678; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2679; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2680; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2681; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2682; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2683; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 2684; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2685; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2686; 
AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2687; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2688; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2689; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2690; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 2691; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2692; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2693; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2694; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2695; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2696; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2697; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 2698; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2699; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2700; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2701; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2702; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2703; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2704; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 2705; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2706; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2707; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2708; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2709; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2710; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2711; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 2712; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2713; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2714; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2715; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2716; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2717; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2718; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 2719; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2720; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2721; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2722; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2723; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2724; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2725; AVX512DQNOBW-NEXT: 
kshiftrw $7, %k7, %k7 2726; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2727; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6 2728; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2729; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2730; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2731; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 2732; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2733; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5 2734; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2735; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2736; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2737; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 2738; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5 2739; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2740; AVX512DQNOBW-NEXT: kandw %k3, %k5, %k4 2741; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2742; AVX512DQNOBW-NEXT: kmovw %eax, %k5 2743; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5 2744; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5 2745; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4 2746; AVX512DQNOBW-NEXT: kandw %k1, %k4, %k3 2747; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2748; AVX512DQNOBW-NEXT: kmovw %eax, %k4 2749; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4 2750; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4 2751; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3 2752; AVX512DQNOBW-NEXT: kandw %k2, %k3, %k2 2753; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2754; AVX512DQNOBW-NEXT: kmovw %eax, %k3 2755; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3 2756; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3 2757; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2 2758; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2759; AVX512DQNOBW-NEXT: kandw %k1, %k2, %k1 2760; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2761; AVX512DQNOBW-NEXT: kmovw %eax, %k2 2762; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 2763; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 2764; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 2765; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 2766; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), 
%eax 2767; AVX512DQNOBW-NEXT: kmovw %eax, %k2 2768; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 2769; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 2770; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm2 2771; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm3 2772; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2773; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4 2774; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2775; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5 2776; AVX512DQNOBW-NEXT: vpmovdw %zmm2, %ymm2 2777; AVX512DQNOBW-NEXT: vpmovdw %zmm3, %ymm3 2778; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2779; AVX512DQNOBW-NEXT: vpandq %zmm1, %zmm2, %zmm1 2780; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm2 2781; AVX512DQNOBW-NEXT: vpmovdw %zmm5, %ymm3 2782; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2783; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm2, %zmm0 2784; AVX512DQNOBW-NEXT: retq 2785 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer 2786 ret <64 x i16> %ret 2787} 2788 2789define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { 2790; ALL-LABEL: shuffle_zext_16x8_to_16x16: 2791; ALL: # %bb.0: 2792; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2793; ALL-NEXT: retq 2794 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 2795 %2 = bitcast <32 x i8> %1 to <16 x i16> 2796 ret <16 x i16> %2 2797} 2798 2799define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { 2800; KNL-LABEL: 
shuffle_zext_16x8_to_16x16_mask: 2801; KNL: # %bb.0: 2802; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 2803; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2804; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 2805; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 2806; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 2807; KNL-NEXT: retq 2808; 2809; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: 2810; SKX: # %bb.0: 2811; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 2812; SKX-NEXT: vpmovb2m %xmm1, %k1 2813; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2814; SKX-NEXT: retq 2815; 2816; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask: 2817; AVX512DQNOBW: # %bb.0: 2818; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 2819; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2820; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 2821; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 2822; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 2823; AVX512DQNOBW-NEXT: retq 2824 %x = shufflevector <16 x i8> %a, <16 x 
i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 2825 %bc = bitcast <32 x i8> %x to <16 x i16> 2826 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer 2827 ret <16 x i16> %ret 2828} 2829 2830define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { 2831; ALL-LABEL: zext_32x8_to_16x16: 2832; ALL: # %bb.0: 2833; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2834; ALL-NEXT: retq 2835 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> 2836 %2 = bitcast <32 x i8> %1 to <16 x i16> 2837 ret <16 x i16> %2 2838} 2839 2840define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { 2841; ALL-LABEL: zext_32x8_to_8x32: 2842; ALL: # %bb.0: 2843; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2844; ALL-NEXT: retq 2845 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32> 2846 %2 = bitcast <32 x i8> %1 to <8 x i32> 2847 ret <8 x i32> %2 2848} 2849 
; Zero-extension-by-shuffle tests: @zext_32x8_to_4x64, @zext_16x16_to_8x32,
; @zext_16x16_to_4x64 and @zext_8x32_to_4x64 each interleave the low elements
; of %a with elements selected from a zeroinitializer operand and then bitcast
; the result to a wider element type. Under the shared ALL prefix the expected
; output is a single vpmovzx* instruction followed by retq.
;
; Zero-extension of a compare result: @zext_64xi1_to_64xi8,
; @zext_32xi1_to_32xi16 and @zext_16xi1_to_16xi16 zero-extend the <N x i1>
; result of an icmp eq back to the element width of the compared vectors.
; For the two 512-bit cases the KNL and AVX512DQNOBW expectations compare the
; two ymm halves separately, rejoin them with vinserti64x4 and mask the result
; with a broadcast constant, while the SKX expectations compare straight into
; a k-register. The 256-bit case has one expectation under the ALL prefix
; (vpcmpeqw followed by vpsrlw $15).
;
; The assertion comments are produced by utils/update_llc_test_checks.py (see
; the first line of the file); regenerate them instead of editing by hand.
2850define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { 2851; ALL-LABEL: zext_32x8_to_4x64: 2852; ALL: # %bb.0: 2853; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 2854; ALL-NEXT: retq 2855 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> 2856 %2 = bitcast <32 x i8> %1 to <4 x i64> 2857 ret <4 x i64> %2 2858} 2859 2860define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { 2861; ALL-LABEL: zext_16x16_to_8x32: 2862; ALL: # %bb.0: 2863; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2864; ALL-NEXT: retq 2865 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16> 2866 %2 = bitcast <16 x i16> %1 to <8 x i32> 2867 ret <8 x i32> %2 2868} 2869 2870define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { 2871; ALL-LABEL: zext_16x16_to_4x64: 2872; ALL: # %bb.0: 2873; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2874; ALL-NEXT: retq 2875 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16> 2876 %2 = bitcast <16 x i16> %1 to <4 x i64> 2877 ret <4 x i64> %2 2878} 2879 2880define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { 2881; ALL-LABEL: zext_8x32_to_4x64: 2882; ALL: # %bb.0: 2883; ALL-NEXT: 
vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2884; ALL-NEXT: retq 2885 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8> 2886 %2 = bitcast <8 x i32> %1 to <4 x i64> 2887 ret <4 x i64> %2 2888} 2889 2890define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { 2891; KNL-LABEL: zext_64xi1_to_64xi8: 2892; KNL: # %bb.0: 2893; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2894; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2895; KNL-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2 2896; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 2897; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 2898; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 2899; KNL-NEXT: retq 2900; 2901; SKX-LABEL: zext_64xi1_to_64xi8: 2902; SKX: # %bb.0: 2903; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 2904; SKX-NEXT: vmovdqu8 {{.*#+}} zmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2905; SKX-NEXT: retq 2906; 2907; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8: 2908; AVX512DQNOBW: # %bb.0: 2909; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2910; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2911; AVX512DQNOBW-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2 2912; AVX512DQNOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 2913; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 2914; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 2915; AVX512DQNOBW-NEXT: retq 2916 %mask = icmp eq <64 x i8> %x, %y 2917 %1 = zext <64 x i1> %mask to <64 x i8> 2918 ret <64 x i8> %1 2919} 2920 2921define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { 2922; KNL-LABEL: zext_32xi1_to_32xi16: 2923; KNL: # %bb.0: 2924; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2925; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2926; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 2927; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 
2928; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 2929; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 2930; KNL-NEXT: retq 2931; 2932; SKX-LABEL: zext_32xi1_to_32xi16: 2933; SKX: # %bb.0: 2934; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 2935; SKX-NEXT: vpmovm2w %k0, %zmm0 2936; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 2937; SKX-NEXT: retq 2938; 2939; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16: 2940; AVX512DQNOBW: # %bb.0: 2941; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2942; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2943; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 2944; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2945; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 2946; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 2947; AVX512DQNOBW-NEXT: retq 2948 %mask = icmp eq <32 x i16> %x, %y 2949 %1 = zext <32 x i1> %mask to <32 x i16> 2950 ret <32 x i16> %1 2951} 2952 2953define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { 2954; ALL-LABEL: zext_16xi1_to_16xi16: 2955; ALL: # %bb.0: 2956; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2957; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0 2958; ALL-NEXT: retq 2959 %mask = icmp eq <16 x i16> %x, %y 2960 %1 = zext <16 x i1> %mask to <16 x i16> 2961 ret <16 x i16> %1 2962} 2963 2964 2965define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { 2966; KNL-LABEL: zext_32xi1_to_32xi8: 2967; KNL: # %bb.0: 2968; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2969; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2970; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 2971; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2972; KNL-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 2973; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2974; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2975; KNL-NEXT: retq 2976; 2977; SKX-LABEL: zext_32xi1_to_32xi8: 2978; SKX: # %bb.0: 2979; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 2980; SKX-NEXT: vmovdqu8 {{.*#+}} ymm0 {%k1} {z} = 
[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2981; SKX-NEXT: retq 2982; 2983; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8: 2984; AVX512DQNOBW: # %bb.0: 2985; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2986; AVX512DQNOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2987; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 2988; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2989; AVX512DQNOBW-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 2990; AVX512DQNOBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2991; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 2992; AVX512DQNOBW-NEXT: retq 2993 %mask = icmp eq <32 x i16> %x, %y 2994 %1 = zext <32 x i1> %mask to <32 x i8> 2995 ret <32 x i8> %1 2996} 2997 2998define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { 2999; KNL-LABEL: zext_4xi1_to_4x32: 3000; KNL: # %bb.0: 3001; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 3002; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 3003; KNL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] 3004; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 3005; KNL-NEXT: retq 3006; 3007; SKX-LABEL: zext_4xi1_to_4x32: 3008; SKX: # %bb.0: 3009; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 3010; SKX-NEXT: vpmovm2d %k0, %xmm0 3011; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 3012; SKX-NEXT: retq 3013; 3014; AVX512DQNOBW-LABEL: zext_4xi1_to_4x32: 3015; AVX512DQNOBW: # %bb.0: 3016; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 3017; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 3018; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 3019; AVX512DQNOBW-NEXT: retq 3020 %mask = icmp eq <4 x i8> %x, %y 3021 %1 = zext <4 x i1> %mask to <4 x i32> 3022 ret <4 x i32> %1 3023} 3024 3025define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { 3026; KNL-LABEL: zext_2xi1_to_2xi64: 3027; KNL: # %bb.0: 3028; 
KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 3029; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 3030; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 3031; KNL-NEXT: retq 3032; 3033; SKX-LABEL: zext_2xi1_to_2xi64: 3034; SKX: # %bb.0: 3035; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 3036; SKX-NEXT: vpmovm2q %k0, %xmm0 3037; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 3038; SKX-NEXT: retq 3039; 3040; AVX512DQNOBW-LABEL: zext_2xi1_to_2xi64: 3041; AVX512DQNOBW: # %bb.0: 3042; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 3043; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 3044; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 3045; AVX512DQNOBW-NEXT: retq 3046 %mask = icmp eq <2 x i8> %x, %y 3047 %1 = zext <2 x i1> %mask to <2 x i64> 3048 ret <2 x i64> %1 3049} 3050