; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X86-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64-SSE2
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X86-SSE42
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64-SSE42
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64-AVX2
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X86-AVX512
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64-AVX512

; PR78897 - Don't vectorize a mul of extracted values if we'd still need the extract.
; TODO: We should vectorize on 32-bit targets.
;
; The IR builds an <8 x i8> mask (0x11 per lane where the broadcast input byte
; has the corresponding bit of <1,2,4,8,16,32,64,128> clear), bitcasts it to
; i64, multiplies it (and its XOR with 0x0111111111111111) by the i64 constant
; 0x1111111111111110, selects between the two products per lane, and finally
; interleaves the low/high nibbles of the result into the returned <16 x i8>.
; On 64-bit targets the i64 multiplies stay scalar (movabsq + imulq); on
; 32-bit targets they are expanded to mull/imull/addl chains.
define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
; X86-SSE2-LABEL: produceShuffleVectorForByte:
; X86-SSE2:       # %bb.0: # %entry
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm2, %esi
; X86-SSE2-NEXT:    movd %xmm1, %ecx
; X86-SSE2-NEXT:    movl $286331152, %edi # imm = 0x11111110
; X86-SSE2-NEXT:    movl %ecx, %eax
; X86-SSE2-NEXT:    mull %edi
; X86-SSE2-NEXT:    imull $286331153, %ecx, %ebx # imm = 0x11111111
; X86-SSE2-NEXT:    addl %edx, %ebx
; X86-SSE2-NEXT:    imull $286331152, %esi, %edx # imm = 0x11111110
; X86-SSE2-NEXT:    addl %ebx, %edx
; X86-SSE2-NEXT:    movd %edx, %xmm2
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    xorl $286331153, %ecx # imm = 0x11111111
; X86-SSE2-NEXT:    movl %ecx, %eax
; X86-SSE2-NEXT:    mull %edi
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT:    xorl $17895697, %esi # imm = 0x1111111
; X86-SSE2-NEXT:    imull $286331153, %ecx, %ecx # imm = 0x11111111
; X86-SSE2-NEXT:    addl %edx, %ecx
; X86-SSE2-NEXT:    imull $286331152, %esi, %edx # imm = 0x11111110
; X86-SSE2-NEXT:    addl %ecx, %edx
; X86-SSE2-NEXT:    movd %edx, %xmm2
; X86-SSE2-NEXT:    movd %eax, %xmm3
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $4, %xmm1
; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: produceShuffleVectorForByte:
; X64-SSE2:       # %bb.0: # %entry
; X64-SSE2-NEXT:    movd %edi, %xmm0
; X64-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; X64-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm0
; X64-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X64-SSE2-NEXT:    pand %xmm0, %xmm1
; X64-SSE2-NEXT:    movq %xmm1, %rax
; X64-SSE2-NEXT:    movabsq $1229782938247303440, %rcx # imm = 0x1111111111111110
; X64-SSE2-NEXT:    movabsq $76861433640456465, %rdx # imm = 0x111111111111111
; X64-SSE2-NEXT:    xorq %rax, %rdx
; X64-SSE2-NEXT:    imulq %rcx, %rax
; X64-SSE2-NEXT:    movq %rax, %xmm1
; X64-SSE2-NEXT:    imulq %rcx, %rdx
; X64-SSE2-NEXT:    movq %rdx, %xmm2
; X64-SSE2-NEXT:    pand %xmm0, %xmm1
; X64-SSE2-NEXT:    pandn %xmm2, %xmm0
; X64-SSE2-NEXT:    por %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrlw $4, %xmm1
; X64-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X86-SSE42-LABEL: produceShuffleVectorForByte:
; X86-SSE42:       # %bb.0: # %entry
; X86-SSE42-NEXT:    pushl %ebx
; X86-SSE42-NEXT:    pushl %edi
; X86-SSE42-NEXT:    pushl %esi
; X86-SSE42-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE42-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE42-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; X86-SSE42-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm0
; X86-SSE42-NEXT:    pcmpeqb %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X86-SSE42-NEXT:    pand %xmm0, %xmm1
; X86-SSE42-NEXT:    movd %xmm1, %ecx
; X86-SSE42-NEXT:    movl $286331152, %edi # imm = 0x11111110
; X86-SSE42-NEXT:    movl %ecx, %eax
; X86-SSE42-NEXT:    mull %edi
; X86-SSE42-NEXT:    pextrd $1, %xmm1, %esi
; X86-SSE42-NEXT:    imull $286331153, %ecx, %ebx # imm = 0x11111111
; X86-SSE42-NEXT:    addl %edx, %ebx
; X86-SSE42-NEXT:    imull $286331152, %esi, %edx # imm = 0x11111110
; X86-SSE42-NEXT:    addl %ebx, %edx
; X86-SSE42-NEXT:    movd %eax, %xmm2
; X86-SSE42-NEXT:    pinsrd $1, %edx, %xmm2
; X86-SSE42-NEXT:    xorl $286331153, %ecx # imm = 0x11111111
; X86-SSE42-NEXT:    movl %ecx, %eax
; X86-SSE42-NEXT:    mull %edi
; X86-SSE42-NEXT:    xorl $17895697, %esi # imm = 0x1111111
; X86-SSE42-NEXT:    imull $286331153, %ecx, %ecx # imm = 0x11111111
; X86-SSE42-NEXT:    addl %edx, %ecx
; X86-SSE42-NEXT:    imull $286331152, %esi, %edx # imm = 0x11111110
; X86-SSE42-NEXT:    addl %ecx, %edx
; X86-SSE42-NEXT:    movd %eax, %xmm1
; X86-SSE42-NEXT:    pinsrd $1, %edx, %xmm1
; X86-SSE42-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $4, %xmm0
; X86-SSE42-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE42-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    popl %esi
; X86-SSE42-NEXT:    popl %edi
; X86-SSE42-NEXT:    popl %ebx
; X86-SSE42-NEXT:    retl
;
; X64-SSE42-LABEL: produceShuffleVectorForByte:
; X64-SSE42:       # %bb.0: # %entry
; X64-SSE42-NEXT:    movd %edi, %xmm0
; X64-SSE42-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE42-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; X64-SSE42-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm0
; X64-SSE42-NEXT:    pcmpeqb %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X64-SSE42-NEXT:    pand %xmm0, %xmm1
; X64-SSE42-NEXT:    movq %xmm1, %rax
; X64-SSE42-NEXT:    movabsq $1229782938247303440, %rcx # imm = 0x1111111111111110
; X64-SSE42-NEXT:    movabsq $76861433640456465, %rdx # imm = 0x111111111111111
; X64-SSE42-NEXT:    xorq %rax, %rdx
; X64-SSE42-NEXT:    imulq %rcx, %rax
; X64-SSE42-NEXT:    movq %rax, %xmm2
; X64-SSE42-NEXT:    imulq %rcx, %rdx
; X64-SSE42-NEXT:    movq %rdx, %xmm1
; X64-SSE42-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $4, %xmm0
; X64-SSE42-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE42-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    retq
;
; X86-AVX2-LABEL: produceShuffleVectorForByte:
; X86-AVX2:       # %bb.0: # %entry
; X86-AVX2-NEXT:    pushl %ebx
; X86-AVX2-NEXT:    pushl %edi
; X86-AVX2-NEXT:    pushl %esi
; X86-AVX2-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm0
; X86-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X86-AVX2-NEXT:    vmovd %xmm1, %edx
; X86-AVX2-NEXT:    movl $286331152, %ecx # imm = 0x11111110
; X86-AVX2-NEXT:    mulxl %ecx, %edi, %esi
; X86-AVX2-NEXT:    vpextrd $1, %xmm1, %eax
; X86-AVX2-NEXT:    imull $286331153, %edx, %ebx # imm = 0x11111111
; X86-AVX2-NEXT:    addl %esi, %ebx
; X86-AVX2-NEXT:    imull $286331152, %eax, %esi # imm = 0x11111110
; X86-AVX2-NEXT:    addl %ebx, %esi
; X86-AVX2-NEXT:    vmovd %edi, %xmm1
; X86-AVX2-NEXT:    xorl $286331153, %edx # imm = 0x11111111
; X86-AVX2-NEXT:    mulxl %ecx, %edi, %ecx
; X86-AVX2-NEXT:    vpinsrd $1, %esi, %xmm1, %xmm1
; X86-AVX2-NEXT:    xorl $17895697, %eax # imm = 0x1111111
; X86-AVX2-NEXT:    imull $286331153, %edx, %edx # imm = 0x11111111
; X86-AVX2-NEXT:    addl %ecx, %edx
; X86-AVX2-NEXT:    imull $286331152, %eax, %eax # imm = 0x11111110
; X86-AVX2-NEXT:    addl %edx, %eax
; X86-AVX2-NEXT:    vmovd %edi, %xmm2
; X86-AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; X86-AVX2-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; X86-AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    popl %esi
; X86-AVX2-NEXT:    popl %edi
; X86-AVX2-NEXT:    popl %ebx
; X86-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: produceShuffleVectorForByte:
; X64-AVX2:       # %bb.0: # %entry
; X64-AVX2-NEXT:    vmovd %edi, %xmm0
; X64-AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; X64-AVX2-NEXT:    vmovq %xmm1, %rax
; X64-AVX2-NEXT:    movabsq $1229782938247303440, %rcx # imm = 0x1111111111111110
; X64-AVX2-NEXT:    movabsq $76861433640456465, %rdx # imm = 0x111111111111111
; X64-AVX2-NEXT:    xorq %rax, %rdx
; X64-AVX2-NEXT:    imulq %rcx, %rax
; X64-AVX2-NEXT:    vmovq %rax, %xmm1
; X64-AVX2-NEXT:    imulq %rcx, %rdx
; X64-AVX2-NEXT:    vmovq %rdx, %xmm2
; X64-AVX2-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; X64-AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X86-AVX512-LABEL: produceShuffleVectorForByte:
; X86-AVX512:       # %bb.0: # %entry
; X86-AVX512-NEXT:    pushl %ebx
; X86-AVX512-NEXT:    pushl %edi
; X86-AVX512-NEXT:    pushl %esi
; X86-AVX512-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm0
; X86-AVX512-NEXT:    vptestnmb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %k1
; X86-AVX512-NEXT:    vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X86-AVX512-NEXT:    vpextrd $1, %xmm0, %eax
; X86-AVX512-NEXT:    vmovd %xmm0, %edx
; X86-AVX512-NEXT:    movl $286331152, %ecx # imm = 0x11111110
; X86-AVX512-NEXT:    mulxl %ecx, %edi, %esi
; X86-AVX512-NEXT:    imull $286331153, %edx, %ebx # imm = 0x11111111
; X86-AVX512-NEXT:    addl %esi, %ebx
; X86-AVX512-NEXT:    imull $286331152, %eax, %esi # imm = 0x11111110
; X86-AVX512-NEXT:    addl %ebx, %esi
; X86-AVX512-NEXT:    vmovd %edi, %xmm0
; X86-AVX512-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX512-NEXT:    xorl $17895697, %eax # imm = 0x1111111
; X86-AVX512-NEXT:    xorl $286331153, %edx # imm = 0x11111111
; X86-AVX512-NEXT:    mulxl %ecx, %esi, %ecx
; X86-AVX512-NEXT:    imull $286331153, %edx, %edx # imm = 0x11111111
; X86-AVX512-NEXT:    addl %ecx, %edx
; X86-AVX512-NEXT:    imull $286331152, %eax, %eax # imm = 0x11111110
; X86-AVX512-NEXT:    addl %edx, %eax
; X86-AVX512-NEXT:    vmovd %esi, %xmm1
; X86-AVX512-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; X86-AVX512-NEXT:    vmovdqu8 %xmm0, %xmm1 {%k1}
; X86-AVX512-NEXT:    vpsrlw $4, %xmm1, %xmm0
; X86-AVX512-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512-NEXT:    popl %esi
; X86-AVX512-NEXT:    popl %edi
; X86-AVX512-NEXT:    popl %ebx
; X86-AVX512-NEXT:    retl
;
; X64-AVX512-LABEL: produceShuffleVectorForByte:
; X64-AVX512:       # %bb.0: # %entry
; X64-AVX512-NEXT:    vpbroadcastb %edi, %xmm0
; X64-AVX512-NEXT:    vptestnmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; X64-AVX512-NEXT:    vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    movabsq $1229782938247303440, %rcx # imm = 0x1111111111111110
; X64-AVX512-NEXT:    movabsq $76861433640456465, %rdx # imm = 0x111111111111111
; X64-AVX512-NEXT:    xorq %rax, %rdx
; X64-AVX512-NEXT:    imulq %rcx, %rax
; X64-AVX512-NEXT:    vmovq %rax, %xmm0
; X64-AVX512-NEXT:    imulq %rcx, %rdx
; X64-AVX512-NEXT:    vmovq %rdx, %xmm1
; X64-AVX512-NEXT:    vmovdqu8 %xmm0, %xmm1 {%k1}
; X64-AVX512-NEXT:    vpsrlw $4, %xmm1, %xmm0
; X64-AVX512-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT:    retq
entry:
  %const = bitcast i64 1229782938247303440 to i64
  %1 = insertelement <1 x i8> poison, i8 %0, i64 0
  %2 = shufflevector <1 x i8> %1, <1 x i8> poison, <8 x i32> zeroinitializer
  %3 = and <8 x i8> %2, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
  %.not.not = icmp eq <8 x i8> %3, zeroinitializer
  %4 = select <8 x i1> %.not.not, <8 x i8> <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>, <8 x i8> zeroinitializer
  %5 = bitcast <8 x i8> %4 to i64
  %6 = mul i64 %5, %const
  %7 = bitcast i64 %6 to <8 x i8>
  %8 = xor i64 %5, 76861433640456465
  %9 = mul i64 %8, %const
  %10 = bitcast i64 %9 to <8 x i8>
  %11 = select <8 x i1> %.not.not, <8 x i8> %7, <8 x i8> %10
  %12 = and <8 x i8> %11, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  %13 = lshr <8 x i8> %11, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %14 = shufflevector <8 x i8> %12, <8 x i8> %13, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %14
}