; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

; Purpose of this test: pin the x86-64 code that llc emits when an N-bit
; integer argument is bitcast to an <N x i1> vector and returned, for
; N = 2, 4, 8, 16, 32 and 64, under the five feature sets selected by the
; run lines above (+sse2, +ssse3, +avx, +avx2, +avx512f/vl/bw).
;
; The expected-output comments inside each function are machine generated
; (see the first line of this file); regenerate them with the script rather
; than editing them by hand.

; i2 -> <2 x i1>. Without avx512 the expected code splats the scalar, masks
; each lane with its own bit ([1,2]), compares for equality and shifts the
; result down to bit 0. In the +sse2/+ssse3 output the compare is a 32-bit
; pcmpeqd whose halves are combined with pshufd + pand. With avx512 the
; scalar is moved into a mask register and used for a zero-masked move.
define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i2_2i1:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i2_2i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i2_2i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i2_2i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  ret <2 x i1> %1
}

; i4 -> <4 x i1>. Same splat / mask with [1,2,4,8] / compare / shift shape as
; the i2 case, using 32-bit lanes throughout; avx512 again uses a zero-masked
; move under %k1.
define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i4_4i1:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i4_4i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i4_4i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i4_4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  ret <4 x i1> %1
}

; i8 -> <8 x i1>. Uses 16-bit lanes with the bit constants [1,2,...,128];
; the avx512 output converts the mask register straight to a vector with
; vpmovm2w.
define <8 x i1> @bitcast_i8_8i1(i8 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i8_8i1:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i8_8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i8_8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i8_8i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  ret <8 x i1> %1
}

; PR54911
; Same as bitcast_i8_8i1 except that the vector passes through a freeze
; before being returned. The expected instruction sequences are identical to
; the non-freeze variant for every run configuration.
define <8 x i1> @bitcast_i8_8i1_freeze(i8 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i8_8i1_freeze:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i8_8i1_freeze:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i8_8i1_freeze:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i8_8i1_freeze:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = freeze <8 x i1> %1
  ret <8 x i1> %2
}

; i16 -> <16 x i1>. Byte lanes: the low input byte is replicated into lanes
; 0-7 and the high byte into lanes 8-15 before masking with the repeated
; [1,2,...,128] pattern. The +sse2 output builds that replication from
; punpcklbw/pshuflw/pshufd, while the +ssse3 and avx outputs use one pshufb,
; which is why +sse2 and +ssse3 have separate expectations here. The final
; pand with a constant-pool operand follows the byte compare; the constant's
; value is not visible in this file.
define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; SSE2-LABEL: bitcast_i16_16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: bitcast_i16_16i1:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i16_16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i16_16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i16_16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  ret <16 x i1> %1
}

; i32 -> <32 x i1>. For +sse2/+ssse3 the expected code only stores %esi
; through the pointer in %rdi and returns that pointer in %rax (presumably
; the vector is returned indirectly via a hidden result pointer -- confirm
; against the calling convention). The avx runs produce the result in %ymm0,
; with the +avx output splitting the byte compare into two 128-bit halves.
define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; SSE2-SSSE3-LABEL: bitcast_i32_32i1:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movq %rdi, %rax
; SSE2-SSSE3-NEXT: movl %esi, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_i32_32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i32_32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: bitcast_i32_32i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  ret <32 x i1> %1
}

; i64 -> <64 x i1>. Only the avx512 run yields a vector register result
; (%zmm0 via vpmovm2b). All other runs, including +avx and +avx2 which share
; one expectation here, just store %rsi through %rdi and return %rdi in %rax
; (presumably an indirect return, as in the 32-bit case -- confirm).
define <64 x i1> @bitcast_i64_64i1(i64 %a0) {
; SSE2-SSSE3-LABEL: bitcast_i64_64i1:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movq %rdi, %rax
; SSE2-SSSE3-NEXT: movq %rsi, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_i64_64i1:
; AVX12: # %bb.0:
; AVX12-NEXT: movq %rdi, %rax
; AVX12-NEXT: movq %rsi, (%rdi)
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_i64_64i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq %rdi, %k0
; AVX512-NEXT: vpmovm2b %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>
  ret <64 x i1> %1
}