; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX2

define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind {
; X86-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2pd {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i32 %a0 to i64
  %2 = sext i32 %a1 to i64
  %3 = insertelement <2 x i64> undef, i64 %1, i32 0
  %4 = insertelement <2 x i64> %3, i64 %2, i32 1
  %5 = sitofp <2 x i64> %4 to <2 x double>
  ret <2 x double> %5
}

define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext %a1, i32 %a2, i32 %a3) nounwind {
; X86-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovd %ecx, %xmm0
; X86-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i8 %a0 to i64
  %2 = sext i16 %a1 to i64
  %3 = sext i32 %a2 to i64
  %4 = sext i32 %a3 to i64
  %5 = insertelement <4 x i64> undef, i64 %1, i32 0
  %6 = insertelement <4 x i64> %5, i64 %2, i32 1
  %7 = insertelement <4 x i64> %6, i64 %3, i32 2
  %8 = insertelement <4 x i64> %7, i64 %4, i32 3
  %9 = sitofp <4 x i64> %8 to <4 x float>
  ret <4 x float> %9
}

define <4 x double> @signbits_ashr_sitofp_0(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X86-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [268435456,134217728]
; X86-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X86-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [1073741824,536870912]
; X86-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_0:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [268435456,134217728]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vpmovsxdq {{.*#+}} xmm2 = [1073741824,536870912]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vpmovsxdq {{.*#+}} ymm1 = [1073741824,536870912,268435456,134217728]
; X64-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 33, i64 34, i64 35, i64 36>
  %2 = sitofp <4 x i64> %1 to <4 x double>
  ret <4 x double> %2
}

; PR45794
define <4 x float> @signbits_ashr_sitofp_1(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrad $16, %xmm1, %xmm1
; X86-NEXT:    vpsrad $16, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $16, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 48, i64 48, i64 48, i64 48>
  %2 = sitofp <4 x i64> %1 to <4 x float>
  ret <4 x float> %2
}

define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64:       # %bb.0:
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64:       # %bb.0:
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_shl_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpsllq $20, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shl <2 x i64> %1, <i64 20, i64 16>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = sitofp i64 %3 to float
  ret float %4
}

define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    sarl $30, %eax
; X86-NEXT:    vpslld $2, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    sarq $30, %rdi
; X64-NEXT:    vmovq %rdi, %xmm0
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr i64 %a0, 30
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 %a1, i32 1
  %4 = ashr <2 x i64> %3, <i64 3, i64 3>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = sitofp i64 %5 to float
  ret float %6
}

define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
; X86-LABEL: signbits_sext_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = shufflevector <4 x i64> %1, <4 x i64> %a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %3 = sitofp <4 x i64> %2 to <4 x double>
  ret <4 x double> %3
}

define <2 x double> @signbits_sext_shl_sitofp(<2 x i16> %a0) nounwind {
; X86-LABEL: signbits_sext_shl_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %xmm0
; X86-NEXT:    vpsllq $5, %xmm0, %xmm1
; X86-NEXT:    vpsllq $11, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shl_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsllq $5, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpsllq $11, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shl_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = sext <2 x i16> %a0 to <2 x i64>
  %2 = shl <2 x i64> %1, <i64 11, i64 5>
  %3 = sitofp <2 x i64> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = shufflevector <4 x i64> %a1, <4 x i64> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %4 = ashr <4 x i64> %3, <i64 16, i64 16, i64 16, i64 16>
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %6 = sitofp <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
; X86-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vmovd %edi, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = sext i32 %a2 to i64
  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
  %4 = shl <2 x i64> %3, <i64 20, i64 20>
  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
  %6 = and <2 x i64> %1, %5
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  %4 = and <2 x i64> %1, %3
  %5 = or <2 x i64> %4, %3
  %6 = xor <2 x i64> %5, %1
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vmovapd 8(%ebp), %xmm3
; X86-NEXT:    vpsrad $31, %xmm2, %xmm4
; X86-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; X86-NEXT:    vpsrad $1, %xmm5, %xmm5
; X86-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; X86-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X86-NEXT:    vpsrad $31, %xmm2, %xmm5
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-NEXT:    vpsrad $1, %xmm2, %xmm2
; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X86-NEXT:    vshufps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X86-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X86-NEXT:    vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
; X86-NEXT:    vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm4
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; X64-AVX1-NEXT:    vpsrad $1, %xmm5, %xmm5
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm5
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
; X64-AVX1-NEXT:    vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
; X64-AVX2-NEXT:    vpsrad $1, %ymm2, %ymm2
; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; X64-AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a2, <i64 33, i64 63, i64 33, i64 63>
  %2 = sext <4 x i32> %a3 to <4 x i64>
  %3 = icmp eq <4 x i64> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = sitofp <4 x i64> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smax:
; X86:       # %bb.0:
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smin:
; X86:       # %bb.0:
; X86-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umax:
; X86:       # %bb.0:
; X86-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umin:
; X86:       # %bb.0:
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define i32 @signbits_cmpss(float %0, float %1) {
; X86-LABEL: signbits_cmpss:
; X86:       # %bb.0:
; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpss:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovd %xmm0, %eax
; X64-NEXT:    retq
  %3 = fcmp oeq float %0, %1
  %4 = sext i1 %3 to i32
  ret i32 %4
}

define i32 @signbits_cmpss_int(<4 x float> %0, <4 x float> %1) {
; CHECK-LABEL: signbits_cmpss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vextractps $0, %xmm0, %eax
; CHECK-NEXT:    ret{{[l|q]}}
  %3 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %0, <4 x float> %1, i8 0)
  %4 = bitcast <4 x float> %3 to <4 x i32>
  %5 = extractelement <4 x i32> %4, i32 0
  %6 = ashr i32 %5, 31
  ret i32 %6
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8 immarg)

define i64 @signbits_cmpsd(double %0, double %1) {
; X86-LABEL: signbits_cmpsd:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcmpeqsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = fcmp oeq double %0, %1
  %4 = sext i1 %3 to i64
  ret i64 %4
}

define i64 @signbits_cmpsd_int(<2 x double> %0, <2 x double> %1) {
; X86-LABEL: signbits_cmpsd_int:
; X86:       # %bb.0:
; X86-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vextractps $1, %xmm0, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd_int:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %0, <2 x double> %1, i8 0)
  %4 = bitcast <2 x double> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = ashr i64 %5, 63
  ret i64 %6
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg)

; Make sure we can preserve sign bit information into the second basic block
; so we can avoid having to shift bit 0 into bit 7 for each element due to
; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
; ComputeNumSignBits handling for insert_subvector.
define void @cross_bb_signbits_insert_subvec(ptr %ptr, <32 x i8> %x, <32 x i8> %z) {
; X86-LABEL: cross_bb_signbits_insert_subvec:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X86-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X86-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovaps %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vmovaps %ymm0, (%rdi)
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpblendvb %ymm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovdqa %ymm0, (%rdi)
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %a = icmp eq <32 x i8> %x, zeroinitializer
  %b = icmp eq <32 x i8> %x, zeroinitializer
  %c = and <32 x i1> %a, %b
  br label %block

block:
  %d = select <32 x i1> %c, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %z
  store <32 x i8> %d, ptr %ptr, align 32
  br label %exit

exit:
  ret void
}