; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_extract_sext:
; X86: # %bb.0:
; X86-NEXT: vmovd %xmm0, %eax
; X86-NEXT: andl $15, %eax
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: andl $15, %eax
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X86-LABEL: knownbits_mask_extract_uitofp:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X86-LABEL: knownbits_insert_uitofp:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovd %ecx, %xmm0
; X86-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: movzwl %si, %ecx
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32> %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_sext:
; X86: # %bb.0:
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64: # %bb.0:
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

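; A second (no-op) shuffle of the masked vector should not lose the known bits;
; codegen should match knownbits_mask_shuffle_sext above.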
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X86: # %bb.0:
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64: # %bb.0:
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X86: # %bb.0:
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vpmovsxwd %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64: # %bb.0:
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

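; After the mask, or'ing with 65535 makes lanes 2 and 3 exactly 65535, so the
; shuffle+uitofp should fold to a constant splat.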
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_or_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vbroadcastss {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

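; In the tests below, the bits that survive the mask (or urem) are entirely
; discarded by the final shift, so each function should fold to all-zeros.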
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_shl_shuffle_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_mul_shuffle_shl:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X86-LABEL: knownbits_mask_trunc_shuffle_shl:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_add_shuffle_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_sub_shuffle_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_urem_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_urem_shuffle_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_srem_shuffle_lshr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_bswap_shuffle_shl:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

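; Both concatenated halves are masked to 17 bits, so every lane is known
; non-negative and the unsigned convert can lower to the signed vcvtdq2ps.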
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_concat_uitofp:
; X86: # %bb.0:
; X86-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [131071,131071,131071,131071]
; X86-NEXT: vandps %xmm2, %xmm1, %xmm1
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X86-NEXT: vandps %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64: # %bb.0:
; X64-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [131071,131071,131071,131071]
; X64-NEXT: vandps %xmm2, %xmm1, %xmm1
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT: vandps %xmm2, %xmm0, %xmm0
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpsrlq $1, %xmm0, %xmm0
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

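; The min/max clamps below establish known ranges for the shuffled lanes: the
; converts can use the signed vcvtdq2ps, and the umax against known all-ones
; lanes lets the ashr fold to all-ones.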
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_umin_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X86-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X86-LABEL: knownbits_umax_shuffle_ashr:
; X86: # %bb.0:
; X86-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64: # %bb.0:
; X64-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

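; Masking out bit 0 means bit 31 is known zero after the bitreverse, so the
; ashr by 31 should fold to zero.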
define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X86-LABEL: knownbits_mask_bitreverse_ashr:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone

; If we don't know that the input isn't INT_MIN we can't combine to sitofp
define <4 x float> @knownbits_abs_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_abs_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpabsd %xmm0, %xmm0
; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X86-NEXT: vpsrld $16, %xmm0, %xmm0
; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X86-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_abs_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpabsd %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vpsrld $16, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = sub <4 x i32> zeroinitializer, %a0
  %2 = icmp slt <4 x i32> %a0, zeroinitializer
  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %a0
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_or_abs_uitofp(<4 x i32> %a0) {
; X86-LABEL: knownbits_or_abs_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X86-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vpabsd %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_or_abs_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpabsd %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = or <4 x i32> %a0, <i32 1, i32 0, i32 3, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  %3 = sub <4 x i32> zeroinitializer, %2
  %4 = icmp slt <4 x i32> %2, zeroinitializer
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> %2
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

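; Known bits should be tracked through both arms of the select: each arm is
; masked (or shifted) into a small non-negative range, so the blended uitofp
; lowers to vcvtdq2ps.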
define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: knownbits_and_select_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 8(%ebp), %xmm3
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2, %xmm2
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X86-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: knownbits_and_select_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 8(%ebp), %xmm3
; X86-NEXT: vpsrld $5, %xmm2, %xmm2
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X86-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpsrld $5, %xmm2, %xmm2
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

define <2 x double> @knownbits_lshr_subvector_uitofp(<4 x i32> %x) {
; X86-LABEL: knownbits_lshr_subvector_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpsrld $2, %xmm0, %xmm1
; X86-NEXT: vpsrld $1, %xmm0, %xmm0
; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X86-NEXT: vcvtdq2pd %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_lshr_subvector_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpsrld $2, %xmm0, %xmm1
; X64-NEXT: vpsrld $1, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64-NEXT: retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 0, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = uitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}