; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64

define <2 x i64> @freeze_insert_vector_elt(<2 x i64> %a0) {
; CHECK-LABEL: freeze_insert_vector_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %idx0 = insertelement <2 x i64> %a0, i64 0, i64 0
  %freeze0 = freeze <2 x i64> %idx0
  %idx1 = insertelement <2 x i64> %freeze0, i64 0, i64 1
  %freeze1 = freeze <2 x i64> %idx1
  ret <2 x i64> %freeze1
}

define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_insert_subvector:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %y = freeze <8 x i32> %x
  %z = shufflevector <8 x i32> %y, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %z
}

define <2 x i64> @freeze_sign_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_sign_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = sext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <2 x i64> @freeze_zero_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_zero_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %x = zext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = zext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <4 x i32> @freeze_pshufd(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_pshufd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x i32> %x
  %z = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %z
}

define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
; CHECK-LABEL: freeze_permilps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x float> %a0, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x float> %x
  %z = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %z
}

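; Freezing between the bitcasts should not block load/store folding: all four
; bitcast tests below are expected to lower to plain 64-bit loads and stores.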
define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  store <8 x i8> %i1, ptr %escape
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  store <4 x i16> %i1, ptr %escape
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

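; Extracting a constant, in-range element should look through the freeze: the
; vpextrb below operates directly on the frozen AND result.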
define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwind {
; X86-LABEL: freeze_extractelement:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovdqa (%edx), %xmm0
; X86-NEXT:    vpand (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}
define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_escape:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovdqa (%esi), %xmm0
; X86-NEXT:    vpand (%edx), %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_escape:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  store <16 x i8> %i3, ptr %escape
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}

; It would be a miscompilation to pull freeze out of extractelement here.
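; (Both extracts read the same frozen vector, so %i6 must be true whenever the
; two indices are equal and in range; freezing each extracted element
; independently could produce two different values for the same frozen lane.)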
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_extra_use:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 24(%ebp), %eax
; X86-NEXT:    andl $15, %eax
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movl 32(%ebp), %edx
; X86-NEXT:    movl 12(%ebp), %esi
; X86-NEXT:    movl 8(%ebp), %edi
; X86-NEXT:    vmovaps (%edi), %xmm0
; X86-NEXT:    vandps (%esi), %xmm0, %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esp)
; X86-NEXT:    movzbl (%esp,%ecx), %ecx
; X86-NEXT:    cmpb (%esp,%eax), %cl
; X86-NEXT:    sete (%edx)
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_extra_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    andl $15, %edx
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vandps (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -24(%rsp,%rdx), %eax
; X64-NEXT:    cmpb -24(%rsp,%rcx), %al
; X64-NEXT:    sete (%r8)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 %idx0
  %i5 = extractelement <16 x i8> %i3, i64 %idx1
  %i6 = icmp eq i8 %i4, %i5
  store i1 %i6, ptr %dst
  ret void
}

define void @freeze_buildvector_single_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 42, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

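; As above, but the maybe-poison operand is reused in two lanes, so the
; lowering is expected to splat the single frozen element into both lanes
; rather than freezing each lane independently.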
define void @freeze_buildvector_single_repeated_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, %ecx, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 %i0, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_frozen_buildvectors:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X86-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vmovd %edx, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_frozen_buildvectors:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  %i7 = freeze <4 x i32> %i6
  store <4 x i32> %i7, ptr %dst1
  ret void
}

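; Like freeze_two_frozen_buildvectors, but only the first build vector is
; frozen; the poison lanes of the unfrozen second vector need not be
; materialized as any particular value before the second store.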
define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_only_one_frozen:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_only_one_frozen:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastd %xmm1, %xmm1
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vpand %xmm2, %xmm1, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst1
  ret void
}

define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vmovddup {{.*#+}} xmm0 = [7,0,7,0]
; X86-NEXT:    # xmm0 = mem[0,0]
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
; X86-NEXT:    vmovdqa %xmm2, (%ecx)
; X86-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i64, ptr %origin0
  %i0 = and i64 %i0.src, 15
  %i1.src = load i64, ptr %origin1
  %i1 = and i64 %i0.src, 15
  %i2 = insertelement <2 x i64> poison, i64 %i0, i64 0
  %i3 = and <2 x i64> %i2, <i64 7, i64 7>
  %i4 = freeze <2 x i64> %i3
  store <2 x i64> %i4, ptr %dst0
  %i5 = insertelement <2 x i64> poison, i64 %i1, i64 1
  %i6 = and <2 x i64> %i5, <i64 7, i64 7>
  store <2 x i64> %i6, ptr %dst1
  ret void
}

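; Freeze of a build vector whose four lanes are all maybe-poison loads; the
; loads should still fold into the vpinsrd inserts.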
define void @freeze_buildvector(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_one_undef_elt(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i7 = insertelement <4 x i32> %i5, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_extrause(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_buildvector_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_extrause:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r9)
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  store <4 x i32> %i7, ptr %escape
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

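; Regression test for PR59677: only lane 0 of the vector sin result is used,
; so the call should be scalarized to a single sinf libcall.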
define void @pr59677(i32 %x, ptr %out) nounwind {
; X86-LABEL: pr59677:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    calll sinf
; X86-NEXT:    fstps (%esi)
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pr59677:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    callq sinf@PLT
; X64-NEXT:    vmovss %xmm0, (%rbx)
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %i0 = or i32 %x, 1
  %i1 = insertelement <4 x i32> zeroinitializer, i32 %x, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 %i0, i64 1
  %i3 = shl <4 x i32> %i2, <i32 1, i32 1, i32 1, i32 1>
  %i4 = sitofp <4 x i32> %i3 to <4 x float>
  %i5 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %i4)
  %i6 = extractelement <4 x float> %i5, i64 0
  store float %i6, ptr %out, align 4
  ret void
}
declare <4 x float> @llvm.sin.v4f32(<4 x float>)

; Test that we can eliminate freeze by changing the BUILD_VECTOR to a splat
; zero vector.
define void @freeze_buildvector_not_simple_type(ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_not_simple_type:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $0, 4(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_not_simple_type:
; X64:       # %bb.0:
; X64-NEXT:    movb $0, 4(%rdi)
; X64-NEXT:    movl $0, (%rdi)
; X64-NEXT:    retq
  %i0 = freeze <5 x i8> <i8 poison, i8 0, i8 0, i8 undef, i8 0>
  store <5 x i8> %i0, ptr %dst
  ret void
}