; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN

; There are no MMX operations in @t1

define void @t1(i32 %a, ptr %P) nounwind {
; X86-LABEL: t1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    pslld $12, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edi, %xmm0
; X64-NEXT:    pslld $12, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
  %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
  store <1 x i64> %tmp23, ptr %P
  ret void
}

define <4 x float> @t2(ptr %P) nounwind {
; X86-LABEL: t2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

define <4 x float> @t3(ptr %P) nounwind {
; X86-LABEL: t3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

define <4 x float> @t4(ptr %P) nounwind {
; X86-LABEL: t4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

define <4 x float> @t4_under_aligned(ptr %P) nounwind {
; X86-LABEL: t4_under_aligned:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movups (%eax), %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X86-NEXT:    retl
;
; ALIGN-LABEL: t4_under_aligned:
; ALIGN:       # %bb.0:
; ALIGN-NEXT:    movups (%rdi), %xmm0
; ALIGN-NEXT:    xorps %xmm1, %xmm1
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; ALIGN-NEXT:    retq
;
; UNALIGN-LABEL: t4_under_aligned:
; UNALIGN:       # %bb.0:
; UNALIGN-NEXT:    xorps %xmm1, %xmm1
; UNALIGN-NEXT:    xorps %xmm0, %xmm0
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; UNALIGN-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P, align 4
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X86-LABEL: t5:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X86-LABEL: t6:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X86-LABEL: t7:
; X86:       # %bb.0:
; X86-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X86-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X86-LABEL: t8:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X86-LABEL: t9:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
  ret <16 x i8> %s
}