; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 -code-model=small | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 -code-model=medium | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 -code-model=large | FileCheck %s --check-prefix=X64-LARGE

define double @mmx_zero(double, double, double, double) nounwind {
; X86-LABEL: mmx_zero:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    movq 16(%ebp), %mm5
; X86-NEXT:    movq %mm5, (%esp) # 8-byte Spill
; X86-NEXT:    movq %mm0, %mm3
; X86-NEXT:    paddd %mm5, %mm3
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    movq %mm3, %mm6
; X86-NEXT:    pmuludq %mm1, %mm6
; X86-NEXT:    movq 24(%ebp), %mm4
; X86-NEXT:    movq %mm6, %mm2
; X86-NEXT:    paddd %mm4, %mm2
; X86-NEXT:    paddw %mm2, %mm0
; X86-NEXT:    movq %mm5, %mm1
; X86-NEXT:    paddw %mm0, %mm1
; X86-NEXT:    movq 32(%ebp), %mm5
; X86-NEXT:    movq %mm1, %mm7
; X86-NEXT:    pmuludq %mm5, %mm7
; X86-NEXT:    paddw %mm4, %mm7
; X86-NEXT:    paddw %mm7, %mm5
; X86-NEXT:    paddw %mm5, %mm2
; X86-NEXT:    paddw %mm2, %mm0
; X86-NEXT:    paddw %mm6, %mm0
; X86-NEXT:    pmuludq %mm3, %mm0
; X86-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}, %mm0
; X86-NEXT:    paddw %mm1, %mm0
; X86-NEXT:    pmuludq %mm7, %mm0
; X86-NEXT:    pmuludq (%esp), %mm0 # 8-byte Folded Reload
; X86-NEXT:    paddw %mm5, %mm0
; X86-NEXT:    paddw %mm2, %mm0
; X86-NEXT:    movq2dq %mm0, %xmm0
; X86-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: mmx_zero:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    movdq2q %xmm1, %mm5
; X64-NEXT:    movq %mm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT:    movq %mm0, %mm3
; X64-NEXT:    paddd %mm5, %mm3
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    movq %mm3, %mm6
; X64-NEXT:    pmuludq %mm1, %mm6
; X64-NEXT:    movdq2q %xmm2, %mm4
; X64-NEXT:    movq %mm6, %mm2
; X64-NEXT:    paddd %mm4, %mm2
; X64-NEXT:    paddw %mm2, %mm0
; X64-NEXT:    movq %mm5, %mm1
; X64-NEXT:    paddw %mm0, %mm1
; X64-NEXT:    movdq2q %xmm3, %mm5
; X64-NEXT:    movq %mm1, %mm7
; X64-NEXT:    pmuludq %mm5, %mm7
; X64-NEXT:    paddw %mm4, %mm7
; X64-NEXT:    paddw %mm7, %mm5
; X64-NEXT:    paddw %mm5, %mm2
; X64-NEXT:    paddw %mm2, %mm0
; X64-NEXT:    paddw %mm6, %mm0
; X64-NEXT:    pmuludq %mm3, %mm0
; X64-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %mm0
; X64-NEXT:    paddw %mm1, %mm0
; X64-NEXT:    pmuludq %mm7, %mm0
; X64-NEXT:    pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; X64-NEXT:    paddw %mm5, %mm0
; X64-NEXT:    paddw %mm2, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
;
; X64-LARGE-LABEL: mmx_zero:
; X64-LARGE:       # %bb.0:
; X64-LARGE-NEXT:    movdq2q %xmm0, %mm0
; X64-LARGE-NEXT:    movdq2q %xmm1, %mm5
; X64-LARGE-NEXT:    movq %mm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-LARGE-NEXT:    movq %mm0, %mm3
; X64-LARGE-NEXT:    paddd %mm5, %mm3
; X64-LARGE-NEXT:    pxor %mm1, %mm1
; X64-LARGE-NEXT:    movq %mm3, %mm6
; X64-LARGE-NEXT:    pmuludq %mm1, %mm6
; X64-LARGE-NEXT:    movdq2q %xmm2, %mm4
; X64-LARGE-NEXT:    movq %mm6, %mm2
; X64-LARGE-NEXT:    paddd %mm4, %mm2
; X64-LARGE-NEXT:    paddw %mm2, %mm0
; X64-LARGE-NEXT:    movq %mm5, %mm1
; X64-LARGE-NEXT:    paddw %mm0, %mm1
; X64-LARGE-NEXT:    movdq2q %xmm3, %mm5
; X64-LARGE-NEXT:    movq %mm1, %mm7
; X64-LARGE-NEXT:    pmuludq %mm5, %mm7
; X64-LARGE-NEXT:    paddw %mm4, %mm7
; X64-LARGE-NEXT:    paddw %mm7, %mm5
; X64-LARGE-NEXT:    paddw %mm5, %mm2
; X64-LARGE-NEXT:    paddw %mm2, %mm0
; X64-LARGE-NEXT:    paddw %mm6, %mm0
; X64-LARGE-NEXT:    pmuludq %mm3, %mm0
; X64-LARGE-NEXT:    pxor %mm3, %mm3
; X64-LARGE-NEXT:    paddw %mm3, %mm0
; X64-LARGE-NEXT:    paddw %mm1, %mm0
; X64-LARGE-NEXT:    pmuludq %mm7, %mm0
; X64-LARGE-NEXT:    pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; X64-LARGE-NEXT:    paddw %mm5, %mm0
; X64-LARGE-NEXT:    paddw %mm2, %mm0
; X64-LARGE-NEXT:    movq2dq %mm0, %xmm0
; X64-LARGE-NEXT:    retq
  %5 = bitcast double %0 to <1 x i64>
  %6 = bitcast double %1 to <1 x i64>
  %7 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %5, <1 x i64> %6)
  %8 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %7, <1 x i64> bitcast (double 0.000000e+00 to <1 x i64>))
  %9 = bitcast double %2 to <1 x i64>
  %10 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %8, <1 x i64> %9)
  %11 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %5, <1 x i64> %10)
  %12 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %6, <1 x i64> %11)
  %13 = bitcast double %3 to <1 x i64>
  %14 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %12, <1 x i64> %13)
  %15 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %14, <1 x i64> %9)
  %16 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %15, <1 x i64> %13)
  %17 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %16, <1 x i64> %10)
  %18 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %17, <1 x i64> %11)
  %19 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %18, <1 x i64> %8)
  %20 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %19, <1 x i64> %7)
  %21 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %20, <1 x i64> bitcast (double 0.000000e+00 to <1 x i64>))
  %22 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %21, <1 x i64> %12)
  %23 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %22, <1 x i64> %15)
  %24 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %23, <1 x i64> %6)
  %25 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %24, <1 x i64> %16)
  %26 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %25, <1 x i64> %17)
  %27 = bitcast <1 x i64> %26 to double
  ret double %27
}

declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>)
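
; What the run lines above are contrasting: the MMX zero constant written as
; `bitcast (double 0.000000e+00 to <1 x i64>)` has to be materialized by the
; backend. In the small and medium code models the X64 checks expect the
; `paddw` zero operand to be folded as a RIP-relative constant-pool load
; (`paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %mm0`), whereas in the large code
; model, where the constant pool cannot be assumed reachable via RIP-relative
; addressing, the X64-LARGE checks expect the zero to be regenerated in a
; register instead (`pxor %mm3, %mm3` followed by `paddw %mm3, %mm0`).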