#
49b090ed |
| 12-Dec-2006 |
Chris Lattner <sabre@nondot.org> |
teach scev to analyze X*4|1 like X*4+c. This allows us to produce:
LBB1_1: #bb movdqa (%esi), %xmm2 movaps %xmm2, %xmm3 punpcklbw %xmm0, %xmm3 movaps %xmm3, %xmm4
teach scev to analyze X*4|1 like X*4+c. This allows us to produce:
LBB1_1: #bb movdqa (%esi), %xmm2 movaps %xmm2, %xmm3 punpcklbw %xmm0, %xmm3 movaps %xmm3, %xmm4 punpcklwd %xmm0, %xmm4 cvtdq2ps %xmm4, %xmm4 mulps %xmm1, %xmm4 movaps %xmm4, (%edi) leal 1(,%eax,4), %ebx shll $4, %ebx punpckhwd %xmm0, %xmm3 cvtdq2ps %xmm3, %xmm3 mulps %xmm1, %xmm3 movaps %xmm3, (%edx,%ebx) leal 2(,%eax,4), %ebx shll $4, %ebx punpckhbw %xmm0, %xmm2 movaps %xmm2, %xmm3 punpcklwd %xmm0, %xmm3 cvtdq2ps %xmm3, %xmm3 mulps %xmm1, %xmm3 movaps %xmm3, (%edx,%ebx) leal 3(,%eax,4), %ebx shll $4, %ebx punpckhwd %xmm0, %xmm2 cvtdq2ps %xmm2, %xmm2 mulps %xmm1, %xmm2 movaps %xmm2, (%edx,%ebx) addl $64, %edi incl %eax addl $16, %esi cmpl %ecx, %eax jne LBB1_1 #bb
instead of:
LBB1_1: #bb movdqa (%esi), %xmm2 movaps %xmm2, %xmm3 punpcklbw %xmm0, %xmm3 movaps %xmm3, %xmm4 punpcklwd %xmm0, %xmm4 cvtdq2ps %xmm4, %xmm4 mulps %xmm1, %xmm4 movaps %xmm4, (%edi) leal 1(,%eax,4), %ebx shll $4, %ebx punpckhwd %xmm0, %xmm3 cvtdq2ps %xmm3, %xmm3 mulps %xmm1, %xmm3 movaps %xmm3, (%edx,%ebx) leal 2(,%eax,4), %ebx shll $4, %ebx punpckhbw %xmm0, %xmm2 movaps %xmm2, %xmm3 punpcklwd %xmm0, %xmm3 cvtdq2ps %xmm3, %xmm3 mulps %xmm1, %xmm3 movaps %xmm3, (%edx,%ebx) leal 3(,%eax,4), %ebx shll $4, %ebx punpckhwd %xmm0, %xmm2 cvtdq2ps %xmm2, %xmm2 mulps %xmm1, %xmm2 movaps %xmm2, (%edx,%ebx) addl $64, %edi incl %eax addl $16, %esi cmpl %ecx, %eax jne LBB1_1 #bb
for a testcase.
llvm-svn: 32463
show more ...
|