Lines Matching +full:sub +full:- +full:blocks
1 /*-
33 #define BLOCK_SIZE_MASK (BLOCK_SIZE - 1)
35 /* Minimum 8 byte alignment, to avoid cache-inhibited alignment faults.*/
60 beqlr-
62 beqlr-
65 std %r3, -8(%r1) /* save dst */
96 addi %r3, %r3, -1
98 addi %r4, %r4, -1
99 li %r0, -1
116 addi %r5, %r5, -1
118 beq- .Ldone
129 lbzu %r0, -1(%r4)
130 addi %r5, %r5, -1
131 stbu %r0, -1(%r3)
133 beq- .Ldone
145 li %r0, -1
146 li %r8, -16
147 li %r9, -15
149 addi %r3, %r3, -1
150 addi %r4, %r4, -1
153 srdi. %r6, %r5, 4 /* number of 16-bytes */
156 /* pre-adjustment */
171 /* post-adjustment */
172 sub %r3, %r3, %r9
173 sub %r4, %r4, %r9
176 andi. %r6, %r5, 0x0f /* number of 1-bytes */
177 beq .Ldone /* 1-bytes == 0? done */
190 ld %r3, -8(%r1) /* restore dst */
196 /* set up multi-phase copy parameters */
202 sub %r8, %r5, %r7
205 /* r10 = BLOCKS in the aligned section of the buffer */
213 std %r7, -32(%r1) /* bytes to copy in phase 1 */
214 std %r10, -40(%r1) /* BLOCKS to copy in phase 2 */
215 std %r9, -48(%r1) /* bytes to copy in phase 3 */
226 std %r8, -16(%r1) /* 16-byte increment (16) */
227 std %r7, -24(%r1) /* 16-byte pre/post adjustment (0) */
233 std %r9, -32(%r1) /* bytes to copy in phase 1 */
234 std %r10, -40(%r1) /* BLOCKS to copy in phase 2 */
235 std %r7, -48(%r1) /* bytes to copy in phase 3 */
237 li %r0, -1 /* increment for phases 1 and 3 */
238 add %r6, %r5, %r0 /* r6 = len - 1 */
239 li %r5, -BLOCK_SIZE /* increment for phase 2 */
245 li %r7, -15
246 li %r8, -31
247 li %r9, -47
248 li %r10, -63
250 add %r6, %r7, %r0 /* r6 = -16 */
251 std %r6, -16(%r1) /* 16-byte increment (-16) */
252 std %r7, -24(%r1) /* 16-byte pre/post adjustment (-15) */
255 ld %r6, -32(%r1) /* bytes to copy in phase 1 */
269 ld %r6, -40(%r1) /* BLOCKS to copy in phase 2 */
277 std %r14, -56(%r1)
278 std %r15, -64(%r1)
279 std %r16, -72(%r1)
280 std %r17, -80(%r1)
281 std %r18, -88(%r1)
282 std %r19, -96(%r1)
283 std %r20, -104(%r1)
284 std %r21, -112(%r1)
318 ld %r14, -56(%r1)
319 ld %r15, -64(%r1)
320 ld %r16, -72(%r1)
321 ld %r17, -80(%r1)
322 ld %r18, -88(%r1)
323 ld %r19, -96(%r1)
324 ld %r20, -104(%r1)
325 ld %r21, -112(%r1)
329 /* load registers for transitioning into the single-phase logic */
330 ld %r5, -48(%r1) /* bytes to copy in phase 3 */
331 ld %r8, -16(%r1) /* 16-byte increment */
332 ld %r9, -24(%r1) /* 16-byte pre/post adjustment */
337 .section .note.GNU-stack,"",%progbits