1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2; Copyright(c) 2011-2018 Intel Corporation All rights reserved. 3; 4; Redistribution and use in source and binary forms, with or without 5; modification, are permitted provided that the following conditions 6; are met: 7; * Redistributions of source code must retain the above copyright 8; notice, this list of conditions and the following disclaimer. 9; * Redistributions in binary form must reproduce the above copyright 10; notice, this list of conditions and the following disclaimer in 11; the documentation and/or other materials provided with the 12; distribution. 13; * Neither the name of Intel Corporation nor the names of its 14; contributors may be used to endorse or promote products derived 15; from this software without specific prior written permission. 16; 17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include "reg_sizes.asm"

; Syntax: NASM/YASM (Intel operand order), 64-bit only.
; The two blocks below map the symbolic names used by the routine onto the
; argument/scratch registers of the selected output format.
; NOTE(review): endbranch, proc_frame, end_prolog, endproc_frame and
; mk_global are not defined in this file; presumably they come from
; reg_sizes.asm - confirm when changing the prologue/epilogue.

%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0	rdi
 %define arg1	rsi
 %define arg2	rdx
 %define arg3	rcx
 %define arg4	r8
 %define arg5	r9
 %define tmp	r11
 %define tmpb	r11b
 %define tmp3	arg4
 %define return	rax
 %define func(x) x: endbranch
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif

%ifidn __OUTPUT_FORMAT__, win64
 %define arg0	rcx
 %define arg1	rdx
 %define arg2	r8
 %define arg3	r9
 %define tmp	r11
 %define tmpb	r11b
 %define tmp3	r10
 %define return	rax
 %define func(x) proc_frame x
 %macro FUNC_SAVE 0
	end_prolog
 %endmacro
 %macro FUNC_RESTORE 0
 %endmacro
%endif

%define src	arg0
%define len	arg1
%define ptr	arg2
%define pos	return

default rel

[bits 64]
section .text

;-----------------------------------------------------------------------
; mem_zero_detect_sse
;
; Scans a byte buffer and reports whether every byte is zero.
;
; In:    arg0 (src) = address of the buffer
;        arg1 (len) = buffer length in bytes, treated as unsigned
; Out:   rax = 0 if all len bytes are zero (also when len == 0)
;        rax = 1 if any byte is non-zero
; Clobb: rax, r11, arg1, xmm0-xmm3, flags
; Stack: none used by the code in this file
;
; Strategy by length:
;   len  > 64 : 64-byte strides, then one (possibly overlapping) 64-byte
;               block that ends exactly at src+len
;   32..64    : first 32 bytes + last 32 bytes (may overlap)
;   16..31    : first 16 bytes + last 16 bytes (may overlap)
;    8..15    : first 8 bytes OR-ed with last 8 bytes (may overlap)
;    0..7     : byte-by-byte loop
; All loads are unaligned-safe (movdqu / plain mov) and stay inside
; [src, src+len).
;
; Because len is an unsigned byte count, every length comparison uses the
; unsigned condition codes (jb/jbe).
;-----------------------------------------------------------------------
align 16
mk_global mem_zero_detect_sse, function
func(mem_zero_detect_sse)
	FUNC_SAVE
	xor	pos, pos			; pos = 0 (byte offset into src)
	sub	len, 4*16			; len = length - 64
	jbe	.mem_z_small_block		; borrow or zero: length <= 64

	; Invariant: len = length - 64 > 0, pos is a multiple of 64.
.mem_z_loop:
	movdqu	xmm0, [src+pos]
	movdqu	xmm1, [src+pos+1*16]
	movdqu	xmm2, [src+pos+2*16]
	movdqu	xmm3, [src+pos+3*16]
	ptest	xmm0, xmm0			; ZF = 1 iff all 128 bits are zero
	jnz	.return_fail
	ptest	xmm1, xmm1
	jnz	.return_fail
	ptest	xmm2, xmm2
	jnz	.return_fail
	ptest	xmm3, xmm3
	jnz	.return_fail
	add	pos, 4*16
	cmp	pos, len
	jb	.mem_z_loop			; more full strides before the tail

	; Final 64 bytes: [src+length-64, src+length). May overlap bytes the
	; loop already checked, which avoids a separate remainder path.
.mem_z_last_block:
	movdqu	xmm0, [src+len]
	movdqu	xmm1, [src+len+1*16]
	movdqu	xmm2, [src+len+2*16]
	movdqu	xmm3, [src+len+3*16]
	ptest	xmm0, xmm0
	jnz	.return_fail
	ptest	xmm1, xmm1
	jnz	.return_fail
	ptest	xmm2, xmm2
	jnz	.return_fail
	ptest	xmm3, xmm3
	jnz	.return_fail

.return_pass:
	xor	return, return			; 0 = buffer is all zero
	FUNC_RESTORE
	ret


.mem_z_small_block:
	add	len, 4*16			; undo the bias: len = length (0..64)
	cmp	len, 2*16
	jb	.mem_z_lt32
	; 32 <= length <= 64: first 32 bytes plus last 32 bytes cover it all.
	movdqu	xmm0, [src]
	movdqu	xmm1, [src+16]
	movdqu	xmm2, [src+len-2*16]
	movdqu	xmm3, [src+len-1*16]
	ptest	xmm0, xmm0
	jnz	.return_fail
	ptest	xmm1, xmm1
	jnz	.return_fail
	ptest	xmm2, xmm2
	jnz	.return_fail
	ptest	xmm3, xmm3
	jnz	.return_fail
	jmp	.return_pass

.mem_z_lt32:
	cmp	len, 16
	jb	.mem_z_lt16
	; 16 <= length < 32: first 16 bytes plus last 16 bytes.
	movdqu	xmm0, [src]
	movdqu	xmm1, [src+len-16]
	ptest	xmm0, xmm0
	jnz	.return_fail
	ptest	xmm1, xmm1
	jnz	.return_fail
	jmp	.return_pass

.mem_z_lt16:
	cmp	len, 8
	jb	.mem_z_lt8
	; 8 <= length < 16: OR the first and last qword; the OR itself sets
	; ZF, so no separate test is needed.
	mov	tmp, [src]
	or	tmp, [src+len-8]
	jnz	.return_fail
	jmp	.return_pass

.mem_z_lt8:
	test	len, len
	jz	.return_pass			; empty buffer counts as all zero
	; 1 <= length < 8: pos is still 0 here (the wide loop was not entered).
.mem_z_1byte_loop:
	cmp	byte [src+pos], 0
	jne	.return_fail
	add	pos, 1
	cmp	pos, len
	jb	.mem_z_1byte_loop
	jmp	.return_pass

.return_fail:
	mov	return, 1			; 1 = a non-zero byte was found
	FUNC_RESTORE
	ret

endproc_frame