/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	.arch armv8-a+crc
	.text
	.align	2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"


/*
declare Macros
*/

/*
 declare_generic_reg name, reg, default
   Creates three register aliases for general register number \reg:
     \name    -> \default\reg  (default is x or w)
     w_\name  -> w\reg
     x_\name  -> x\reg
*/
.macro	declare_generic_reg	name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 update_state stream, start_in, next_in, end_in, m_out_buf, m_out_start, tmp0, tmp1
   Writes the input/output progress back into the stream structure.
   Every argument is an alias NAME created by declare_generic_reg (the
   macro forms x_<name> / w_<name> from it), not a raw register.

   On entry:
     start_in     input pointer at function entry
     next_in      current input pointer
     end_in       one past the last input byte
     m_out_buf    current output pointer
     m_out_start  output pointer at function entry
   Effects:
     has_hist is set to 1 when next_in > start_in
     next_in, avail_in, total_in, next_out, avail_out, total_out are updated
   Clobbers: flags, tmp0, start_in, next_in, end_in, m_out_buf, m_out_start.
   tmp1 is accepted but not used.
*/
.macro	update_state	stream:req,start_in:req,next_in:req,end_in:req,m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req

	//m_out_buf=bytes_written
	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
	//if any input was consumed, mark that history exists
	cmp	x_\next_in,x_\start_in
	bls	.Lskip_has_hist_\@
	mov	w_\tmp0,1
	strb	w_\tmp0,[x_\stream,_internal_state_has_hist]
.Lskip_has_hist_\@:
	ldr	w_\tmp0,[x_\stream,_total_in]
	ldr	x_\m_out_start,[x_\stream,_next_out]	//m_out_start = next_out

	str	x_\next_in,[x_\stream,_next_in]
	sub	x_\start_in,x_\next_in,x_\start_in	//start_in = bytes consumed
	sub	x_\end_in,x_\end_in,x_\next_in		//end_in = bytes remaining
	add	w_\tmp0,w_\tmp0,w_\start_in		//tmp0 = total_in + bytes consumed
	//the pair stored at _avail_in is (remaining, new total_in)
	stp	w_\end_in,w_\tmp0,[x_\stream,_avail_in]
	//next_in=avail_out,start_in=total_out
	ldp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
	str	x_\m_out_start,[x_\stream,_next_out]
	add	w_\start_in,w_\start_in,w_\m_out_buf
	sub	w_\next_in,w_\next_in,w_\m_out_buf
	stp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
.endm

	.global	isal_deflate_finish_aarch64
	.arch armv8-a+crc
	.type	isal_deflate_finish_aarch64, %function
/*
	void isal_deflate_finish_aarch64(struct isal_zstream *stream)

	In:   x0 = stream
	Out:  none (results are written into *stream)

	Flow:
	  1. main loop   - while at least 4 input bytes remain: hash 4 bytes,
	                   look up / update last_seen[], emit either a
	                   length+distance code or a literal code.
	  2. second loop - emit the remaining bytes as literals.
	  3. if the output buffer is not full, emit literal 256, set has_eob
	     and select the next state (ZSTATE_TRL when end_of_stream == 1,
	     otherwise ZSTATE_SYNC_FLUSH).
	  4. update_state writes progress back into the stream.
	Any loop leaves early to step 4 when m_out_buf > m_out_end.

	The 32-bit load in the main loop followed by "and ...,0xff" takes the
	byte at next_in from the low 8 bits, i.e. little-endian data layout
	is assumed.

	NOTE(review): m_bit_count lives in w18. x18 is the platform register
	under AAPCS64 and is reserved on some operating systems - confirm the
	supported targets before porting.
	NOTE(review): push_stack/pop_stack come from an included header and are
	presumed to save/restore the callee-saved registers used below
	(x19-x28) - verify against stdmac_aarch64.h.
*/
	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
	declare_generic_reg	state,		8,x	//&stream->state
	declare_generic_reg	avail_in,	9,w
	declare_generic_reg	end_of_stream,	10,w	//can be used in loop

	declare_generic_reg	hash_mask,	11,w
	declare_generic_reg	match_length,	12,w
	declare_generic_reg	hufftables,	13,x

	declare_generic_reg	m_out_buf,	14,x
	declare_generic_reg	m_out_start,	15,x
	declare_generic_reg	m_out_end,	16,x
	declare_generic_reg	m_bits,		17,x
	declare_generic_reg	m_bit_count,	18,w

	declare_generic_reg	start_in,	19,x
	declare_generic_reg	end_in,		20,x
	declare_generic_reg	next_in,	21,x
	declare_generic_reg	loop_end_cnt,	22,x

	declare_generic_reg	literal,	23,w
	declare_generic_reg	hash,		24,w
	declare_generic_reg	dist,		25,w

	declare_generic_reg	last_seen,	26,x
	declare_generic_reg	file_start,	27,x
	declare_generic_reg	hist_size,	28,w

	declare_generic_reg	tmp0,		5,w
	declare_generic_reg	tmp1,		6,w
	declare_generic_reg	tmp2,		7,w

	//code_len shares x24 with hash and code2 shares x10 with end_of_stream;
	//each pair is never live at the same time in the code below
	declare_generic_reg	code,		3,x
	declare_generic_reg	code_len,	24,x
	declare_generic_reg	code2,		10,x
	declare_generic_reg	code_len2,	4,x


isal_deflate_finish_aarch64:
	//save registers
	push_stack

	// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	ldr	w_m_out_end,[stream,_avail_out]
	ldr	m_out_buf,[stream,_next_out]
	add	m_out_end,m_out_buf,w_m_out_end,uxtw
	sub	m_out_end,m_out_end,8			//m_out_end = next_out + avail_out - 8
	mov	m_out_start,m_out_buf
	stp	m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
	str	m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
	ldr	m_bit_count,[stream,_internal_state_bitbuf_m_bit_count]
	ldr	m_bits,[stream,_internal_state_bitbuf_m_bits]

	//init variables
	//last_seen=&stream.internal_state.head = _internal_state+_head
	//the offset is applied in two adds (presumably because it does not fit
	//a single add immediate)
	add	last_seen,stream,65536
	add	last_seen,last_seen,_internal_state+_head -65536


	//start_in=stream->next_in;next_in=start_in
	ldr	avail_in,[stream,_avail_in]
	ldr	start_in,[stream,_next_in]
	mov	next_in,start_in
	add	end_in,start_in,avail_in,uxtw		//avail_in reg is free now
	ldr	hufftables,[stream,_hufftables]
	cbz	avail_in,update_not_full		//no input: only the end-of-block part


	sub	loop_end_cnt,end_in,4			//loop end

	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
	ldr	w_file_start,[stream,_total_in]
	sub	file_start,next_in,w_file_start,uxtw

	//uint32_t hist_size = state->dist_mask;
	ldr	hist_size,[stream,_internal_state + _dist_mask]

	//uint32_t hash_mask = state->hash_mask;
	ldr	hash_mask,[stream,_internal_state + _hash_mask]

	//fewer than 4 input bytes: skip the hashing loop
	cmp	next_in,loop_end_cnt
	bhi	main_loop_end
main_loop_start:
	//is_full(&state->bitbuf)
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//next_in <= end_in-4 here, so the 4-byte load stays inside the input
	ldr	literal,[next_in]
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask

	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
	ldrh	w_tmp0,[last_seen,x_hash,lsl 1]		//tmp_w last_seen[hash]
	sub	x_dist,next_in,file_start
	//last_seen[hash] = (uint64_t) (next_in - file_start);
	strh	dist,[last_seen,x_hash,lsl 1]
	sub	dist,dist,w_tmp0
	and	dist,dist,0xffff

	//emit a literal unless (dist - 1) < hist_size (unsigned compare)
	sub	w_tmp0,dist,1
	cmp	hist_size,w_tmp0
	bls	get_lit_code

	/// match_length = compare258(next_in - dist, next_in, 258);
	sub	x_tmp2,next_in,x_dist
	sub	x_hash,end_in,next_in			//hash reg reused: end_in - next_in
	compare_max_258_bytes	tmp2,next_in,hash,match_length,tmp0,tmp1
	//matches of 3 bytes or fewer are emitted as a literal
	cmp	match_length,3
	bls	get_lit_code

	get_len_code	hufftables,match_length,code,code_len,tmp0
	get_dist_code	hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2

	//code |= code2 << code_len;
	//code_len += code_len2;
	lsl	code2,code2,code_len
	orr	code,code,code2
	add	code_len,code_len,code_len2

	//next_in += match_length;
	add	next_in,next_in,match_length,uxtw

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf

	cmp	next_in,loop_end_cnt
	bls	main_loop_start
	b	main_loop_end
get_lit_code:
	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
	and	literal,literal,0xff
	get_lit_code	hufftables,literal,code,code_len

	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	main_loop_start
main_loop_end:
	//tail: loop while next_in <= end_in-1
	sub	loop_end_cnt,end_in,1
	cmp	next_in,loop_end_cnt
	bhi	update_not_full
second_loop_start:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit
	//byte load: a 32-bit load here would read up to 3 bytes past end_in
	//when next_in == end_in-1; ldrb zero-extends, so no mask is needed
	ldrb	literal,[next_in]
	get_lit_code	hufftables,literal,code,code_len
	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	second_loop_start

update_not_full:
	//only emit the end-of-block code when the output buffer is not full
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	mov	literal,256
	get_lit_code	hufftables,literal,code,code_len

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	ldrh	w_end_of_stream,[stream,_end_of_stream]
	mov	w_tmp0,1
	strb	w_tmp0,[stream,_internal_state_has_eob]	//has_eob = 1
	//state = (end_of_stream == 1) ? ZSTATE_TRL : ZSTATE_SYNC_FLUSH
	cmp	w_end_of_stream,w_tmp0
	mov	w_tmp0,ZSTATE_TRL
	mov	w_tmp1,ZSTATE_SYNC_FLUSH
	csel	w_tmp0,w_tmp0,w_tmp1,eq
	str	w_tmp0,[stream,_internal_state+_state]

update_state_exit:
	update_state	stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
	pop_stack
	ret

	.size	isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64