/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#include "../include/aarch64_label.h"

	.arch armv8-a+crc
	.text
	.align 2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"


/*
declare Macros
*/

/*
	declare_generic_reg name, reg, default

	Creates three register aliases for general purpose register number
	"reg":
		name	-> <default><reg>	(default is x or w)
		w_name	-> w<reg>		(32-bit view)
		x_name	-> x<reg>		(64-bit view)
*/
.macro	declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
	update_state stream, start_in, next_in, end_in,
		     m_out_buf, m_out_start, tmp0, tmp1

	Writes the input/output progress back into the stream structure.
	Every argument is an alias NAME previously created with
	declare_generic_reg (the macro derives the x_/w_ views itself).

	Effects, in order:
	  - sets the has_hist byte to 1 when next_in > start_in
	  - stores next_in, and the pair at _avail_in
	    (end_in - next_in, total_in + (next_in - start_in))
	  - advances _next_out by the number of bytes written
	    (m_out_buf - m_out_start) and stores the pair at _avail_out
	    (avail_out - written, total_out + written)

	The two stp/ldp pairs rely on the second field directly following
	the first one in memory (total_in after avail_in, total_out after
	avail_out); the offsets come from the included headers.

	Clobbers: m_out_buf, m_out_start, start_in, end_in, next_in, tmp0
	and the condition flags. tmp1 is accepted but not used.
	The numeric local label keeps the macro safe to expand more than once.
*/
.macro	update_state	stream:req,start_in:req,next_in:req,end_in:req,	\
			m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req

	//m_out_buf=bytes_written
	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
	//has_hist is only set when at least one input byte was consumed
	cmp	x_\next_in,x_\start_in
	bls	1f
	mov	w_\tmp0,1
	strb	w_\tmp0,[x_\stream,_internal_state_has_hist]
1:
	ldr	w_\tmp0,[x_\stream,_total_in]
	ldr	x_\m_out_start,[x_\stream,_next_out]	//m_out_start = next_out

	str	x_\next_in,[x_\stream,_next_in]
	sub	x_\start_in,x_\next_in,x_\start_in	//start_in = bytes consumed
	sub	x_\end_in,x_\end_in,x_\next_in		//end_in = bytes left
	add	w_\tmp0,w_\tmp0,w_\start_in		//tmp0 = new total_in
	stp	w_\end_in,w_\tmp0,[x_\stream,_avail_in]
	//next_in=avail_out,start_in=total_out
	ldp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
	str	x_\m_out_start,[x_\stream,_next_out]
	add	w_\start_in,w_\start_in,w_\m_out_buf
	sub	w_\next_in,w_\next_in,w_\m_out_buf
	stp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
.endm

	.global	cdecl(isal_deflate_finish_aarch64)
	.arch armv8-a+crc
#ifndef __APPLE__
	.type	isal_deflate_finish_aarch64, %function
#endif
/*
	void isal_deflate_finish_aarch64(struct isal_zstream *stream)

	In:	x0 = stream
	Out:	none in registers; results are written into *stream.

	Outline (pseudo-C of the visible control flow):
	  set up the bit buffer from next_out / avail_out
	  if (avail_in != 0) {
	      while (next_in <= end_in - 4)	// main loop: hash 4 bytes,
		  emit a length/distance code	// look up last_seen[hash]
		  or a single literal
	      while (next_in <= end_in - 1)	// tail loop: literals only
		  emit a literal
	  }
	  emit literal 256, set has_eob, pick the next state
	  (ZSTATE_TRL when end_of_stream == 1, else ZSTATE_SYNC_FLUSH)
	  update_state
	Whenever m_out_buf > m_out_end (output full) the code jumps straight
	to update_state_exit without emitting the end symbol.

	Registers x19-x28 are used below; push_stack/pop_stack come from an
	included header and are presumably what saves and restores them
	(not visible here - confirm in stdmac_aarch64.h).

	Note on shared registers: hash and code_len both live in register 24,
	end_of_stream and code2 both live in register 10. Each pair is never
	live at the same time in the code below.
*/
	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
	declare_generic_reg	state,		8,x	//&stream->state
	declare_generic_reg	avail_in,	9,w
	declare_generic_reg	end_of_stream,	10,w	//can be used in loop

	declare_generic_reg	hash_mask,	11,w
	declare_generic_reg	match_length,	12,w
	declare_generic_reg	hufftables,	13,x

	declare_generic_reg	m_out_buf,	14,x
	declare_generic_reg	m_out_start,	15,x
	declare_generic_reg	m_out_end,	16,x
	declare_generic_reg	m_bits,		17,x
	declare_generic_reg	m_bit_count,	2,w

	declare_generic_reg	start_in,	19,x
	declare_generic_reg	end_in,		20,x
	declare_generic_reg	next_in,	21,x
	declare_generic_reg	loop_end_cnt,	22,x

	declare_generic_reg	literal,	23,w
	declare_generic_reg	hash,		24,w
	declare_generic_reg	dist,		25,w

	declare_generic_reg	last_seen,	26,x
	declare_generic_reg	file_start,	27,x
	declare_generic_reg	hist_size,	28,w

	declare_generic_reg	tmp0,		5 ,w
	declare_generic_reg	tmp1,		6 ,w
	declare_generic_reg	tmp2,		7 ,w

	declare_generic_reg	code,		3,x
	declare_generic_reg	code_len,	24,x
	declare_generic_reg	code2,		10,x
	declare_generic_reg	code_len2,	4,x


cdecl(isal_deflate_finish_aarch64):
	//save registers
	push_stack

	// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	ldr	w_m_out_end,[stream,_avail_out]
	ldr	m_out_buf,[stream,_next_out]
	add	m_out_end,m_out_buf,w_m_out_end,uxtw
	sub	m_out_end,m_out_end,8		//m_out_end = next_out + avail_out - 8
	mov	m_out_start,m_out_buf
	stp	m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
	str	m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
	ldr	m_bit_count,[stream,_internal_state_bitbuf_m_bit_count]
	ldr	m_bits,[stream,_internal_state_bitbuf_m_bits]

	//init variables
	//last_seen=&stream.internal_state.head = _internal_state+_head
	//the offset is added in two steps (65536 first, then the rest)
	add	last_seen,stream,65536
	add	last_seen,last_seen,_internal_state+_head -65536


	//start_in=stream->next_in;next_in=start_in
	ldr	avail_in,[stream,_avail_in]
	ldr	start_in,[stream,_next_in]
	mov	next_in,start_in
	add	end_in,start_in,avail_in,uxtw	//avail_in reg is free now
	ldr	hufftables,[stream,_hufftables]
	cbz	avail_in, update_not_full	//no input: only emit the end symbol


	sub	loop_end_cnt,end_in,4		//loop end
	//flags set here are consumed by the bhi below; the loads and the
	//non-flag-setting sub in between leave them untouched
	cmp	next_in,loop_end_cnt


	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
	ldr	w_file_start,[stream,_total_in]
	sub	file_start,next_in,w_file_start,uxtw

	//uint32_t hist_size = state->dist_mask;
	ldr	hist_size,[stream,_internal_state + _dist_mask]

	//uint32_t hash_mask = state->hash_mask;
	ldr	hash_mask,[stream,_internal_state + _hash_mask]

	bhi	main_loop_end			//fewer than 4 input bytes
main_loop_start:
	//is_full(&state->bitbuf)
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//at least 4 bytes remain here, so the 32-bit load stays in bounds
	ldr	literal,[next_in]
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask

	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
	ldrh	w_tmp0,[last_seen,x_hash,lsl 1]	//tmp_w last_seen[hash]
	sub	x_dist,next_in,file_start
	//last_seen[hash] = (uint64_t) (next_in - file_start);
	strh	dist,[last_seen,x_hash,lsl 1]
	sub	dist,dist,w_tmp0
	and	dist,dist,0xffff

	//emit a literal when hist_size <= dist - 1 (unsigned compare, so
	//dist == 0 wraps to 0xffffffff and also takes the literal path)
	sub	w_tmp0,dist,1
	cmp	hist_size,w_tmp0
	bls	get_lit_code

	/// match_length = compare258(next_in - dist, next_in, 258);
	sub	x_tmp2,next_in,x_dist
	sub	x_hash,end_in,next_in		//hash register reused: bytes left
	compare_max_258_bytes	tmp2,next_in,hash,match_length,tmp0,tmp1
	cmp	match_length,3
	bls	get_lit_code			//match of 3 or less: emit a literal

	get_len_code	hufftables,match_length,code,code_len,tmp0
	get_dist_code	hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2

	//code |= code2 << code_len;
	//code_len += code_len2;
	lsl	code2,code2,code_len
	orr	code,code,code2
	add	code_len,code_len,code_len2

	//next_in += match_length;
	add	next_in,next_in,match_length,uxtw

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf

	cmp	next_in,loop_end_cnt
	bls	main_loop_start
	b	main_loop_end
get_lit_code:
	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
	and	literal,literal,0xff
	get_lit_code	hufftables,literal,code,code_len

	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	main_loop_start
main_loop_end:
	//tail: the remaining bytes are emitted as plain literals
	sub	loop_end_cnt,end_in,1
	cmp	next_in,loop_end_cnt
	bhi	update_not_full
second_loop_start:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit
	//Only one byte is guaranteed to remain here, so load exactly one
	//byte. A 32-bit load could read up to three bytes past end_in.
	//ldrb zero-extends, so no extra masking is required.
	ldrb	literal,[next_in]
	get_lit_code	hufftables,literal,code,code_len
	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	second_loop_start

update_not_full:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//emit the code for literal 256
	mov	literal,256
	get_lit_code	hufftables,literal,code,code_len

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	ldrh	w_end_of_stream,[stream,_end_of_stream]
	mov	w_tmp0,1
	strb	w_tmp0,[stream,_internal_state_has_eob]
	//state = (end_of_stream == 1) ? ZSTATE_TRL : ZSTATE_SYNC_FLUSH
	cmp	w_end_of_stream,w_tmp0
	mov	w_tmp0,ZSTATE_TRL
	mov	w_tmp1,ZSTATE_SYNC_FLUSH
	csel	w_tmp0,w_tmp0,w_tmp1,eq
	str	w_tmp0,[stream,_internal_state+_state]

update_state_exit:
	update_state	stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
	pop_stack
	ret
#ifndef __APPLE__
	.size	isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
#endif