/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#include "../include/aarch64_label.h"

	.arch armv8-a+crc+crypto
	.text
	.align	2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/*
declare Macros
*/

/*
 * declare_generic_reg name, reg, default
 *
 * Creates three register aliases for register number reg:
 *   name    = register reg with the width letter given by default (w or x)
 *   w_name  = 32-bit view of register reg
 *   x_name  = 64-bit view of register reg
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 * tzbytecnt param0, param1
 *
 * In:    x_param0 = 64-bit value
 * Out:   w_param0 = number of zero bytes at the least significant end of
 *                   the value, in the range 0..8 (8 when the value is zero)
 * Clobb: x_param1 (scratch)
 *
 * rbit followed by clz counts the trailing zero bits; shifting right by 3
 * turns bits into whole bytes. CLZ of a zero X register yields 64, so a
 * zero input gives 64 >> 3 = 8 without any special casing.
 */
.macro	tzbytecnt param0:req,param1:req
	rbit	x_\param1, x_\param0
	clz	x_\param1, x_\param1
	lsr	w_\param0, w_\param1, 3
.endm

/*
 * write_deflate_icf param0, param1, param2, param3
 *
 * Packs one 32-bit word and stores it:
 *   [x_param0] = w_param1 | (w_param2 << 10) | (w_param3 << 19)
 *
 * Clobb: w_param1 (holds the packed word on exit)
 * No masking is done here; each field must already fit its bit range.
 */
.macro	write_deflate_icf param0:req,param1:req,param2:req,param3:req
	orr	w_\param1, w_\param1, w_\param3, lsl 19
	orr	w_\param1, w_\param1, w_\param2, lsl 10
	str	w_\param1, [x_\param0]
.endm

	.align	2
	.global	cdecl(gen_icf_map_h1_aarch64)
#ifndef __APPLE__
	.type	gen_icf_map_h1_aarch64, %function
#endif

	/*
	 * Register aliases.
	 *
	 * NOTE: several aliases share one physical register. In particular
	 *   x0 = stream_param / param0 / ret_val / tmp0 / tmp_has_hist
	 *   x1 = matches_icf_lookup_param / param1 / tmp1 / tmp_offset_hash_table
	 *   x2 = input_size_param / param2 / tmp2
	 *   x3 = param3 / tmp3 / input_size
	 * so the incoming arguments are copied away before any tmp is written,
	 * and input_size is dead once the loop limit has been computed.
	 */

	/* arguments */
	declare_generic_reg	stream_param,			0,x
	declare_generic_reg	matches_icf_lookup_param,	1,x
	declare_generic_reg	input_size_param,		2,x

	declare_generic_reg	param0,		0,x
	declare_generic_reg	param1,		1,x
	declare_generic_reg	param2,		2,x
	declare_generic_reg	param3,		3,x

	/* return */
	declare_generic_reg	ret_val,	0,x

	/* variables */
	declare_generic_reg	input_size,		3,x
	declare_generic_reg	next_in,		4,x
	declare_generic_reg	matches_icf_lookup,	6,x
	declare_generic_reg	hash_table,		7,x
	declare_generic_reg	end_in,			8,x
	declare_generic_reg	file_start,		9,x
	declare_generic_reg	hash_mask,		10,w
	declare_generic_reg	hist_size,		11,w
	declare_generic_reg	stream_saved,		12,x
	declare_generic_reg	literal_32,		13,w
	declare_generic_reg	literal_1,		14,w
	declare_generic_reg	dist,			15,w

	declare_generic_reg	tmp_has_hist,		0,w
	declare_generic_reg	tmp_offset_hash_table,	1,x
	declare_generic_reg	tmp0,			0,x
	declare_generic_reg	tmp1,			1,x
	declare_generic_reg	tmp2,			2,x
	declare_generic_reg	tmp3,			3,x
	declare_generic_reg	tmp5,			5,x

/* constant */
.equ	ISAL_LOOK_AHEAD, 288
.equ	SHORTEST_MATCH, 4
.equ	LEN_OFFSET, 254

/* mask */
.equ	mask_10bit, 1023
.equ	mask_lit_dist, 0x7800	/* 0x1e << 10: the distance field used for literals */

/* offset of struct isal_zstream */
.equ	offset_next_in, 0
.equ	offset_avail_in, 8
.equ	offset_total_in, 12
.equ	offset_next_out, 16
.equ	offset_avail_out, 24
.equ	offset_total_out, 28
.equ	offset_hufftables, 32
.equ	offset_level, 40
.equ	offset_level_buf_size, 44
.equ	offset_level_buf, 48
.equ	offset_end_of_stream, 56
.equ	offset_flush, 58
.equ	offset_gzip_flag, 60
.equ	offset_hist_bits, 62
.equ	offset_state, 64
.equ	offset_state_block_end, 72
.equ	offset_state_dist_mask, 76
.equ	offset_state_has_hist, 135

/* offset of struct level_buf */
.equ	offset_hash_map_hash_table, 4712

/*
uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
			     struct deflate_icf *matches_icf_lookup, uint64_t input_size)
*/

/*
 * gen_icf_map_h1_aarch64 (AArch64 counterpart of the C prototype above)
 *
 * In:    x0 = stream
 *        x1 = matches_icf_lookup, output array of 32-bit words
 *        x2 = input_size
 * Out:   x0 = number of input positions for which a word was written,
 *             i.e. final next_in minus stream->next_in;
 *             0 when input_size is below ISAL_LOOK_AHEAD
 * Clobb: x0-x15, flags
 * Stack: one 16-byte frame record (x29, x30); no calls are made
 *
 * For every position in [next_in, next_in + input_size - ISAL_LOOK_AHEAD):
 *   - hash the next 4 input bytes with crc32cw, masked by hash_mask
 *   - read the previous position stored for that hash (16-bit table entry)
 *     and replace it with the current position
 *   - dist = ((position - previous - 1) & dist_mask) + 1
 *   - compare the 8 bytes at next_in with the 8 bytes at next_in - dist
 *   - if at least SHORTEST_MATCH leading bytes agree, write
 *     (length + LEN_OFFSET, distance code, distance extra bits),
 *     otherwise write the current byte as a literal with distance field 0x1e
 * If state.has_hist is zero on entry, the first byte is always written as a
 * literal, its position is put in the hash table and has_hist is set to 1.
 *
 * NOTE(review): the byte-wise match count relies on 64-bit loads placing
 * the lowest address in the least significant byte (little-endian data
 * accesses) - confirm before using on a big-endian configuration.
 * NOTE(review): the struct offsets above are hard-coded and must be kept
 * in sync with the C structure layout.
 */
cdecl(gen_icf_map_h1_aarch64):
	cmp	input_size_param, (ISAL_LOOK_AHEAD-1) // 287
	bls	.fast_exit
	stp	x29, x30, [sp, -16]!

	/* move the arguments out of x0/x1/x2 before those are reused as tmps */
	mov	stream_saved, stream_param
	mov	matches_icf_lookup, matches_icf_lookup_param
	mov	x29, sp

	ldrb	tmp_has_hist, [stream_saved, offset_state_has_hist]
	/* 4712 does not fit an add immediate, so it goes through a register */
	mov	tmp_offset_hash_table, offset_hash_map_hash_table
	ldr	end_in, [stream_saved, offset_next_in]	// start of input for now
	mov	input_size, input_size_param
	ldr	hash_table, [stream_saved, offset_level_buf]
	ldr	w_file_start, [stream_saved, offset_total_in]
	/* hist_size = word at offset 76 (dist_mask), hash_mask = word at offset 80 */
	ldp	hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
	add	hash_table, hash_table, tmp_offset_hash_table
	sub	file_start, end_in, file_start		// next_in - total_in
	cbz	tmp_has_hist, .igzip_no_hist
	b	.while_check1

	.align	3
.igzip_no_hist:
	/*
	 * No history yet: the first byte is written as a literal.
	 * The stored word is byte | mask_lit_dist with the extra-bits field 0.
	 */
	ldrb	w_tmp1, [end_in]
	add	next_in, end_in, 1
	orr	w_tmp1, w_tmp1, mask_lit_dist
	str	w_tmp1, [matches_icf_lookup], 4

	/* record the position of the first 4 bytes in the hash table */
	ldr	w_tmp0, [end_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp5, w_tmp0, hash_mask
	sub	x_tmp1, end_in, file_start
	mov	w_tmp2, 1
	mov	x_tmp0, 1		// one position consumed, returned if the loop is skipped
	strh	w_tmp1, [hash_table, x_tmp5, lsl 1]
	strb	w_tmp2, [stream_saved, offset_state_has_hist]
	b	.while_check2

.while_check1:
	mov	next_in, end_in
	mov	x_tmp0, 0		// nothing consumed, returned if the loop is skipped

.while_check2:
	/* end_in becomes the loop limit: start + input_size - ISAL_LOOK_AHEAD */
	sub	input_size, input_size, ISAL_LOOK_AHEAD
	add	end_in, end_in, input_size
	cmp	next_in, end_in
	bcs	.exit			// x0 already holds the return value
	mov	literal_32, 32
	mov	literal_1, 1
	b	.while_loop

	.align	3
.new_match_found:
	/*
	 * Here: w_tmp0 = match length in bytes (4..8), w_tmp2 = dist - 1,
	 *       w_tmp3 = 0, dist = match distance.
	 * Build  w_tmp1 = length + LEN_OFFSET,
	 *        w_tmp2 = distance code, w_tmp3 = distance extra bits.
	 */
	clz	w_tmp5, w_tmp2
	add	w_tmp1, w_tmp0, LEN_OFFSET
	sub	w_tmp5, literal_32, w_tmp5	// bit length of (dist - 1)
	cmp	dist, 2
	sub	w_tmp5, w_tmp5, #2		// number of extra bits; does not touch flags
	/* dist of 1 or 2: the code is dist - 1 and there are no extra bits */
	bls	.skip_compute_dist_icf_code

	lsl	w_tmp3, literal_1, w_tmp5
	sub	w_tmp3, w_tmp3, #1		// (1 << extra bit count) - 1
	lsr	w_tmp0, w_tmp2, w_tmp5
	and	w_tmp3, w_tmp3, w_tmp2		// extra bits = low bits of (dist - 1)
	add	w_tmp2, w_tmp0, w_tmp5, lsl 1	// code = top bits + 2 * extra bit count

.skip_compute_dist_icf_code:
	mov	param0, matches_icf_lookup
	write_deflate_icf	param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	beq	.save_with_exit

.while_loop:
	/* invariant: next_in is below end_in here */
	ldr	w_tmp0, [next_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp0, w_tmp0, hash_mask
	sub	x_tmp1, next_in, file_start	// current position
	lsl	x_tmp0, x_tmp0, 1		// byte offset of the 16-bit table entry
	sub	w_tmp2, w_tmp1, #1
	ldrh	w_tmp3, [hash_table, x_tmp0]	// previous position for this hash
	strh	w_tmp1, [hash_table, x_tmp0]	// table keeps the low 16 bits of the position
	sub	w_tmp2, w_tmp2, w_tmp3
	and	w_tmp2, w_tmp2, hist_size	// w_tmp2 = dist - 1, kept for .new_match_found
	add	dist, w_tmp2, 1
	ldr	x_tmp0, [next_in]
	sub	x_tmp1, next_in, w_dist, uxtw
	ldr	x_tmp1, [x_tmp1]		// 8 bytes at next_in - dist
	eor	x_tmp0, x_tmp1, x_tmp0		// zero bytes mark equal bytes
	tzbytecnt	param0,param1		// w_tmp0 = count of equal leading bytes

	cmp	w_tmp0, (SHORTEST_MATCH-1)
	mov	w_tmp3, 0			// default extra bits; mov leaves flags intact
	bhi	.new_match_found

	/* no usable match: write the current byte as a literal */
	ldrb	w_param1, [next_in]
	mov	x_param0, matches_icf_lookup
	mov	w_param3, 0
	mov	w_param2, 0x1e			// literal distance field, 0x1e << 10 == mask_lit_dist
	write_deflate_icf	param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	bne	.while_loop

.save_with_exit:
	/* return next_in - stream->next_in */
	ldr	ret_val, [stream_saved, offset_next_in]
	sub	ret_val, next_in, ret_val

.exit:
	ldp	x29, x30, [sp], 16
	ret

	.align	3
.fast_exit:
	/* input shorter than ISAL_LOOK_AHEAD: nothing is read or written */
	mov	ret_val, 0
	ret
#ifndef __APPLE__
	.size	gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
#endif