/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	.arch	armv8-a+crc+crypto
	.text
	.align	2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/*
 * Macro definitions
 */

/*
 * declare_generic_reg name, reg, default
 *
 * Creates three register aliases for register number "reg":
 *   name    -> the register with the width prefix given by "default" (w or x)
 *   w_name  -> the 32-bit view
 *   x_name  -> the 64-bit view
 */
.macro	declare_generic_reg	name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 * tzbytecnt param0, param1
 *
 * In:      x_param0 = 64-bit value
 * Out:     w_param0 = number of trailing (least significant) zero bytes,
 *                     or 8 when the input value is zero
 * Clobber: x_param1, condition flags
 *
 * rbit followed by clz yields the trailing-zero bit count; the shift by 3
 * converts bits to whole bytes.
 */
.macro	tzbytecnt	param0:req,param1:req
	rbit	x_\param1, x_\param0
	cmp	x_\param0, 0
	clz	x_\param1, x_\param1
	mov	w_\param0, 8
	lsr	w_\param1, w_\param1, 3
	csel	w_\param0, w_\param1, w_\param0, ne
.endm

/*
 * write_deflate_icf param0, param1, param2, param3
 *
 * Stores the 32-bit word
 *     w_param1 | (w_param2 << 10) | (w_param3 << 19)
 * to the address held in x_param0.
 *
 * Clobber: w_param1 (left holding the packed word)
 */
.macro	write_deflate_icf	param0:req,param1:req,param2:req,param3:req
	orr	w_\param1, w_\param1, w_\param3, lsl 19
	orr	w_\param1, w_\param1, w_\param2, lsl 10
	str	w_\param1, [x_\param0]
.endm

	.align	2
	.global	gen_icf_map_h1_aarch64
	.type	gen_icf_map_h1_aarch64, %function

	/* incoming arguments */
	declare_generic_reg	stream_param,			0,x
	declare_generic_reg	matches_icf_lookup_param,	1,x
	declare_generic_reg	input_size_param,		2,x

	/* positional operands handed to the macros above */
	declare_generic_reg	param0,		0,x
	declare_generic_reg	param1,		1,x
	declare_generic_reg	param2,		2,x
	declare_generic_reg	param3,		3,x

	/* return value */
	declare_generic_reg	ret_val,	0,x

	/* working variables */
	declare_generic_reg	input_size,		3,x
	declare_generic_reg	next_in,		4,x
	declare_generic_reg	matches_icf_lookup,	6,x
	declare_generic_reg	hash_table,		7,x
	declare_generic_reg	end_in,			8,x
	declare_generic_reg	file_start,		9,x
	declare_generic_reg	hash_mask,		10,w
	declare_generic_reg	hist_size,		11,w
	declare_generic_reg	stream_saved,		12,x
	declare_generic_reg	literal_32,		13,w
	declare_generic_reg	literal_1,		14,w
	declare_generic_reg	dist,			15,w

	/* scratch aliases; these share x0/x1 with the argument aliases */
	declare_generic_reg	tmp_has_hist,		0,w
	declare_generic_reg	tmp_offset_hash_table,	1,x
	declare_generic_reg	tmp0,			0,x
	declare_generic_reg	tmp1,			1,x
/* scratch register aliases, continued */
	declare_generic_reg	tmp2,			2,x
	declare_generic_reg	tmp3,			3,x
	declare_generic_reg	tmp5,			5,x

/* constants */
.equ	ISAL_LOOK_AHEAD,		288
.equ	SHORTEST_MATCH,			4
.equ	LEN_OFFSET,			254

/* masks */
.equ	mask_10bit,			1023
.equ	mask_lit_dist,			0x7800

/* field offsets within struct isal_zstream */
.equ	offset_next_in,			0
.equ	offset_avail_in,		8
.equ	offset_total_in,		12
.equ	offset_next_out,		16
.equ	offset_avail_out,		24
.equ	offset_total_out,		28
.equ	offset_hufftables,		32
.equ	offset_level,			40
.equ	offset_level_buf_size,		44
.equ	offset_level_buf,		48
.equ	offset_end_of_stream,		56
.equ	offset_flush,			58
.equ	offset_gzip_flag,		60
.equ	offset_hist_bits,		62
.equ	offset_state,			64
.equ	offset_state_block_end,		72
.equ	offset_state_dist_mask,		76
.equ	offset_state_has_hist,		135

/* field offset within struct level_buf */
.equ	offset_hash_map_hash_table,	4712

/*
 * Assembly counterpart of the C routine
 *
 *   uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
 *                                struct deflate_icf *matches_icf_lookup,
 *                                uint64_t input_size)
 *
 * In:   x0 = stream, x1 = matches_icf_lookup, x2 = input_size
 * Out:  x0 = 0 when input_size <= ISAL_LOOK_AHEAD - 1; otherwise the number
 *       of input positions for which a 32-bit entry was written to
 *       matches_icf_lookup (final next_in minus the pointer stored at
 *       offset_next_in).
 * Uses: x0-x15 and the condition flags. x29/x30 are pushed and restored.
 *
 * Memory written: one 32-bit entry per processed input byte at
 * matches_icf_lookup, 16-bit slots of the hash table located at
 * (pointer at offset_level_buf) + offset_hash_map_hash_table, and the
 * has_hist byte of the stream when it was zero on entry.
 *
 * Processing stops ISAL_LOOK_AHEAD bytes before the end of the input, so
 * the 8-byte loads at next_in stay inside the supplied input.
 */
gen_icf_map_h1_aarch64:
	cmp	input_size_param, (ISAL_LOOK_AHEAD - 1)
	bls	.Lreturn_zero			// unsigned: input_size <= 287
	stp	x29, x30, [sp, -16]!

	/* Move the arguments out of x0/x1 before those are reused as scratch. */
	mov	stream_saved, stream_param
	mov	matches_icf_lookup, matches_icf_lookup_param
	mov	x29, sp

	ldrb	tmp_has_hist, [stream_saved, offset_state_has_hist]
	mov	tmp_offset_hash_table, offset_hash_map_hash_table
	ldr	end_in, [stream_saved, offset_next_in]	// start of input for now
	mov	input_size, input_size_param
	ldr	hash_table, [stream_saved, offset_level_buf]
	ldr	w_file_start, [stream_saved, offset_total_in]
	/* hist_size <- word at offset_state_dist_mask, hash_mask <- next word */
	ldp	hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
	add	hash_table, hash_table, tmp_offset_hash_table
	sub	file_start, end_in, file_start	// file_start = next_in - total_in
	cbz	tmp_has_hist, .Lseed_first_byte
	b	.Lhave_history

	.align	3
	/*
	 * No history yet: emit the first input byte unconditionally.
	 * Net effect of the load/insert/store sequence below is that the entry
	 * becomes (first byte | mask_lit_dist) and the output pointer advances
	 * by one entry.
	 */
.Lseed_first_byte:
	ldrb	w_tmp1, [end_in]
	add	next_in, end_in, 1
	ldrh	w_tmp0, [matches_icf_lookup]
	bfi	w_tmp0, w_tmp1, 0, 10		// low 10 bits <- first byte
	strh	w_tmp0, [matches_icf_lookup]
	ldr	w_tmp0, [matches_icf_lookup]
	and	w_tmp0, w_tmp0, mask_10bit
	orr	w_tmp0, w_tmp0, mask_lit_dist
	str	w_tmp0, [matches_icf_lookup], 4	// post-increment to next entry
	ldr	w_tmp0, [end_in]		// first four input bytes
	crc32cw	w_tmp0, wzr, w_tmp0		// hash them, zero seed

	and	w_tmp5, w_tmp0, hash_mask
	sub	x_tmp1, end_in, file_start	// position relative to file_start
	mov	w_tmp2, 1
	mov	x_tmp0, 1			// return value if nothing else is processed
	strh	w_tmp1, [hash_table, x_tmp5, lsl 1]
	strb	w_tmp2, [stream_saved, offset_state_has_hist]
	b	.Lcompute_end

.Lhave_history:
	mov	next_in, end_in
	mov	x_tmp0, 0			// return value if nothing is processed

	/* end_in = start of input + input_size - ISAL_LOOK_AHEAD */
.Lcompute_end:
	sub	input_size, input_size, ISAL_LOOK_AHEAD
	add	end_in, end_in, input_size
	cmp	next_in, end_in
	bcs	.Lepilogue			// next_in >= end_in: x0 already holds 0 or 1
	mov	literal_32, 32
	mov	literal_1, 1
	b	.Lscan

	.align	3
	/*
	 * A match of at least SHORTEST_MATCH bytes was found.
	 * On entry: w_tmp0 = match length in bytes (4..8), w_tmp2 = dist - 1,
	 *           w_tmp3 = 0, dist = match distance.
	 * Builds: w_tmp1 = length + LEN_OFFSET, w_tmp2 = distance code,
	 *         w_tmp3 = distance extra bits.
	 */
.Lemit_match:
	clz	w_tmp5, w_tmp2
	add	w_tmp1, w_tmp0, LEN_OFFSET
	sub	w_tmp5, literal_32, w_tmp5	// bit length of (dist - 1)
	cmp	dist, 2
	sub	w_tmp5, w_tmp5, #2		// number of extra bits
	bls	.Lstore_match			// dist <= 2: code is dist - 1, no extra bits

	lsl	w_tmp3, literal_1, w_tmp5
	sub	w_tmp3, w_tmp3, #1		// mask of the extra bits
	lsr	w_tmp0, w_tmp2, w_tmp5
	and	w_tmp3, w_tmp3, w_tmp2		// extra bits = low bits of (dist - 1)
	add	w_tmp2, w_tmp0, w_tmp5, lsl 1	// code = high bits + 2 * extra-bit count

.Lstore_match:
	mov	param0, matches_icf_lookup
	write_deflate_icf	param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	beq	.Lreturn_consumed

	/*
	 * Main loop, one input position per iteration:
	 *   1. hash the four bytes at next_in and mask with hash_mask
	 *   2. swap the 16-bit hash-table slot with the current position
	 *   3. dist = ((position - 1 - previous slot value) & hist_size) + 1
	 *   4. compare eight bytes at next_in with eight bytes at next_in - dist
	 */
.Lscan:
	ldr	w_tmp0, [next_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp0, w_tmp0, hash_mask
	sub	x_tmp1, next_in, file_start	// current position
	lsl	x_tmp0, x_tmp0, 1		// byte offset of the 16-bit slot
	sub	w_tmp2, w_tmp1, #1
	ldrh	w_tmp3, [hash_table, x_tmp0]	// previous position for this hash
	strh	w_tmp1, [hash_table, x_tmp0]	// record the current position
	sub	w_tmp2, w_tmp2, w_tmp3
	and	w_tmp2, w_tmp2, hist_size	// w_tmp2 = dist - 1
	add	dist, w_tmp2, 1
	ldr	x_tmp0, [next_in]
	sub	x_tmp1, next_in, w_dist, uxtw
	ldr	x_tmp1, [x_tmp1]		// eight bytes at next_in - dist
	eor	x_tmp0, x_tmp1, x_tmp0		// zero bytes mark equal bytes
	tzbytecnt	param0,param1		// w_tmp0 = count of leading equal bytes

	cmp	w_tmp0, (SHORTEST_MATCH - 1)
	mov	w_tmp3, 0			// default: no extra bits
	bhi	.Lemit_match

	/*
	 * Fewer than SHORTEST_MATCH equal bytes: emit the byte itself together
	 * with dist code 0x1e (0x1e << 10 equals mask_lit_dist).
	 */
	ldrb	w_param1, [next_in]
	mov	x_param0, matches_icf_lookup
	mov	w_param3, 0
	mov	w_param2, 0x1e
	write_deflate_icf	param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	bne	.Lscan

	/* ret_val = next_in - pointer stored at offset_next_in */
.Lreturn_consumed:
	ldr	ret_val, [stream_saved, offset_next_in]
	sub	ret_val, next_in, ret_val

.Lepilogue:
	ldp	x29, x30, [sp], 16
	ret

	.align	3
	/* Input too short to process: nothing was pushed, return 0 directly. */
.Lreturn_zero:
	mov	ret_val, 0
	ret
	.size	gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64