| /isa-l/erasure_code/ |
| H A D | gf_2vect_dot_prod_avx2_gfni.asm | 178 ;; Encodes 96 bytes of all "k" sources into 2x 96 bytes (parity disk) 219 ;; Encodes 64 bytes of all "k" sources into 2x 64 bytes (parity disks) 231 XLDR x0l, [ptr + pos] ;; Get next source vector low 32 bytes 232 XLDR x0h, [ptr + pos + 32] ;; Get next source vector high 32 bytes 252 ;; Encodes 32 bytes of all "k" sources into 2x 32 bytes (parity disks) 262 XLDR x0, [ptr + pos] ;Get next source vector (32 bytes) 279 ;; Encodes less than 32 bytes of all "k" sources into 2 parity disks 326 add pos, 96 ;; Loop on 96 bytes at a time first 337 add pos, 64 ;; encode next 64 bytes 346 add pos, 32 ;; encode next 32 bytes [all …]
|
| H A D | gf_vect_dot_prod_avx2_gfni.asm | 154 ;; Encodes 96 bytes of all "k" sources into 96 bytes (single parity disk) 187 ;; Encodes 64 bytes of all "k" sources into 64 bytes (single parity disk) 216 ;; Encodes 32 bytes of all "k" sources into 32 bytes (single parity disks) 241 ;; Encodes less than 32 bytes of all "k" sources into single parity disks 282 add pos, 96 ;; Loop on 96 bytes at a time first 293 add pos, 64 ;; encode next 64 bytes 302 add pos, 32 ;; encode next 32 bytes 309 ENCODE_LT_32B len ;; encode final bytes
|
| H A D | gf_2vect_mad_avx2_gfni.asm | 156 ;; Encodes 96 bytes of a single source into 2x 96 bytes (parity disks) 185 ;; Encodes 64 bytes of a single source into 2x 64 bytes (parity disks) 208 ;; Encodes 32 bytes of a single source into 2x 32 bytes (parity disks) 225 ;; Encodes less than 32 bytes of a single source into 2x parity disks 263 add pos, 96 ;; loop on 96 bytes at a time 271 ENCODE_64B_2 ;; encode next 64 bytes 280 ENCODE_32B_2 ;; encode next 32 bytes 289 ENCODE_LT_32B_2 len ;; encode final bytes
|
| H A D | gf_3vect_mad_avx2_gfni.asm | 160 ;; Encodes 64 bytes of a single source into 3x 64 bytes (parity disks) 164 XLDR x0l, [src + pos] ;; read low 32 bytes 175 XLDR x0h, [src + pos + 32] ;; read high 32 bytes 188 ;; Encodes 32 bytes of a single source into 3x 32 bytes (parity disks) 206 ;; Encodes less than 32 bytes of a single source into 3x parity disks 247 ENCODE_64B_3 ;; loop on 64 bytes at a time 258 ENCODE_32B_3 ;; encode next 32 bytes 267 ENCODE_LT_32B_3 len ;; encode final bytes
|
| H A D | gf_vect_mad_avx2_gfni.asm | 134 ;; Encodes 96 bytes of a single source and updates a single parity disk 156 ;; Encodes 64 bytes of a single source and updates a single parity disk 174 ;; Encodes 32 bytes of a single source and updates single parity disk 188 ;; Encodes less than 32 bytes of a single source and updates parity disk 218 ENCODE_96B ;; loop on 96 bytes at a time 228 ENCODE_64B ;; encode next 64 bytes 237 ENCODE_32B ;; encode next 32 bytes 246 ENCODE_LT_32B len ;; encode final bytes
|
| H A D | gf_3vect_dot_prod_avx2_gfni.asm | 187 ;; Encodes 64 bytes of all "k" sources into 3x 64 bytes (parity disks) 201 XLDR x0l, [ptr + pos] ;; Get next source vector low 32 bytes 202 XLDR x0h, [ptr + pos + 32] ;; Get next source vector high 32 bytes 225 ;; Encodes 32 bytes of all "k" sources into 3x 32 bytes (parity disks) 236 XLDR x0, [ptr + pos] ;Get next source vector (32 bytes) 255 ;; Encodes less than 32 bytes of all "k" sources into 3 parity disks 308 add pos, 64 ;; Loop on 64 bytes at a time first 319 add pos, 32 ;; encode next 32 bytes
|
| H A D | gf_vect_dot_prod_avx512_gfni.asm | 120 ;; Encodes 64 bytes of all "k" sources into 64 bytes (single parity disk) 132 vmovdqu8 x0{%%KMASK}, [ptr + pos] ;Get next source vector (less than 64 bytes) 134 XLDR x0, [ptr + pos] ;Get next source vector (64 bytes) 167 add pos, 64 ;Loop on 64 bytes at a time
|
| H A D | gf_2vect_dot_prod_avx512_gfni.asm | 132 ;; Encodes 64 bytes of all "k" sources into 2x 64 bytes (parity disks) 145 vmovdqu8 x0{%%KMASK}, [ptr + pos] ;Get next source vector (less than 64 bytes) 147 XLDR x0, [ptr + pos] ;Get next source vector (64 bytes) 186 add pos, 64 ;Loop on 64 bytes at a time
|
| H A D | gf_3vect_dot_prod_avx512_gfni.asm | 143 ;; Encodes 64 bytes of all "k" sources into 3x 64 bytes (parity disks) 157 vmovdqu8 x0{%%KMASK}, [ptr + pos] ;Get next source vector (less than 64 bytes) 159 XLDR x0, [ptr + pos] ;Get next source vector (64 bytes) 202 add pos, 64 ;Loop on 64 bytes at a time
|
| H A D | gf_4vect_dot_prod_avx512_gfni.asm | 163 ;; Encodes 64 bytes of all "k" sources into 4x 64 bytes (parity disks) 178 vmovdqu8 x0{%%KMASK}, [ptr + pos] ;Get next source vector (less than 64 bytes) 180 XLDR x0, [ptr + pos] ;Get next source vector (64 bytes) 230 add pos, 64 ;Loop on 64 bytes at a time
|
| H A D | gf_4vect_mad_avx2_gfni.asm | 150 ;; Encodes 32 bytes of a single source into 4x 32 bytes (parity disks) 171 ;; Encodes less than 32 bytes of a single source into 4x parity disks 219 ENCODE_32B_4 ;; loop on 32 bytes at a time 230 ENCODE_LT_32B_4 len ;; encode final bytes
|
| H A D | gf_5vect_mad_avx2_gfni.asm | 169 ;; Encodes 32 bytes of a single source into 5x 32 bytes (parity disks) 192 ;; Encodes less than 32 bytes of a single source into 5x parity disks 245 ENCODE_32B_5 ;; loop on 32 bytes at a time 256 ENCODE_LT_32B_5 len ;; encode final bytes
|
| H A D | gf_5vect_dot_prod_avx512_gfni.asm | 176 ;; Encodes 64 bytes of all "k" sources into 5x 64 bytes (parity disks) 192 vmovdqu8 x0{%%KMASK}, [ptr + pos] ;Get next source vector (less than 64 bytes) 194 XLDR x0, [ptr + pos] ;Get next source vector (64 bytes) 251 add pos, 64 ;Loop on 64 bytes at a time
|
| H A D | gf_6vect_dot_prod_avx512_gfni.asm | 189 ;; Encodes 64 bytes of all "k" sources into 6x 64 bytes (parity disks) 206 vmovdqu8 x0{%%KMASK}, [ptr + pos] ;Get next source vector (less than 64 bytes) 208 XLDR x0, [ptr + pos] ;Get next source vector (64 bytes) 269 add pos, 64 ;Loop on 64 bytes at a time
|
| /isa-l/crc/ |
| H A D | crc32_iscsi_01.asm | 40 ; efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so 119 ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;; 137 ;; compute num of bytes to be processed 138 mov tmp, len ;; save num bytes in tmp 152 ;; eax contains floor(bytes / 24) = num 24-byte chunks to do 335 ;; we can't read 8 bytes, as this might go beyond the end of the buffer 359 ;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) 373 add bufptmp, 128 ;; buf +=64; (next 64 bytes) 384 add bufptmp, 64 ;; buf +=64; (next 64 bytes) 394 add bufptmp, 32 ;; buf +=32; (next 32 bytes) [all …]
|
| H A D | crc32_iscsi_by16_10.asm | 157 ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The fold_128_B_loop 180 ;; y bytes of the buffer, where 0 <= y < 128. 181 ;; The 128 bytes of folded data is in 2 of the zmm registers: 188 ;; If there are still 64 bytes left, folds from 128 bytes to 64 bytes 189 ;; and handles the next 64 bytes 222 ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory 223 ; we can fold 16 bytes at a time if y>=16 238 ;now we have 16+z bytes left to reduce, where 0<= z < 16. 247 ; here we are getting data that is less than 16 bytes. 249 ; the input pointer before the actual point, to receive exactly 16 bytes. [all …]
|
| H A D | crc32_gzip_refl_by16_10.asm | 170 ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The fold_128_B_loop 193 ;; y bytes of the buffer, where 0 <= y < 128. 194 ;; The 128 bytes of folded data is in 2 of the zmm registers: 201 ;; If there are still 64 bytes left, folds from 128 bytes to 64 bytes 202 ;; and handles the next 64 bytes 235 ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory 236 ; we can fold 16 bytes at a time if y>=16 251 ;now we have 16+z bytes left to reduce, where 0<= z < 16. 260 ; here we are getting data that is less than 16 bytes. 262 ; the input pointer before the actual point, to receive exactly 16 bytes. [all …]
|
| H A D | crc32_ieee_by16_10.asm | 176 ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The fold_128_B_loop 203 ;; y bytes of the buffer, where 0 <= y < 128. 204 ;; The 128 bytes of folded data is in 2 of the zmm registers: 211 ;; If there are still 64 bytes left, folds from 128 bytes to 64 bytes 212 ;; and handles the next 64 bytes 245 ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory 246 ; we can fold 16 bytes at a time if y>=16 264 ;now we have 16+z bytes left to reduce, where 0<= z < 16. 273 ; here we are getting data that is less than 16 bytes. 275 ; the input pointer before the actual point, to receive exactly 16 bytes. [all …]
|
| H A D | crc32_iscsi_00.asm | 212 ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;;; 240 ;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of rdx are full) 251 add bufptmp, 128 ;; buf +=64; (next 64 bytes) 262 add bufptmp, 64 ;; buf +=64; (next 64 bytes) 272 add bufptmp, 32 ;; buf +=32; (next 32 bytes) 282 add bufptmp, 16 ;; buf +=16; (next 16 bytes) 288 add bufptmp, 8 ;; buf +=8; (next 8 bytes) 290 mov rbx, qword [bufptmp] ;; load a 8-bytes from the buffer: 295 shr rbx, 32 ;; get next 3 bytes
|
| H A D | crc64_iso_norm_by16_10.asm | 205 ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory 206 ; we can fold 16 bytes at a time if y>=16 224 ;now we have 16+z bytes left to reduce, where 0<= z < 16. 231 ; here we are getting data that is less than 16 bytes. 233 ; the input pointer before the actual point, to receive exactly 16 bytes. 247 ; shift xmm2 to the left by arg3 bytes 250 ; shift xmm7 to the right by 16-arg3 bytes 381 ; Right shift (8-length) bytes in XMM
|
| /isa-l/igzip/ |
| H A D | bitbuf2.h | 98 uint32_t bytes; in flush() local 101 bytes = (me->m_bit_count + 7) / 8; in flush() 102 me->m_out_buf += bytes; in flush()
|
| H A D | igzip_body.asm | 112 blen_mem_offset equ 0 ; local variable (8 bytes) 115 gpr_save_mem_offset equ 32 ; gpr save area (8*8 bytes) 116 xmm_save_mem_offset equ 32 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) 119 ;;; we want it aligned to 16 bytes
|
| H A D | igzip.c | 155 uint64_t bytes; in sync_flush() local 171 bytes = buffer_used(&state->bitbuf); in sync_flush() 173 stream->avail_out -= bytes; in sync_flush() 174 stream->total_out += bytes; in sync_flush() 188 int bytes = 0; in flush_write_buffer() local 193 bytes = buffer_used(&state->bitbuf); in flush_write_buffer() 194 stream->avail_out -= bytes; in flush_write_buffer() 195 stream->total_out += bytes; in flush_write_buffer() 623 uint32_t bytes; in write_constant_compressed_stateless() local 694 bytes = buffer_used(&state->bitbuf); in write_constant_compressed_stateless() [all …]
|
| H A D | igzip_icf_body_h1_gr_bt.asm | 122 m_out_end equ 0 ; local variable (8 bytes) 131 gpr_save_mem_offset equ 80 ; gpr save area (8*8 bytes) 132 xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) 136 ;;; we want it aligned to 16 bytes
|
| /isa-l/include/ |
| H A D | memcpy.asm | 41 ; SIZE : register: length in bytes (not modified) 194 %define %%SIZE %3 ; register: length in bytes (not modified) 373 ; SIZE : register: length in bytes (not modified) 417 %define %%SIZE %3 ; register: length in bytes (not modified) 488 ; (from 0 to 16 bytes) of data from memory to SIMD registers, 496 ; SIZE : register: length in bytes (not modified) 538 %define %%SIZE %3 ; [in] length in bytes (0-16 bytes) 627 %define %%SIZE %3 ; [in] length in bytes (0-32 bytes) 712 ; Move last bytes loaded to upper half and load 16 bytes in lower half 721 %define %%SIZE %3 ; register: length in bytes (not modified)
|