xref: /isa-l/igzip/proc_heap.asm (revision cd888f01a447dd04c3a8b50362079648d432d2ca)
1b1c45175SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2b1c45175SGreg Tucker;  Copyright(c) 2011-2018 Intel Corporation All rights reserved.
3b1c45175SGreg Tucker;
4b1c45175SGreg Tucker;  Redistribution and use in source and binary forms, with or without
5b1c45175SGreg Tucker;  modification, are permitted provided that the following conditions
6b1c45175SGreg Tucker;  are met:
7b1c45175SGreg Tucker;    * Redistributions of source code must retain the above copyright
8b1c45175SGreg Tucker;      notice, this list of conditions and the following disclaimer.
9b1c45175SGreg Tucker;    * Redistributions in binary form must reproduce the above copyright
10b1c45175SGreg Tucker;      notice, this list of conditions and the following disclaimer in
11b1c45175SGreg Tucker;      the documentation and/or other materials provided with the
12b1c45175SGreg Tucker;      distribution.
13b1c45175SGreg Tucker;    * Neither the name of Intel Corporation nor the names of its
14b1c45175SGreg Tucker;      contributors may be used to endorse or promote products derived
15b1c45175SGreg Tucker;      from this software without specific prior written permission.
16b1c45175SGreg Tucker;
17b1c45175SGreg Tucker;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18b1c45175SGreg Tucker;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19b1c45175SGreg Tucker;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20b1c45175SGreg Tucker;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21b1c45175SGreg Tucker;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22b1c45175SGreg Tucker;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23b1c45175SGreg Tucker;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24b1c45175SGreg Tucker;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25b1c45175SGreg Tucker;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26b1c45175SGreg Tucker;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27b1c45175SGreg Tucker;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28b1c45175SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29b1c45175SGreg Tucker
3001dfbcc4SRoy Oursler; returns modified node_ptr
3101dfbcc4SRoy Oursler; uint32_t proc_heap(uint64_t *heap, uint32_t  heap_size);
3201dfbcc4SRoy Oursler
3301dfbcc4SRoy Oursler%include "reg_sizes.asm"
3401dfbcc4SRoy Oursler%include "heap_macros.asm"
3501dfbcc4SRoy Oursler
3601dfbcc4SRoy Oursler%ifidn __OUTPUT_FORMAT__, win64
3701dfbcc4SRoy Oursler%define heap		rcx	; pointer, 64-bit
3801dfbcc4SRoy Oursler%define heap_size	rdx
3901dfbcc4SRoy Oursler%define	arg3		r8
4001dfbcc4SRoy Oursler%define child		rsi
4101dfbcc4SRoy Oursler%define tmp32		rdi
4201dfbcc4SRoy Oursler%else
4301dfbcc4SRoy Oursler%define heap		rdi
4401dfbcc4SRoy Oursler%define heap_size	rsi
4501dfbcc4SRoy Oursler%define	arg3		rdx
4601dfbcc4SRoy Oursler%define child		rcx
4701dfbcc4SRoy Oursler%define tmp32		rdx
4801dfbcc4SRoy Oursler%endif
4901dfbcc4SRoy Oursler
5001dfbcc4SRoy Oursler%define node_ptr	rax
5101dfbcc4SRoy Oursler%define h1		r8
5201dfbcc4SRoy Oursler%define h2		r9
5301dfbcc4SRoy Oursler%define h3		r10
5401dfbcc4SRoy Oursler%define i		r11
5501dfbcc4SRoy Oursler%define tmp2		r12
5601dfbcc4SRoy Oursler
57ede04f0aSGreg Tucker[bits 64]
58ede04f0aSGreg Tuckerdefault rel
59ede04f0aSGreg Tuckersection .text
60ede04f0aSGreg Tucker
61ec6e5de6SGreg Tucker	global build_huff_tree
62ec6e5de6SGreg Tuckerbuild_huff_tree:
63*cd888f01SH.J. Lu	endbranch
64a41d352aSRoy Oursler%ifidn __OUTPUT_FORMAT__, win64
65a41d352aSRoy Oursler	push	rsi
66a41d352aSRoy Oursler	push	rdi
67a41d352aSRoy Oursler%endif
6801dfbcc4SRoy Oursler	push	r12
69a41d352aSRoy Oursler
70a41d352aSRoy Oursler	mov	node_ptr, arg3
7101dfbcc4SRoy Oursler.main_loop:
7201dfbcc4SRoy Oursler	; REMOVE_MIN64(heap, heap_size, h1);
7301dfbcc4SRoy Oursler	mov	h2, [heap + heap_size*8]
7401dfbcc4SRoy Oursler	mov	h1, [heap + 1*8]
7501dfbcc4SRoy Oursler	mov	qword [heap + heap_size*8], -1
7601dfbcc4SRoy Oursler	dec	heap_size
7701dfbcc4SRoy Oursler	mov	[heap + 1*8], h2
7801dfbcc4SRoy Oursler
7901dfbcc4SRoy Oursler	mov	i, 1
8001dfbcc4SRoy Oursler	heapify	heap, heap_size, i, child, h2, h3, tmp32, tmp2
8101dfbcc4SRoy Oursler
8201dfbcc4SRoy Oursler	mov	h2, [heap + 1*8]
8301dfbcc4SRoy Oursler	lea	h3, [h1 + h2]
8401dfbcc4SRoy Oursler	mov	[heap + node_ptr*8], h1 %+ w
8501dfbcc4SRoy Oursler	mov	[heap + node_ptr*8 - 8], h2 %+ w
8601dfbcc4SRoy Oursler
877e1a3374SGreg Tucker	and 	h3, ~0xffff
8801dfbcc4SRoy Oursler	or	h3, node_ptr
8901dfbcc4SRoy Oursler	sub	node_ptr, 2
9001dfbcc4SRoy Oursler
9101dfbcc4SRoy Oursler	; replace_min64(heap, heap_size, h3)
9201dfbcc4SRoy Oursler	mov	[heap + 1*8], h3
9301dfbcc4SRoy Oursler	mov	i, 1
9401dfbcc4SRoy Oursler	heapify	heap, heap_size, i, child, h2, h3, tmp32, tmp2
9501dfbcc4SRoy Oursler
9601dfbcc4SRoy Oursler	cmp	heap_size, 1
9701dfbcc4SRoy Oursler	ja	.main_loop
9801dfbcc4SRoy Oursler
9901dfbcc4SRoy Oursler	mov	h1, [heap + 1*8]
10001dfbcc4SRoy Oursler	mov	[heap + node_ptr*8], h1 %+ w
10101dfbcc4SRoy Oursler
10201dfbcc4SRoy Oursler	pop	r12
103a41d352aSRoy Oursler%ifidn __OUTPUT_FORMAT__, win64
10401dfbcc4SRoy Oursler	pop	rdi
10501dfbcc4SRoy Oursler	pop	rsi
106a41d352aSRoy Oursler%endif
10701dfbcc4SRoy Oursler	ret
10801dfbcc4SRoy Oursler
10901dfbcc4SRoy Oursleralign 32
110ec6e5de6SGreg Tucker	global	build_heap
111ec6e5de6SGreg Tuckerbuild_heap:
112*cd888f01SH.J. Lu	endbranch
113a41d352aSRoy Oursler%ifidn __OUTPUT_FORMAT__, win64
11401dfbcc4SRoy Oursler	push	rsi
11501dfbcc4SRoy Oursler	push	rdi
116a41d352aSRoy Oursler%endif
11701dfbcc4SRoy Oursler	push	r12
11801dfbcc4SRoy Oursler	mov	qword [heap + heap_size*8 + 8], -1
11901dfbcc4SRoy Oursler	mov	i, heap_size
12001dfbcc4SRoy Oursler	shr	i, 1
12101dfbcc4SRoy Oursler.loop:
12201dfbcc4SRoy Oursler	mov	h1, i
12301dfbcc4SRoy Oursler	heapify	heap, heap_size, h1, child, h2, h3, tmp32, tmp2
12401dfbcc4SRoy Oursler	dec	i
12501dfbcc4SRoy Oursler	jnz	.loop
12601dfbcc4SRoy Oursler
12701dfbcc4SRoy Oursler	pop	r12
128a41d352aSRoy Oursler%ifidn __OUTPUT_FORMAT__, win64
12901dfbcc4SRoy Oursler	pop	rdi
13001dfbcc4SRoy Oursler	pop	rsi
131a41d352aSRoy Oursler%endif
13201dfbcc4SRoy Oursler	ret
133