xref: /plan9/sys/src/cmd/gs/src/igcstr.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 1994, 1995, 1996, 1998, 1999, 2000 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: igcstr.c,v 1.7 2005/09/05 13:58:55 leonardo Exp $ */
18 /* String GC routines for Ghostscript */
19 #include "memory_.h"
20 #include "ghost.h"
21 #include "gsmdebug.h"
22 #include "gsstruct.h"
23 #include "iastate.h"
24 #include "igcstr.h"
25 
26 /* Forward references */
27 private bool gc_mark_string(const byte *, uint, bool, const chunk_t *);
28 
29 /* (Un)mark the strings in a chunk. */
30 void
gc_strings_set_marks(chunk_t * cp,bool mark)31 gc_strings_set_marks(chunk_t * cp, bool mark)
32 {
33     if (cp->smark != 0) {
34 	if_debug3('6', "[6]clearing string marks 0x%lx[%u] to %d\n",
35 		  (ulong) cp->smark, cp->smark_size, (int)mark);
36 	memset(cp->smark, 0, cp->smark_size);
37 	if (mark)
38 	    gc_mark_string(cp->sbase, cp->climit - cp->sbase, true, cp);
39     }
40 }
41 
42 /* We mark strings a word at a time. */
43 typedef string_mark_unit bword;
44 
45 #define bword_log2_bytes log2_sizeof_string_mark_unit
46 #define bword_bytes (1 << bword_log2_bytes)
47 #define bword_log2_bits (bword_log2_bytes + 3)
48 #define bword_bits (1 << bword_log2_bits)
49 #define bword_1s (~(bword)0)
50 /* Compensate for byte order reversal if necessary. */
51 #if arch_is_big_endian
52 #  if bword_bytes == 2
53 #    define bword_swap_bytes(m) m = (m << 8) | (m >> 8)
54 #  else				/* bword_bytes == 4 */
55 #    define bword_swap_bytes(m)\
56 	m = (m << 24) | ((m & 0xff00) << 8) | ((m >> 8) & 0xff00) | (m >> 24)
57 #  endif
58 #else
59 #  define bword_swap_bytes(m) DO_NOTHING
60 #endif
61 
62 /* (Un)mark a string in a known chunk.  Return true iff any new marks. */
63 private bool
gc_mark_string(const byte * ptr,uint size,bool set,const chunk_t * cp)64 gc_mark_string(const byte * ptr, uint size, bool set, const chunk_t * cp)
65 {
66     uint offset = ptr - cp->sbase;
67     bword *bp = (bword *) (cp->smark + ((offset & -bword_bits) >> 3));
68     uint bn = offset & (bword_bits - 1);
69     bword m = bword_1s << bn;
70     uint left = size;
71     bword marks = 0;
72 
73     bword_swap_bytes(m);
74     if (set) {
75 	if (left + bn >= bword_bits) {
76 	    marks |= ~*bp & m;
77 	    *bp |= m;
78 	    m = bword_1s, left -= bword_bits - bn, bp++;
79 	    while (left >= bword_bits) {
80 		marks |= ~*bp;
81 		*bp = bword_1s;
82 		left -= bword_bits, bp++;
83 	    }
84 	}
85 	if (left) {
86 	    bword_swap_bytes(m);
87 	    m -= m << left;
88 	    bword_swap_bytes(m);
89 	    marks |= ~*bp & m;
90 	    *bp |= m;
91 	}
92     } else {
93 	if (left + bn >= bword_bits) {
94 	    *bp &= ~m;
95 	    m = bword_1s, left -= bword_bits - bn, bp++;
96 	    if (left >= bword_bits * 5) {
97 		memset(bp, 0, (left & -bword_bits) >> 3);
98 		bp += left >> bword_log2_bits;
99 		left &= bword_bits - 1;
100 	    } else
101 		while (left >= bword_bits) {
102 		    *bp = 0;
103 		    left -= bword_bits, bp++;
104 		}
105 	}
106 	if (left) {
107 	    bword_swap_bytes(m);
108 	    m -= m << left;
109 	    bword_swap_bytes(m);
110 	    *bp &= ~m;
111 	}
112     }
113     return marks != 0;
114 }
115 
116 /* Mark a string.  Return true if any new marks. */
117 bool
gc_string_mark(const byte * ptr,uint size,bool set,gc_state_t * gcst)118 gc_string_mark(const byte * ptr, uint size, bool set, gc_state_t * gcst)
119 {
120     const chunk_t *cp;
121     bool marks;
122 
123     if (size == 0)
124 	return false;
125 #define dprintstr()\
126   dputc('('); fwrite(ptr, 1, min(size, 20), dstderr);\
127   dputs((size <= 20 ? ")" : "...)"))
128     if (!(cp = gc_locate(ptr, gcst))) {		/* not in a chunk */
129 #ifdef DEBUG
130 	if (gs_debug_c('5')) {
131 	    dlprintf2("[5]0x%lx[%u]", (ulong) ptr, size);
132 	    dprintstr();
133 	    dputs(" not in a chunk\n");
134 	}
135 #endif
136 	return false;
137     }
138     if (cp->smark == 0)		/* not marking strings */
139 	return false;
140 #ifdef DEBUG
141     if (ptr < cp->ctop) {
142 	lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
143 		 (ulong) ptr, size, (ulong) cp->ctop, (ulong) cp->climit);
144 	return false;
145     } else if (ptr + size > cp->climit) {	/*
146 						 * If this is the bottommost string in a chunk that has
147 						 * an inner chunk, the string's starting address is both
148 						 * cp->ctop of the outer chunk and cp->climit of the inner;
149 						 * gc_locate may incorrectly attribute the string to the
150 						 * inner chunk because of this.  This doesn't affect
151 						 * marking or relocation, since the machinery for these
152 						 * is all associated with the outermost chunk,
153 						 * but it can cause the validity check to fail.
154 						 * Check for this case now.
155 						 */
156 	const chunk_t *scp = cp;
157 
158 	while (ptr == scp->climit && scp->outer != 0)
159 	    scp = scp->outer;
160 	if (ptr + size > scp->climit) {
161 	    lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
162 		     (ulong) ptr, size,
163 		     (ulong) scp->ctop, (ulong) scp->climit);
164 	    return false;
165 	}
166     }
167 #endif
168     marks = gc_mark_string(ptr, size, set, cp);
169 #ifdef DEBUG
170     if (gs_debug_c('5')) {
171 	dlprintf4("[5]%s%smarked 0x%lx[%u]",
172 		  (marks ? "" : "already "), (set ? "" : "un"),
173 		  (ulong) ptr, size);
174 	dprintstr();
175 	dputc('\n');
176     }
177 #endif
178     return marks;
179 }
180 
181 /* Clear the relocation for strings. */
182 /* This requires setting the marks. */
183 void
gc_strings_clear_reloc(chunk_t * cp)184 gc_strings_clear_reloc(chunk_t * cp)
185 {
186     if (cp->sreloc != 0) {
187 	gc_strings_set_marks(cp, true);
188 	if_debug1('6', "[6]clearing string reloc 0x%lx\n",
189 		  (ulong) cp->sreloc);
190 	gc_strings_set_reloc(cp);
191     }
192 }
193 
194 /* Count the 0-bits in a byte. */
195 private const byte count_zero_bits_table[256] =
196 {
197 #define o4(n) n,n-1,n-1,n-2
198 #define o16(n) o4(n),o4(n-1),o4(n-1),o4(n-2)
199 #define o64(n) o16(n),o16(n-1),o16(n-1),o16(n-2)
200     o64(8), o64(7), o64(7), o64(6)
201 };
202 
203 #define byte_count_zero_bits(byt)\
204   (uint)(count_zero_bits_table[byt])
205 #define byte_count_one_bits(byt)\
206   (uint)(8 - count_zero_bits_table[byt])
207 
208 /* Set the relocation for the strings in a chunk. */
209 /* The sreloc table stores the relocated offset from climit for */
210 /* the beginning of each block of string_data_quantum characters. */
211 void
gc_strings_set_reloc(chunk_t * cp)212 gc_strings_set_reloc(chunk_t * cp)
213 {
214     if (cp->sreloc != 0 && cp->smark != 0) {
215 	byte *bot = cp->ctop;
216 	byte *top = cp->climit;
217 	uint count =
218 	    (top - bot + (string_data_quantum - 1)) >>
219 	    log2_string_data_quantum;
220 	string_reloc_offset *relp =
221 	    cp->sreloc +
222 	    (cp->smark_size >> (log2_string_data_quantum - 3));
223 	register const byte *bitp = cp->smark + cp->smark_size;
224 	register string_reloc_offset reloc = 0;
225 
226 	/* Skip initial unrelocated strings quickly. */
227 #if string_data_quantum == bword_bits || string_data_quantum == bword_bits * 2
228 	{
229 	    /* Work around the alignment aliasing bug. */
230 	    const bword *wp = (const bword *)bitp;
231 
232 #if string_data_quantum == bword_bits
233 #  define RELOC_TEST_1S(wp) (wp[-1])
234 #else /* string_data_quantum == bword_bits * 2 */
235 #  define RELOC_TEST_1S(wp) (wp[-1] & wp[-2])
236 #endif
237 	    while (count && RELOC_TEST_1S(wp) == bword_1s) {
238 		wp -= string_data_quantum / bword_bits;
239 		*--relp = reloc += string_data_quantum;
240 		--count;
241 	    }
242 #undef RELOC_TEST_1S
243 	    bitp = (const byte *)wp;
244 	}
245 #endif
246 	while (count--) {
247 	    bitp -= string_data_quantum / 8;
248 	    reloc += string_data_quantum -
249 		byte_count_zero_bits(bitp[0]);
250 	    reloc -= byte_count_zero_bits(bitp[1]);
251 	    reloc -= byte_count_zero_bits(bitp[2]);
252 	    reloc -= byte_count_zero_bits(bitp[3]);
253 #if log2_string_data_quantum > 5
254 	    reloc -= byte_count_zero_bits(bitp[4]);
255 	    reloc -= byte_count_zero_bits(bitp[5]);
256 	    reloc -= byte_count_zero_bits(bitp[6]);
257 	    reloc -= byte_count_zero_bits(bitp[7]);
258 #endif
259 	    *--relp = reloc;
260 	}
261     }
262     cp->sdest = cp->climit;
263 }
264 
265 /* Relocate a string pointer. */
266 void
igc_reloc_string(gs_string * sptr,gc_state_t * gcst)267 igc_reloc_string(gs_string * sptr, gc_state_t * gcst)
268 {
269     byte *ptr;
270     const chunk_t *cp;
271     uint offset;
272     uint reloc;
273     const byte *bitp;
274     byte byt;
275 
276     if (sptr->size == 0) {
277 	sptr->data = 0;
278 	return;
279     }
280     ptr = sptr->data;
281     if (!(cp = gc_locate(ptr, gcst)))	/* not in a chunk */
282 	return;
283     if (cp->sreloc == 0 || cp->smark == 0)	/* not marking strings */
284 	return;
285     offset = ptr - cp->sbase;
286     reloc = cp->sreloc[offset >> log2_string_data_quantum];
287     bitp = &cp->smark[offset >> 3];
288     switch (offset & (string_data_quantum - 8)) {
289 #if log2_string_data_quantum > 5
290 	case 56:
291 	    reloc -= byte_count_one_bits(bitp[-7]);
292 	case 48:
293 	    reloc -= byte_count_one_bits(bitp[-6]);
294 	case 40:
295 	    reloc -= byte_count_one_bits(bitp[-5]);
296 	case 32:
297 	    reloc -= byte_count_one_bits(bitp[-4]);
298 #endif
299 	case 24:
300 	    reloc -= byte_count_one_bits(bitp[-3]);
301 	case 16:
302 	    reloc -= byte_count_one_bits(bitp[-2]);
303 	case 8:
304 	    reloc -= byte_count_one_bits(bitp[-1]);
305     }
306     byt = *bitp & (0xff >> (8 - (offset & 7)));
307     reloc -= byte_count_one_bits(byt);
308     if_debug2('5', "[5]relocate string 0x%lx to 0x%lx\n",
309 	      (ulong) ptr, (ulong) (cp->sdest - reloc));
310     sptr->data = cp->sdest - reloc;
311 }
312 void
igc_reloc_const_string(gs_const_string * sptr,gc_state_t * gcst)313 igc_reloc_const_string(gs_const_string * sptr, gc_state_t * gcst)
314 {   /* We assume the representation of byte * and const byte * is */
315     /* the same.... */
316     igc_reloc_string((gs_string *) sptr, gcst);
317 }
318 void
igc_reloc_param_string(gs_param_string * sptr,gc_state_t * gcst)319 igc_reloc_param_string(gs_param_string * sptr, gc_state_t * gcst)
320 {
321     if (!sptr->persistent) {
322 	/* We assume that gs_param_string is a subclass of gs_string. */
323 	igc_reloc_string((gs_string *)sptr, gcst);
324     }
325 }
326 
327 /* Compact the strings in a chunk. */
328 void
gc_strings_compact(chunk_t * cp)329 gc_strings_compact(chunk_t * cp)
330 {
331     if (cp->smark != 0) {
332 	byte *hi = cp->climit;
333 	byte *lo = cp->ctop;
334 	const byte *from = hi;
335 	byte *to = hi;
336 	const byte *bp = cp->smark + cp->smark_size;
337 
338 #ifdef DEBUG
339 	if (gs_debug_c('4') || gs_debug_c('5')) {
340 	    byte *base = cp->sbase;
341 	    uint i = (lo - base) & -string_data_quantum;
342 	    uint n = ROUND_UP(hi - base, string_data_quantum);
343 
344 #define R 16
345 	    for (; i < n; i += R) {
346 		uint j;
347 
348 		dlprintf1("[4]0x%lx: ", (ulong) (base + i));
349 		for (j = i; j < i + R; j++) {
350 		    byte ch = base[j];
351 
352 		    if (ch <= 31) {
353 			dputc('^');
354 			dputc(ch + 0100);
355 		    } else
356 			dputc(ch);
357 		}
358 		dputc(' ');
359 		for (j = i; j < i + R; j++)
360 		    dputc((cp->smark[j >> 3] & (1 << (j & 7)) ?
361 			   '+' : '.'));
362 #undef R
363 		if (!(i & (string_data_quantum - 1)))
364 		    dprintf1(" %u", cp->sreloc[i >> log2_string_data_quantum]);
365 		dputc('\n');
366 	    }
367 	}
368 #endif
369 	/*
370 	 * Skip unmodified strings quickly.  We know that cp->smark is
371 	 * aligned to a string_mark_unit.
372 	 */
373 	{
374 	    /* Work around the alignment aliasing bug. */
375 	    const bword *wp = (const bword *)bp;
376 
377 	    while (to > lo && wp[-1] == bword_1s)
378 		to -= bword_bits, --wp;
379 	    bp = (const byte *)wp;
380 	    while (to > lo && bp[-1] == 0xff)
381 		to -= 8, --bp;
382 	}
383 	from = to;
384 	while (from > lo) {
385 	    byte b = *--bp;
386 
387 	    from -= 8;
388 	    switch (b) {
389 		case 0xff:
390 		    to -= 8;
391 		    /*
392 		     * Since we've seen a byte other than 0xff, we know
393 		     * to != from at this point.
394 		     */
395 		    to[7] = from[7];
396 		    to[6] = from[6];
397 		    to[5] = from[5];
398 		    to[4] = from[4];
399 		    to[3] = from[3];
400 		    to[2] = from[2];
401 		    to[1] = from[1];
402 		    to[0] = from[0];
403 		    break;
404 		default:
405 		    if (b & 0x80)
406 			*--to = from[7];
407 		    if (b & 0x40)
408 			*--to = from[6];
409 		    if (b & 0x20)
410 			*--to = from[5];
411 		    if (b & 0x10)
412 			*--to = from[4];
413 		    if (b & 8)
414 			*--to = from[3];
415 		    if (b & 4)
416 			*--to = from[2];
417 		    if (b & 2)
418 			*--to = from[1];
419 		    if (b & 1)
420 			*--to = from[0];
421 		    /* falls through */
422 		case 0:
423 		    ;
424 	    }
425 	}
426 	gs_alloc_fill(cp->ctop, gs_alloc_fill_collected,
427 		      to - cp->ctop);
428 	cp->ctop = to;
429     }
430 }
431