xref: /plan9/sys/src/cmd/gs/src/gsfunc0.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 1997, 2000 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: gsfunc0.c,v 1.27 2005/07/15 03:36:03 dan Exp $ */
18 /* Implementation of FunctionType 0 (Sampled) Functions */
19 #include "math_.h"
20 #include "gx.h"
21 #include "gserrors.h"
22 #include "gsfunc0.h"
23 #include "gsparam.h"
24 #include "gxfarith.h"
25 #include "gxfunc.h"
26 #include "stream.h"
27 
28 #define POLE_CACHE_DEBUG 0      /* A temporary development technology need.
29 				   Remove after the beta testing. */
30 #define POLE_CACHE_GENERIC_1D 1 /* A temporary development technology need.
31 				   Didn't decide yet - see fn_Sd_evaluate_cubic_cached_1d. */
32 #define POLE_CACHE_IGNORE 0     /* A temporary development technology need.
33 				   Remove after the beta testing. */
34 
35 #if POLE_CACHE_DEBUG
36 #   include <assert.h>
37 #endif
38 
39 #define MAX_FAST_COMPS 8
40 
41 typedef struct gs_function_Sd_s {
42     gs_function_head_t head;
43     gs_function_Sd_params_t params;
44 } gs_function_Sd_t;
45 
46 /* GC descriptor */
47 private_st_function_Sd();
48 private
ENUM_PTRS_WITH(function_Sd_enum_ptrs,gs_function_Sd_t * pfn)49 ENUM_PTRS_WITH(function_Sd_enum_ptrs, gs_function_Sd_t *pfn)
50 {
51     index -= 6;
52     if (index < st_data_source_max_ptrs)
53 	return ENUM_USING(st_data_source, &pfn->params.DataSource,
54 			  sizeof(pfn->params.DataSource), index);
55     return ENUM_USING_PREFIX(st_function, st_data_source_max_ptrs);
56 }
57 ENUM_PTR3(0, gs_function_Sd_t, params.Encode, params.Decode, params.Size);
58 ENUM_PTR3(3, gs_function_Sd_t, params.pole, params.array_step, params.stream_step);
59 ENUM_PTRS_END
60 private
RELOC_PTRS_WITH(function_Sd_reloc_ptrs,gs_function_Sd_t * pfn)61 RELOC_PTRS_WITH(function_Sd_reloc_ptrs, gs_function_Sd_t *pfn)
62 {
63     RELOC_PREFIX(st_function);
64     RELOC_USING(st_data_source, &pfn->params.DataSource,
65 		sizeof(pfn->params.DataSource));
66     RELOC_PTR3(gs_function_Sd_t, params.Encode, params.Decode, params.Size);
67     RELOC_PTR3(gs_function_Sd_t, params.pole, params.array_step, params.stream_step);
68 }
69 RELOC_PTRS_END
70 
71 /* Define the maximum plausible number of inputs and outputs */
72 /* for a Sampled function. */
73 #define max_Sd_m 16
74 #define max_Sd_n 16
75 
76 /* Get one set of sample values. */
77 #define SETUP_SAMPLES(bps, nbytes)\
78 	int n = pfn->params.n;\
79 	byte buf[max_Sd_n * ((bps + 7) >> 3)];\
80 	const byte *p;\
81 	int i;\
82 \
83 	data_source_access(&pfn->params.DataSource, offset >> 3,\
84 			   nbytes, buf, &p)
85 
86 private int
fn_gets_1(const gs_function_Sd_t * pfn,ulong offset,uint * samples)87 fn_gets_1(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
88 {
89     SETUP_SAMPLES(1, ((offset & 7) + n + 7) >> 3);
90     for (i = 0; i < n; ++i) {
91 	samples[i] = (*p >> (~offset & 7)) & 1;
92 	if (!(++offset & 7))
93 	    p++;
94     }
95     return 0;
96 }
97 private int
fn_gets_2(const gs_function_Sd_t * pfn,ulong offset,uint * samples)98 fn_gets_2(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
99 {
100     SETUP_SAMPLES(2, (((offset & 7) >> 1) + n + 3) >> 2);
101     for (i = 0; i < n; ++i) {
102 	samples[i] = (*p >> (6 - (offset & 7))) & 3;
103 	if (!((offset += 2) & 7))
104 	    p++;
105     }
106     return 0;
107 }
108 private int
fn_gets_4(const gs_function_Sd_t * pfn,ulong offset,uint * samples)109 fn_gets_4(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
110 {
111     SETUP_SAMPLES(4, (((offset & 7) >> 2) + n + 1) >> 1);
112     for (i = 0; i < n; ++i) {
113 	samples[i] = ((offset ^= 4) & 4 ? *p >> 4 : *p++ & 0xf);
114     }
115     return 0;
116 }
117 private int
fn_gets_8(const gs_function_Sd_t * pfn,ulong offset,uint * samples)118 fn_gets_8(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
119 {
120     SETUP_SAMPLES(8, n);
121     for (i = 0; i < n; ++i) {
122 	samples[i] = *p++;
123     }
124     return 0;
125 }
126 private int
fn_gets_12(const gs_function_Sd_t * pfn,ulong offset,uint * samples)127 fn_gets_12(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
128 {
129     SETUP_SAMPLES(12, (((offset & 7) >> 2) + 3 * n + 1) >> 1);
130     for (i = 0; i < n; ++i) {
131 	if (offset & 4)
132 	    samples[i] = ((*p & 0xf) << 8) + p[1], p += 2;
133 	else
134 	    samples[i] = (*p << 4) + (p[1] >> 4), p++;
135 	offset ^= 4;
136     }
137     return 0;
138 }
139 private int
fn_gets_16(const gs_function_Sd_t * pfn,ulong offset,uint * samples)140 fn_gets_16(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
141 {
142     SETUP_SAMPLES(16, n * 2);
143     for (i = 0; i < n; ++i) {
144 	samples[i] = (*p << 8) + p[1];
145 	p += 2;
146     }
147     return 0;
148 }
149 private int
fn_gets_24(const gs_function_Sd_t * pfn,ulong offset,uint * samples)150 fn_gets_24(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
151 {
152     SETUP_SAMPLES(24, n * 3);
153     for (i = 0; i < n; ++i) {
154 	samples[i] = (*p << 16) + (p[1] << 8) + p[2];
155 	p += 3;
156     }
157     return 0;
158 }
159 private int
fn_gets_32(const gs_function_Sd_t * pfn,ulong offset,uint * samples)160 fn_gets_32(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
161 {
162     SETUP_SAMPLES(32, n * 4);
163     for (i = 0; i < n; ++i) {
164 	samples[i] = (*p << 24) + (p[1] << 16) + (p[2] << 8) + p[3];
165 	p += 4;
166     }
167     return 0;
168 }
169 
170 private int (*const fn_get_samples[]) (const gs_function_Sd_t * pfn,
171 				       ulong offset, uint * samples) =
172 {
173     0, fn_gets_1, fn_gets_2, 0, fn_gets_4, 0, 0, 0,
174 	fn_gets_8, 0, 0, 0, fn_gets_12, 0, 0, 0,
175 	fn_gets_16, 0, 0, 0, 0, 0, 0, 0,
176 	fn_gets_24, 0, 0, 0, 0, 0, 0, 0,
177 	fn_gets_32
178 };
179 
180 /*
181  * Compute a value by cubic interpolation.
182  * f[] = f(0), f(1), f(2), f(3); 1 < x < 2.
183  * The formula is derived from those presented in
184  * http://www.cs.uwa.edu.au/undergraduate/units/233.413/Handouts/Lecture04.html
185  * (thanks to Raph Levien for the reference).
186  */
187 private double
interpolate_cubic(floatp x,floatp f0,floatp f1,floatp f2,floatp f3)188 interpolate_cubic(floatp x, floatp f0, floatp f1, floatp f2, floatp f3)
189 {
190     /*
191      * The parameter 'a' affects the contribution of the high-frequency
192      * components.  The abovementioned source suggests a = -0.5.
193      */
194 #define a (-0.5)
195 #define SQR(v) ((v) * (v))
196 #define CUBE(v) ((v) * (v) * (v))
197     const double xm1 = x - 1, m2x = 2 - x, m3x = 3 - x;
198     const double c =
199 	(a * CUBE(x) - 5 * a * SQR(x) + 8 * a * x - 4 * a) * f0 +
200 	((a+2) * CUBE(xm1) - (a+3) * SQR(xm1) + 1) * f1 +
201 	((a+2) * CUBE(m2x) - (a+3) * SQR(m2x) + 1) * f2 +
202 	(a * CUBE(m3x) - 5 * a * SQR(m3x) + 8 * a * m3x - 4 * a) * f3;
203 
204     if_debug6('~', "[~](%g, %g, %g, %g)order3(%g) => %g\n",
205 	      f0, f1, f2, f3, x, c);
206     return c;
207 #undef a
208 #undef SQR
209 #undef CUBE
210 }
211 
212 /*
213  * Compute a value by quadratic interpolation.
214  * f[] = f(0), f(1), f(2); 0 < x < 1.
215  *
216  * We used to use a quadratic formula for this, derived from
217  * f(0) = f0, f(1) = f1, f'(1) = (f2 - f0) / 2, but now we
218  * match what we believe is Acrobat Reader's behavior.
219  */
220 inline private double
interpolate_quadratic(floatp x,floatp f0,floatp f1,floatp f2)221 interpolate_quadratic(floatp x, floatp f0, floatp f1, floatp f2)
222 {
223     return interpolate_cubic(x + 1, f0, f0, f1, f2);
224 }
225 
226 /* Calculate a result by multicubic interpolation. */
227 private void
fn_interpolate_cubic(const gs_function_Sd_t * pfn,const float * fparts,const int * iparts,const ulong * factors,float * samples,ulong offset,int m)228 fn_interpolate_cubic(const gs_function_Sd_t *pfn, const float *fparts,
229 		     const int *iparts, const ulong *factors,
230 		     float *samples, ulong offset, int m)
231 {
232     int j;
233 
234 top:
235     if (m == 0) {
236 	uint sdata[max_Sd_n];
237 
238 	(*fn_get_samples[pfn->params.BitsPerSample])(pfn, offset, sdata);
239 	for (j = pfn->params.n - 1; j >= 0; --j)
240 	    samples[j] = (float)sdata[j];
241     } else {
242 	float fpart = *fparts++;
243 	int ipart = *iparts++;
244 	ulong delta = *factors++;
245 	int size = pfn->params.Size[pfn->params.m - m];
246 	float samples1[max_Sd_n], samplesm1[max_Sd_n], samples2[max_Sd_n];
247 
248 	--m;
249 	if (is_fzero(fpart))
250 	    goto top;
251 	fn_interpolate_cubic(pfn, fparts, iparts, factors, samples,
252 			     offset, m);
253 	fn_interpolate_cubic(pfn, fparts, iparts, factors, samples1,
254 			     offset + delta, m);
255 	/* Ensure we don't try to access out of bounds. */
256 	/*
257 	 * If size == 1, the only possible value for ipart and fpart is
258 	 * 0, so we've already handled this case.
259 	 */
260 	if (size == 2) {	/* ipart = 0 */
261 	    /* Use linear interpolation. */
262 	    for (j = pfn->params.n - 1; j >= 0; --j)
263 		samples[j] += (samples1[j] - samples[j]) * fpart;
264 	    return;
265 	}
266 	if (ipart == 0) {
267 	    /* Use quadratic interpolation. */
268 	    fn_interpolate_cubic(pfn, fparts, iparts, factors,
269 				 samples2, offset + delta * 2, m);
270 	    for (j = pfn->params.n - 1; j >= 0; --j)
271 		samples[j] =
272 		    interpolate_quadratic(fpart, samples[j],
273 					  samples1[j], samples2[j]);
274 	    return;
275 	}
276 	/* At this point we know ipart > 0, size >= 3. */
277 	fn_interpolate_cubic(pfn, fparts, iparts, factors, samplesm1,
278 			     offset - delta, m);
279 	if (ipart == size - 2) {
280 	    /* Use quadratic interpolation. */
281 	    for (j = pfn->params.n - 1; j >= 0; --j)
282 		samples[j] =
283 		    interpolate_quadratic(1 - fpart, samples1[j],
284 					  samples[j], samplesm1[j]);
285 	    return;
286 	}
287 	/* Now we know 0 < ipart < size - 2, size > 3. */
288 	fn_interpolate_cubic(pfn, fparts, iparts, factors,
289 			     samples2, offset + delta * 2, m);
290 	for (j = pfn->params.n - 1; j >= 0; --j)
291 	    samples[j] =
292 		interpolate_cubic(fpart + 1, samplesm1[j], samples[j],
293 				  samples1[j], samples2[j]);
294     }
295 }
296 
297 /* Calculate a result by multilinear interpolation. */
298 private void
fn_interpolate_linear(const gs_function_Sd_t * pfn,const float * fparts,const ulong * factors,float * samples,ulong offset,int m)299 fn_interpolate_linear(const gs_function_Sd_t *pfn, const float *fparts,
300 		 const ulong *factors, float *samples, ulong offset, int m)
301 {
302     int j;
303 
304 top:
305     if (m == 0) {
306 	uint sdata[max_Sd_n];
307 
308 	(*fn_get_samples[pfn->params.BitsPerSample])(pfn, offset, sdata);
309 	for (j = pfn->params.n - 1; j >= 0; --j)
310 	    samples[j] = (float)sdata[j];
311     } else {
312 	float fpart = *fparts++;
313 	float samples1[max_Sd_n];
314 
315 	if (is_fzero(fpart)) {
316 	    ++factors;
317 	    --m;
318 	    goto top;
319 	}
320 	fn_interpolate_linear(pfn, fparts, factors + 1, samples,
321 			      offset, m - 1);
322 	fn_interpolate_linear(pfn, fparts, factors + 1, samples1,
323 			      offset + *factors, m - 1);
324 	for (j = pfn->params.n - 1; j >= 0; --j)
325 	    samples[j] += (samples1[j] - samples[j]) * fpart;
326     }
327 }
328 
329 private inline double
fn_Sd_encode(const gs_function_Sd_t * pfn,int i,double sample)330 fn_Sd_encode(const gs_function_Sd_t *pfn, int i, double sample)
331 {
332     float d0, d1, r0, r1;
333     double value;
334     int bps = pfn->params.BitsPerSample;
335     /* x86 machines have problems with shifts if bps >= 32 */
336     uint max_samp = (bps < (sizeof(uint) * 8)) ? ((1 << bps) - 1) : max_uint;
337 
338     if (pfn->params.Range)
339 	r0 = pfn->params.Range[2 * i], r1 = pfn->params.Range[2 * i + 1];
340     else
341 	r0 = 0, r1 = (float)((1 << bps) - 1);
342     if (pfn->params.Decode)
343 	d0 = pfn->params.Decode[2 * i], d1 = pfn->params.Decode[2 * i + 1];
344     else
345 	d0 = r0, d1 = r1;
346 
347     value = sample * (d1 - d0) / max_samp + d0;
348     if (value < r0)
349 	value = r0;
350     else if (value > r1)
351 	value = r1;
352     return value;
353 }
354 
355 /* Evaluate a Sampled function. */
356 /* A generic algorithm with a recursion by dimentions. */
357 private int
fn_Sd_evaluate_general(const gs_function_t * pfn_common,const float * in,float * out)358 fn_Sd_evaluate_general(const gs_function_t * pfn_common, const float *in, float *out)
359 {
360     const gs_function_Sd_t *pfn = (const gs_function_Sd_t *)pfn_common;
361     int bps = pfn->params.BitsPerSample;
362     ulong offset = 0;
363     int i;
364     float encoded[max_Sd_m];
365     int iparts[max_Sd_m];	/* only needed for cubic interpolation */
366     ulong factors[max_Sd_m];
367     float samples[max_Sd_n];
368 
369     /* Encode the input values. */
370 
371     for (i = 0; i < pfn->params.m; ++i) {
372 	float d0 = pfn->params.Domain[2 * i],
373 	    d1 = pfn->params.Domain[2 * i + 1];
374 	float arg = in[i], enc;
375 
376 	if (arg < d0)
377 	    arg = d0;
378 	else if (arg > d1)
379 	    arg = d1;
380 	if (pfn->params.Encode) {
381 	    float e0 = pfn->params.Encode[2 * i];
382 	    float e1 = pfn->params.Encode[2 * i + 1];
383 
384 	    enc = (arg - d0) * (e1 - e0) / (d1 - d0) + e0;
385 	    if (enc < 0)
386 		encoded[i] = 0;
387 	    else if (enc >= pfn->params.Size[i] - 1)
388 		encoded[i] = (float)pfn->params.Size[i] - 1;
389 	    else
390 		encoded[i] = enc;
391 	} else {
392 	    /* arg is guaranteed to be in bounds, ergo so is enc */
393 	    encoded[i] = (arg - d0) * (pfn->params.Size[i] - 1) / (d1 - d0);
394 	}
395     }
396 
397     /* Look up and interpolate the output values. */
398 
399     {
400 	ulong factor = bps * pfn->params.n;
401 
402 	for (i = 0; i < pfn->params.m; factor *= pfn->params.Size[i++]) {
403 	    int ipart = (int)encoded[i];
404 
405 	    offset += (factors[i] = factor) * ipart;
406 	    iparts[i] = ipart;	/* only needed for cubic interpolation */
407 	    encoded[i] -= ipart;
408 	}
409     }
410     if (pfn->params.Order == 3)
411 	fn_interpolate_cubic(pfn, encoded, iparts, factors, samples,
412 			     offset, pfn->params.m);
413     else
414 	fn_interpolate_linear(pfn, encoded, factors, samples, offset,
415 			      pfn->params.m);
416 
417     /* Encode the output values. */
418 
419     for (i = 0; i < pfn->params.n; ++i)
420 	out[i] = (float)fn_Sd_encode(pfn, i, samples[i]);
421 
422     return 0;
423 }
424 
425 static const double double_stub = 1e90;
426 
427 private inline void
fn_make_cubic_poles(double * p,double f0,double f1,double f2,double f3,const int pole_step_minor)428 fn_make_cubic_poles(double *p, double f0, double f1, double f2, double f3,
429 	    const int pole_step_minor)
430 {   /* The following is poles of the polinomial,
431        which represents interpolate_cubic in [1,2]. */
432     const double a = -0.5;
433 
434     p[pole_step_minor * 1] = (a*f0 + 3*f1 - a*f2)/3.0;
435     p[pole_step_minor * 2] = (-a*f1 + 3*f2 + a*f3)/3.0;
436 }
437 
438 private void
fn_make_poles(double * p,const int pole_step,int power,int bias)439 fn_make_poles(double *p, const int pole_step, int power, int bias)
440 {
441     const int pole_step_minor = pole_step / 3;
442     switch(power) {
443 	case 1: /* A linear 3d power curve. */
444 	    /* bias must be 0. */
445 	    p[pole_step_minor * 1] = (2 * p[pole_step * 0] + 1 * p[pole_step * 1]) / 3;
446 	    p[pole_step_minor * 2] = (1 * p[pole_step * 0] + 2 * p[pole_step * 1]) / 3;
447 	    break;
448 	case 2:
449 	    /* bias may be be 0 or 1. */
450 	    /* Duplicate the beginning or the ending pole (the old code compatible). */
451 	    fn_make_cubic_poles(p + pole_step * bias,
452 		    p[pole_step * 0], p[pole_step * bias],
453 		    p[pole_step * (1 + bias)], p[pole_step * 2],
454 		    pole_step_minor);
455 	    break;
456 	case 3:
457 	    /* bias must be 1. */
458 	    fn_make_cubic_poles(p + pole_step * bias,
459 		    p[pole_step * 0], p[pole_step * 1], p[pole_step * 2], p[pole_step * 3],
460 		    pole_step_minor);
461 	    break;
462 	default: /* Must not happen. */
463 	   DO_NOTHING;
464     }
465 }
466 
467 /* Evaluate a Sampled function.
468    A cubic interpolation with a pole cache.
469    Allows a fast check for extreme suspection. */
470 /* This implementation is a particular case of 1 dimension.
471    maybe we'll use as an optimisation of the generic case,
472    so keep it for a while. */
473 private int
fn_Sd_evaluate_cubic_cached_1d(const gs_function_Sd_t * pfn,const float * in,float * out)474 fn_Sd_evaluate_cubic_cached_1d(const gs_function_Sd_t *pfn, const float *in, float *out)
475 {
476     float d0 = pfn->params.Domain[2 * 0];
477     float d1 = pfn->params.Domain[2 * 0 + 1];
478     float x0 = in[0];
479     const int pole_step_minor = pfn->params.n;
480     const int pole_step = 3 * pole_step_minor;
481     int i0; /* A cell index. */
482     int ib, ie, i, k;
483     double *p, t0, t1, tt;
484 
485     if (x0 < d0)
486 	x0 = d0;
487     if (x0 > d1)
488 	x0 = d1;
489     tt = (in[0] - d0) * (pfn->params.Size[0] - 1) / (d1 - d0);
490     i0 = (int)floor(tt);
491     ib = max(i0 - 1, 0);
492     ie = min(pfn->params.Size[0], i0 + 3);
493     for (i = ib; i < ie; i++) {
494 	if (pfn->params.pole[i * pole_step] == double_stub) {
495 	    uint sdata[max_Sd_n];
496 	    int bps = pfn->params.BitsPerSample;
497 
498 	    p = &pfn->params.pole[i * pole_step];
499 	    fn_get_samples[pfn->params.BitsPerSample](pfn, i * bps * pfn->params.n, sdata);
500 	    for (k = 0; k < pfn->params.n; k++, p++)
501 		*p = fn_Sd_encode(pfn, k, (double)sdata[k]);
502 	}
503     }
504     p = &pfn->params.pole[i0 * pole_step];
505     t0 = tt - i0;
506     if (t0 == 0) {
507 	for (k = 0; k < pfn->params.n; k++, p++)
508 	    out[k] = *p;
509     } else {
510 	if (p[1 * pole_step_minor] == double_stub) {
511 	    for (k = 0; k < pfn->params.n; k++)
512 		fn_make_poles(&pfn->params.pole[ib * pole_step + k], pole_step,
513 			ie - ib - 1, i0 - ib);
514 	}
515 	t1 = 1 - t0;
516 	for (k = 0; k < pfn->params.n; k++, p++) {
517 	    double y = p[0 * pole_step_minor] * t1 * t1 * t1 +
518 		       p[1 * pole_step_minor] * t1 * t1 * t0 * 3 +
519 		       p[2 * pole_step_minor] * t1 * t0 * t0 * 3 +
520 		       p[3 * pole_step_minor] * t0 * t0 * t0;
521 	    if (y < pfn->params.Range[0])
522 		y = pfn->params.Range[0];
523 	    if (y > pfn->params.Range[1])
524 		y = pfn->params.Range[1];
525 	    out[k] = y;
526 	}
527     }
528     return 0;
529 }
530 
531 private inline void
decode_argument(const gs_function_Sd_t * pfn,const float * in,double T[max_Sd_m],int I[max_Sd_m])532 decode_argument(const gs_function_Sd_t *pfn, const float *in, double T[max_Sd_m], int I[max_Sd_m])
533 {
534     int i;
535 
536     for (i = 0; i < pfn->params.m; i++) {
537 	float xi = in[i];
538 	float d0 = pfn->params.Domain[2 * i + 0];
539 	float d1 = pfn->params.Domain[2 * i + 1];
540 	double t;
541 
542 	if (xi < d0)
543 	    xi = d0;
544 	if (xi > d1)
545 	    xi = d1;
546 	t = (xi - d0) * (pfn->params.Size[i] - 1) / (d1 - d0);
547 	I[i] = (int)floor(t);
548 	T[i] = t - I[i];
549     }
550 }
551 
552 private inline void
index_span(const gs_function_Sd_t * pfn,int * I,double * T,int ii,int * Ii,int * ib,int * ie)553 index_span(const gs_function_Sd_t *pfn, int *I, double *T, int ii, int *Ii, int *ib, int *ie)
554 {
555     *Ii = I[ii];
556     if (T[ii] != 0) {
557 	*ib = max(*Ii - 1, 0);
558 	*ie = min(pfn->params.Size[ii], *Ii + 3);
559     } else {
560 	*ib = *Ii;
561 	*ie = *Ii + 1;
562     }
563 }
564 
565 private inline int
load_vector_to(const gs_function_Sd_t * pfn,int s_offset,double * V)566 load_vector_to(const gs_function_Sd_t *pfn, int s_offset, double *V)
567 {
568     uint sdata[max_Sd_n];
569     int k, code;
570 
571     code = fn_get_samples[pfn->params.BitsPerSample](pfn, s_offset, sdata);
572     if (code < 0)
573 	return code;
574     for (k = 0; k < pfn->params.n; k++)
575 	V[k] = fn_Sd_encode(pfn, k, (double)sdata[k]);
576     return 0;
577 }
578 
579 private inline int
load_vector(const gs_function_Sd_t * pfn,int a_offset,int s_offset)580 load_vector(const gs_function_Sd_t *pfn, int a_offset, int s_offset)
581 {
582     if (*(pfn->params.pole + a_offset) == double_stub) {
583 	uint sdata[max_Sd_n];
584 	int k, code;
585 
586 	code = fn_get_samples[pfn->params.BitsPerSample](pfn, s_offset, sdata);
587 	if (code < 0)
588 	    return code;
589 	for (k = 0; k < pfn->params.n; k++)
590 	    *(pfn->params.pole + a_offset + k) = fn_Sd_encode(pfn, k, (double)sdata[k]);
591     }
592     return 0;
593 }
594 
595 private inline void
interpolate_vector(const gs_function_Sd_t * pfn,int offset,int pole_step,int power,int bias)596 interpolate_vector(const gs_function_Sd_t *pfn, int offset, int pole_step, int power, int bias)
597 {
598     int k;
599 
600     for (k = 0; k < pfn->params.n; k++)
601 	fn_make_poles(pfn->params.pole + offset + k, pole_step, power, bias);
602 }
603 
604 private inline void
interpolate_tensors(const gs_function_Sd_t * pfn,int * I,double * T,int offset,int pole_step,int power,int bias,int ii)605 interpolate_tensors(const gs_function_Sd_t *pfn, int *I, double *T,
606 	int offset, int pole_step, int power, int bias, int ii)
607 {
608     if (ii < 0)
609 	interpolate_vector(pfn, offset, pole_step, power, bias);
610     else {
611 	int s = pfn->params.array_step[ii];
612 	int Ii = I[ii];
613 
614 	if (T[ii] == 0) {
615 	    interpolate_tensors(pfn, I, T, offset + Ii * s, pole_step, power, bias, ii - 1);
616 	} else {
617 	    int l;
618 
619 	    for (l = 0; l < 4; l++)
620 		interpolate_tensors(pfn, I, T, offset + Ii * s + l * s / 3, pole_step, power, bias, ii - 1);
621 	}
622     }
623 }
624 
625 private inline bool
is_tensor_done(const gs_function_Sd_t * pfn,int * I,double * T,int a_offset,int ii)626 is_tensor_done(const gs_function_Sd_t *pfn, int *I, double *T, int a_offset, int ii)
627 {
628     /* Check an inner pole of the cell. */
629     int i, o = 0;
630 
631     for (i = ii; i >= 0; i--) {
632 	o += I[i] * pfn->params.array_step[i];
633 	if (T[i] != 0)
634 	    o += pfn->params.array_step[i] / 3;
635     }
636     if (*(pfn->params.pole + a_offset + o) != double_stub)
637 	return true;
638     return false;
639 }
640 
641 /* Creates a tensor of Bezier coefficients by node interpolation. */
642 private inline int
make_interpolation_tensor(const gs_function_Sd_t * pfn,int * I,double * T,int a_offset,int s_offset,int ii)643 make_interpolation_tensor(const gs_function_Sd_t *pfn, int *I, double *T,
644 			    int a_offset, int s_offset, int ii)
645 {
646     /* Well, this function isn't obvious. Trying to explain what it does.
647 
648        Suppose we have a 4x4x4...x4 hypercube of nodes, and we want to build
649        a multicubic interpolation function for the inner 2x2x2...x2 hypercube.
650        We represent the multicubic function with a tensor of Besier poles,
651        and the size of the tensor is 4x4x....x4. Note that the outer corners
652        of the tensor are equal to the corners of the 2x2x...x2 hypercube.
653 
654        We organized the 'pole' array so that a tensor of a cell
655        occupies the cell, and tensors for neighbour cells have a common hyperplane.
656 
657        For a 1-dimentional case the let the nodes are p0, p1, p2, p3,
658        and let the tensor coefficients are q0, q1, q2, q3.
659        We choose a cubic approximation, in which tangents at nodes p1, p2
660        are parallel to (p2 - p0) and (p3 - p1) correspondingly.
661        (Well, this doesn't give a the minimal curvity, but likely it is
662        what Adobe implementations do, see the bug 687352,
663        and we agree that it's some reasonable).
664 
665        For a 2-dimensional case we apply the 1-dimentional case through
666        the first dimension, and then construct a surface by varying the
667        second coordinate as a parameter. It gives a bicubic surface,
668        and the result doesn't depend on the order of coordinates
669        (I proved the latter with Matematica 3.0).
670        Then we know that an interpolation by one coordinate and
671        a differentiation by another coordinate are interchangeble operators.
672        Due to that poles of the interpolated function are same as
673        interpolated poles of the function (well, we didn't spend time
674        for a strong proof, but this fact was confirmed with testing the
675        implementation with POLE_CACHE_DEBUG).
676 
677        Then we apply the 2-dimentional considerations recursively
678        to all dimensions. This is exactly what this function does.
679 
680        When the source node array have an insufficient nomber of nodes
681        along a dimension, we duplicate ending nodes. This solution is done
682        choosen to the compatibility to the old code, but definitely
683        there exists a better one.
684      */
685     int code;
686 
687     if (ii < 0) {
688 	if (POLE_CACHE_IGNORE || *(pfn->params.pole + a_offset) == double_stub) {
689 	    code = load_vector(pfn, a_offset, s_offset);
690 	    if (code < 0)
691 		return code;
692 	}
693     } else {
694 	int Ii, ib, ie, i;
695 	int sa = pfn->params.array_step[ii];
696 	int ss = pfn->params.stream_step[ii];
697 
698 	index_span(pfn, I, T, ii, &Ii, &ib, &ie);
699 	if (POLE_CACHE_IGNORE || !is_tensor_done(pfn, I, T, a_offset, ii)) {
700 	    for (i = ib; i < ie; i++) {
701 		code = make_interpolation_tensor(pfn, I, T,
702 				a_offset + i * sa, s_offset + i * ss, ii - 1);
703 		if (code < 0)
704 		    return code;
705 	    }
706 	    if (T[ii] != 0)
707 		interpolate_tensors(pfn, I, T, a_offset + ib * sa, sa, ie - ib - 1,
708 				Ii - ib, ii - 1);
709 	}
710     }
711     return 0;
712 }
713 
714 /* Creates a subarray of samples. */
715 private inline int
make_interpolation_nodes(const gs_function_Sd_t * pfn,double * T0,double * T1,int * I,double * T,int a_offset,int s_offset,int ii)716 make_interpolation_nodes(const gs_function_Sd_t *pfn, double *T0, double *T1,
717 			    int *I, double *T,
718 			    int a_offset, int s_offset, int ii)
719 {
720     int code;
721 
722     if (ii < 0) {
723 	if (POLE_CACHE_IGNORE || *(pfn->params.pole + a_offset) == double_stub) {
724 	    code = load_vector(pfn, a_offset, s_offset);
725 	    if (code < 0)
726 		return code;
727 	}
728 	if (pfn->params.Order == 3) {
729 	    code = make_interpolation_tensor(pfn, I, T, 0, 0, pfn->params.m - 1);
730 	}
731     } else {
732 	int i;
733 	int i0 = (int)floor(T0[ii]);
734 	int i1 = (int)ceil(T1[ii]);
735 	int sa = pfn->params.array_step[ii];
736 	int ss = pfn->params.stream_step[ii];
737 
738 	if (i0 < 0 || i0 >= pfn->params.Size[ii])
739 	    return_error(gs_error_unregistered); /* Must not happen. */
740 	if (i1 < 0 || i1 >= pfn->params.Size[ii])
741 	    return_error(gs_error_unregistered); /* Must not happen. */
742 	I[ii] = i0;
743 	T[ii] = (i1 > i0 ? 1 : 0);
744 	for (i = i0; i <= i1; i++) {
745 	    code = make_interpolation_nodes(pfn, T0, T1, I, T,
746 			    a_offset + i * sa, s_offset + i * ss, ii - 1);
747 	    if (code < 0)
748 		return code;
749 	}
750     }
751     return 0;
752 }
753 
754 private inline int
evaluate_from_tenzor(const gs_function_Sd_t * pfn,int * I,double * T,int offset,int ii,double * y)755 evaluate_from_tenzor(const gs_function_Sd_t *pfn, int *I, double *T, int offset, int ii, double *y)
756 {
757     int s = pfn->params.array_step[ii], k, l, code;
758 
759     if (ii < 0) {
760 	for (k = 0; k < pfn->params.n; k++)
761 	    y[k] = *(pfn->params.pole + offset + k);
762     } else if (T[ii] == 0) {
763 	return evaluate_from_tenzor(pfn, I, T, offset + s * I[ii], ii - 1, y);
764     } else {
765 	double t0 = T[ii], t1 = 1 - t0;
766 	double p[4][max_Sd_n];
767 
768 	for (l = 0; l < 4; l++) {
769 	    code = evaluate_from_tenzor(pfn, I, T, offset + s * I[ii] + l * (s / 3), ii - 1, p[l]);
770 	    if (code < 0)
771 		return code;
772 	}
773 	for (k = 0; k < pfn->params.n; k++)
774 	    y[k] = p[0][k] * t1 * t1 * t1 +
775 		   p[1][k] * t1 * t1 * t0 * 3 +
776 		   p[2][k] * t1 * t0 * t0 * 3 +
777 	   p[3][k] * t0 * t0 * t0;
778     }
779     return 0;
780 }
781 
782 
783 /* Evaluate a Sampled function. */
784 /* A cubic interpolation with pole cache. */
785 /* Allows a fast check for extreme suspection with is_tensor_monotonic. */
786 private int
fn_Sd_evaluate_multicubic_cached(const gs_function_Sd_t * pfn,const float * in,float * out)787 fn_Sd_evaluate_multicubic_cached(const gs_function_Sd_t *pfn, const float *in, float *out)
788 {
789     double T[max_Sd_m], y[max_Sd_n];
790     int I[max_Sd_m], k, code;
791 
792     decode_argument(pfn, in, T, I);
793     code = make_interpolation_tensor(pfn, I, T, 0, 0, pfn->params.m - 1);
794     if (code < 0)
795 	return code;
796     evaluate_from_tenzor(pfn, I, T, 0, pfn->params.m - 1, y);
797     for (k = 0; k < pfn->params.n; k++) {
798 	double yk = y[k];
799 
800 	if (yk < pfn->params.Range[k * 2 + 0])
801 	    yk = pfn->params.Range[k * 2 + 0];
802 	if (yk > pfn->params.Range[k * 2 + 1])
803 	    yk = pfn->params.Range[k * 2 + 1];
804 	out[k] = yk;
805     }
806     return 0;
807 }
808 
809 /* Evaluate a Sampled function. */
810 private int
fn_Sd_evaluate(const gs_function_t * pfn_common,const float * in,float * out)811 fn_Sd_evaluate(const gs_function_t * pfn_common, const float *in, float *out)
812 {
813     const gs_function_Sd_t *pfn = (const gs_function_Sd_t *)pfn_common;
814     int code;
815 
816     if (pfn->params.Order == 3) {
817 	if (POLE_CACHE_GENERIC_1D || pfn->params.m > 1)
818 	    code = fn_Sd_evaluate_multicubic_cached(pfn, in, out);
819 	else
820 	    code = fn_Sd_evaluate_cubic_cached_1d(pfn, in, out);
821 #	if POLE_CACHE_DEBUG
822 	{   float y[max_Sd_n];
823 	    int k, code1;
824 
825 	    code1 = fn_Sd_evaluate_general(pfn_common, in, y);
826 	    assert(code == code1);
827 	    for (k = 0; k < pfn->params.n; k++)
828 		assert(any_abs(y[k] - out[k]) <= 1e-6 * (pfn->params.Range[k * 2 + 1] - pfn->params.Range[k * 2 + 0]));
829 	}
830 #	endif
831     } else
832 	code = fn_Sd_evaluate_general(pfn_common, in, out);
833     return code;
834 }
835 
836 /* Map a function subdomain to the sample index subdomain. */
837 private inline int
get_scaled_range(const gs_function_Sd_t * const pfn,const float * lower,const float * upper,int i,float * pw0,float * pw1)838 get_scaled_range(const gs_function_Sd_t *const pfn,
839 		   const float *lower, const float *upper,
840 		   int i, float *pw0, float *pw1)
841 {
842     float d0 = pfn->params.Domain[i * 2 + 0], d1 = pfn->params.Domain[i * 2 + 1];
843     float v0 = lower[i], v1 = upper[i];
844     float e0, e1, w0, w1, w;
845     const float small_noize = (float)1e-6;
846 
847     if (v0 < d0 || v0 > d1)
848 	return gs_error_rangecheck;
849     if (pfn->params.Encode)
850 	e0 = pfn->params.Encode[i * 2 + 0], e1 = pfn->params.Encode[i * 2 + 1];
851     else
852 	e0 = 0, e1 = (float)pfn->params.Size[i] - 1;
853     w0 = (v0 - d0) * (e1 - e0) / (d1 - d0) + e0;
854     if (w0 < 0)
855 	w0 = 0;
856     else if (w0 >= pfn->params.Size[i] - 1)
857 	w0 = (float)pfn->params.Size[i] - 1;
858     w1 = (v1 - d0) * (e1 - e0) / (d1 - d0) + e0;
859     if (w1 < 0)
860 	w1 = 0;
861     else if (w1 >= pfn->params.Size[i] - 1)
862 	w1 = (float)pfn->params.Size[i] - 1;
863     if (w0 > w1) {
864 	w = w0; w0 = w1; w1 = w;
865     }
866     if (floor(w0 + 1) - w0 < small_noize * any_abs(e1 - e0))
867 	w0 = (floor(w0) + 1);
868     if (w1 - floor(w1) < small_noize * any_abs(e1 - e0))
869 	w1 = floor(w1);
870     if (w0 > w1)
871 	w0 = w1;
872     *pw0 = w0;
873     *pw1 = w1;
874     return 0;
875 }
876 
877 /* Copy a tensor to a differently indexed pole array. */
878 private int
copy_poles(const gs_function_Sd_t * pfn,int * I,double * T0,double * T1,int a_offset,int ii,double * pole,int p_offset,int pole_step)879 copy_poles(const gs_function_Sd_t *pfn, int *I, double *T0, double *T1, int a_offset,
880 		int ii, double *pole, int p_offset, int pole_step)
881 {
882     int i, ei, sa, code;
883     int order = pfn->params.Order;
884 
885     if (pole_step <= 0)
886 	return_error(gs_error_limitcheck); /* Too small buffer. */
887     ei = (T0[ii] == T1[ii] ? 1 : order + 1);
888     sa = pfn->params.array_step[ii];
889     if (ii == 0) {
890 	for (i = 0; i < ei; i++)
891 	    *(pole + p_offset + i * pole_step) =
892 		    *(pfn->params.pole + a_offset + I[ii] * sa + i * (sa / order));
893     } else {
894 	for (i = 0; i < ei; i++) {
895 	    code = copy_poles(pfn, I, T0, T1, a_offset + I[ii] * sa + i * (sa / order), ii - 1,
896 			    pole, p_offset + i * pole_step, pole_step / 4);
897 	    if (code < 0)
898 		return code;
899 	}
900     }
901     return 0;
902 }
903 
904 private inline void
subcurve(double * pole,int pole_step,double t0,double t1)905 subcurve(double *pole, int pole_step, double t0, double t1)
906 {
907     /* Generated with subcurve.nb using Mathematica 3.0. */
908     double q0 = pole[pole_step * 0];
909     double q1 = pole[pole_step * 1];
910     double q2 = pole[pole_step * 2];
911     double q3 = pole[pole_step * 3];
912     double t01 = t0 - 1, t11 = t1 - 1;
913     double small = 1e-13;
914 
915 #define Power2(a) (a) * (a)
916 #define Power3(a) (a) * (a) * (a)
917     pole[pole_step * 0] = t0*(t0*(q3*t0 - 3*q2*t01) + 3*q1*Power2(t01)) - q0*Power3(t01);
918     pole[pole_step * 1] = q1*t01*(-2*t0 - t1 + 3*t0*t1) + t0*(q2*t0 + 2*q2*t1 -
919 			    3*q2*t0*t1 + q3*t0*t1) - q0*t11*Power2(t01);
920     pole[pole_step * 2] = t1*(2*q2*t0 + q2*t1 - 3*q2*t0*t1 + q3*t0*t1) +
921 			    q1*(-t0 - 2*t1 + 3*t0*t1)*t11 - q0*t01*Power2(t11);
922     pole[pole_step * 3] = t1*(t1*(3*q2 - 3*q2*t1 + q3*t1) +
923 			    3*q1*Power2(t11)) - q0*Power3(t11);
924 #undef Power2
925 #undef Power3
926     if (any_abs(pole[pole_step * 1] - pole[pole_step * 0]) < small)
927 	pole[pole_step * 1] = pole[pole_step * 0];
928     if (any_abs(pole[pole_step * 2] - pole[pole_step * 3]) < small)
929 	pole[pole_step * 2] = pole[pole_step * 3];
930 }
931 
932 private inline void
subline(double * pole,int pole_step,double t0,double t1)933 subline(double *pole, int pole_step, double t0, double t1)
934 {
935     double q0 = pole[pole_step * 0];
936     double q1 = pole[pole_step * 1];
937 
938     pole[pole_step * 0] = (1 - t0) * q0 + t0 * q1;
939     pole[pole_step * 1] = (1 - t1) * q0 + t1 * q1;
940 }
941 
942 private void
clamp_poles(double * T0,double * T1,int ii,int i,double * pole,int p_offset,int pole_step,int pole_step_i,int order)943 clamp_poles(double *T0, double *T1, int ii, int i, double * pole,
944 		int p_offset, int pole_step, int pole_step_i, int order)
945 {
946     if (ii < 0) {
947 	if (order == 3)
948 	    subcurve(pole + p_offset, pole_step_i, T0[i], T1[i]);
949 	else
950 	    subline(pole + p_offset, pole_step_i, T0[i], T1[i]);
951     } else if (i == ii) {
952 	clamp_poles(T0, T1, ii - 1, i, pole, p_offset, pole_step / 4, pole_step, order);
953     } else {
954 	int j, ei = (T0[ii] == T1[ii] ? 1 : order + 1);
955 
956 	for (j = 0; j < ei; j++)
957 	    clamp_poles(T0, T1, ii - 1, i, pole, p_offset + j * pole_step,
958 			    pole_step / 4, pole_step_i, order);
959     }
960 }
961 
962 private inline int /* 3 - don't know, 2 - decreesing, 0 - constant, 1 - increasing. */
curve_monotonity(double * pole,int pole_step)963 curve_monotonity(double *pole, int pole_step)
964 {
965     double p0 = pole[pole_step * 0];
966     double p1 = pole[pole_step * 1];
967     double p2 = pole[pole_step * 2];
968     double p3 = pole[pole_step * 3];
969 
970     if (p0 == p1 && any_abs(p1 - p2) < 1e-13 && p2 == p3)
971 	return 0;
972     if (p0 <= p1 && p1 <= p2 && p2 <= p3)
973 	return 1;
974     if (p0 >= p1 && p1 >= p2 && p2 >= p3)
975 	return 2;
976     /* Maybe not monotonic.
977        Don't want to solve quadratic equations, so return "don't know".
978        This case should be rare.
979      */
980     return 3;
981 }
982 
983 private inline int /* 2 - decreesing, 0 - constant, 1 - increasing. */
line_monotonity(double * pole,int pole_step)984 line_monotonity(double *pole, int pole_step)
985 {
986     double p0 = pole[pole_step * 0];
987     double p1 = pole[pole_step * 1];
988 
989     if (p1 - p0 > 1e-13)
990 	return 1;
991     if (p0 - p1 > 1e-13)
992 	return 2;
993     return 0;
994 }
995 
996 private int /* 3 bits per guide : 3 - non-monotonic or don't know,
997 		    2 - decreesing, 0 - constant, 1 - increasing.
998 		    The number of guides is order+1. */
tensor_dimension_monotonity(const double * T0,const double * T1,int ii,int i0,double * pole,int p_offset,int pole_step,int pole_step_i,int order)999 tensor_dimension_monotonity(const double *T0, const double *T1, int ii, int i0, double *pole,
1000 		int p_offset, int pole_step, int pole_step_i, int order)
1001 {
1002     if (ii < 0) {
1003 	if (order == 3)
1004 	    return curve_monotonity(pole + p_offset, pole_step_i);
1005 	else
1006 	    return line_monotonity(pole + p_offset, pole_step_i);
1007     } else if (i0 == ii) {
1008 	/* Delay the dimension till the end, and adjust pole_step. */
1009 	return tensor_dimension_monotonity(T0, T1, ii - 1, i0, pole, p_offset,
1010 			    pole_step / 4, pole_step, order);
1011     } else {
1012 	int j, ei = (T0[ii] == T1[ii] ? 1 : order + 1), m = 0, mm;
1013 
1014 	for (j = 0; j < ei; j++) {
1015 	    mm = tensor_dimension_monotonity(T0, T1, ii - 1, i0, pole, p_offset + j * pole_step,
1016 			    pole_step/ 4, pole_step_i, order);
1017 	    m |= mm << (j * 3);
1018 	    if (mm == 3) {
1019 		/* If one guide is not monotonic, the dimension is not monotonic.
1020 		   Can return early. */
1021 		break;
1022 	    }
1023 	}
1024 	return m;
1025     }
1026 }
1027 
1028 private inline int
is_tensor_monotonic_by_dimension(const gs_function_Sd_t * pfn,int * I,double * T0,double * T1,int i0,int k,uint * mask)1029 is_tensor_monotonic_by_dimension(const gs_function_Sd_t *pfn, int *I, double *T0, double *T1, int i0, int k,
1030 		    uint *mask /* 3 bits per guide : 3 - non-monotonic or don't know,
1031 		    2 - decreesing, 0 - constant, 1 - increasing.
1032 		    The number of guides is order+1. */)
1033 {
1034     double pole[4*4*4]; /* For a while restricting with 3-in cubic functions.
1035                  More arguments need a bigger buffer, but the rest of code is same. */
1036     int i, code, ii = pfn->params.m - 1;
1037     double TT0[3], TT1[3];
1038 
1039     *mask = 0;
1040     if (ii >= 3) {
1041 	 /* Unimplemented. We don't know practical cases,
1042 	    because currently it is only called while decomposing a shading.  */
1043 	return_error(gs_error_limitcheck);
1044     }
1045     code = copy_poles(pfn, I, T0, T1, k, ii, pole, 0, count_of(pole) / 4);
1046     if (code < 0)
1047 	return code;
1048     for (i = ii; i >= 0; i--) {
1049 	TT0[i] = 0;
1050 	if (T0[i] != T1[i]) {
1051 	    if (T0[i] != 0 || T1[i] != 1)
1052 		clamp_poles(T0, T1, ii, i, pole, 0, count_of(pole) / 4, -1, pfn->params.Order);
1053 	    TT1[i] = 1;
1054 	} else
1055 	    TT1[i] = 0;
1056     }
1057     *mask = tensor_dimension_monotonity(TT0, TT1, ii, i0, pole, 0,
1058 			count_of(pole) / 4, -1, pfn->params.Order);
1059     return 0;
1060 }
1061 
1062 private int /* error code */
is_lattice_monotonic_by_dimension(const gs_function_Sd_t * pfn,const double * T0,const double * T1,int * I,double * S0,double * S1,int ii,int i0,int k,uint * mask)1063 is_lattice_monotonic_by_dimension(const gs_function_Sd_t *pfn, const double *T0, const double *T1,
1064 	int *I, double *S0, double *S1, int ii, int i0, int k,
1065 	uint *mask /* 3 bits per guide : 1 - non-monotonic or don't know, 0 - monotonic;
1066 		      The number of guides is order+1. */)
1067 {
1068     if (ii == -1) {
1069 	/* fixme : could cache the cell monotonity against redundant evaluation. */
1070 	return is_tensor_monotonic_by_dimension(pfn, I, S0, S1, i0, k, mask);
1071     } else {
1072 	int i1 = (ii > i0 ? ii : ii == 0 ? i0 : ii - 1); /* Delay the dimension i0 till the end of recursion. */
1073 	int j, code;
1074 	int bi = (int)floor(T0[i1]);
1075 	int ei = (int)floor(T1[i1]);
1076 	uint m, mm, m1 = 0x49249249 & ((1 << ((pfn->params.Order + 1) * 3)) - 1);
1077 
1078 	if (floor(T1[i1]) == T1[i1])
1079 	    ei --;
1080 	m = 0;
1081 	for (j = bi; j <= ei; j++) {
1082 	    /* fixme : A better performance may be obtained with comparing central nodes with side ones. */
1083 	    I[i1] = j;
1084 	    S0[i1] = max(T0[i1] - j, 0);
1085 	    S1[i1] = min(T1[i1] - j, 1);
1086 	    code = is_lattice_monotonic_by_dimension(pfn, T0, T1, I, S0, S1, ii - 1, i0, k, &mm);
1087 	    if (code < 0)
1088 		return code;
1089 	    m |= mm;
1090 	    if (m == m1) /* Don't return early - shadings need to know about all dimensions. */
1091 		break;
1092 	}
1093 	if (ii == 0) {
1094 	    /* Detect non-monotonic guides. */
1095 	    m = m & (m >> 1);
1096 	}
1097 	*mask = m;
1098 	return 0;
1099     }
1100 }
1101 
1102 private inline int /* error code */
is_lattice_monotonic(const gs_function_Sd_t * pfn,const double * T0,const double * T1,int * I,double * S0,double * S1,int k,uint * mask)1103 is_lattice_monotonic(const gs_function_Sd_t *pfn, const double *T0, const double *T1,
1104 	 int *I, double *S0, double *S1,
1105 	 int k, uint *mask /* 1 bit per dimension : 1 - non-monotonic or don't know,
1106 		      0 - monotonic. */)
1107 {
1108     uint m, mm = 0;
1109     int i, code;
1110 
1111     for (i = 0; i < pfn->params.m; i++) {
1112 	if (T0[i] != T1[i]) {
1113 	    code = is_lattice_monotonic_by_dimension(pfn, T0, T1, I, S0, S1, pfn->params.m - 1, i, k, &m);
1114 	    if (code < 0)
1115 		return code;
1116 	    if (m)
1117 		mm |= 1 << i;
1118 	}
1119     }
1120     *mask = mm;
1121     return 0;
1122 }
1123 
1124 private int /* 3 bits per result : 3 - non-monotonic or don't know,
1125 	       2 - decreesing, 0 - constant, 1 - increasing,
1126 	       <0 - error. */
fn_Sd_1arg_linear_monotonic_rec(const gs_function_Sd_t * const pfn,int i0,int i1,const double * V0,const double * V1)1127 fn_Sd_1arg_linear_monotonic_rec(const gs_function_Sd_t *const pfn, int i0, int i1,
1128 				const double *V0, const double *V1)
1129 {
1130     if (i1 - i0 <= 1) {
1131 	int code = 0, i;
1132 
1133 	for (i = 0; i < pfn->params.n; i++) {
1134 	    if (V0[i] < V1[i])
1135 		code |= 1 << (i * 3);
1136 	    else if (V0[i] > V1[i])
1137 		code |= 2 << (i * 3);
1138 	}
1139 	return code;
1140     } else {
1141 	double VV[MAX_FAST_COMPS];
1142 	int ii = (i0 + i1) / 2, code, cod1;
1143 
1144 	code = load_vector_to(pfn, ii * pfn->params.n * pfn->params.BitsPerSample, VV);
1145 	if (code < 0)
1146 	    return code;
1147 	if (code & (code >>1))
1148 	    return code; /* Not monotonic by some component of the result. */
1149 	code = fn_Sd_1arg_linear_monotonic_rec(pfn, i0, ii, V0, VV);
1150 	if (code < 0)
1151 	    return code;
1152 	cod1 = fn_Sd_1arg_linear_monotonic_rec(pfn, ii, i1, VV, V1);
1153 	if (cod1 < 0)
1154 	    return cod1;
1155 	return code | cod1;
1156     }
1157 }
1158 
1159 private int
fn_Sd_1arg_linear_monotonic(const gs_function_Sd_t * const pfn,double T0,double T1,uint * mask)1160 fn_Sd_1arg_linear_monotonic(const gs_function_Sd_t *const pfn, double T0, double T1,
1161 			    uint *mask /* 1 - non-monotonic or don't know, 0 - monotonic. */)
1162 {
1163     int i0 = (int)floor(T0);
1164     int i1 = (int)ceil(T1), code;
1165     double V0[MAX_FAST_COMPS], V1[MAX_FAST_COMPS];
1166 
1167     if (i1 - i0 > 1) {
1168 	code = load_vector_to(pfn, i0 * pfn->params.n * pfn->params.BitsPerSample, V0);
1169 	if (code < 0)
1170 	    return code;
1171 	code = load_vector_to(pfn, i1 * pfn->params.n * pfn->params.BitsPerSample, V1);
1172 	if (code < 0)
1173 	    return code;
1174 	code = fn_Sd_1arg_linear_monotonic_rec(pfn, i0, i1, V0, V1);
1175 	if (code < 0)
1176 	    return code;
1177 	if (code & (code >> 1)) {
1178 	    *mask = 1;
1179 	    return 0;
1180 	}
1181     }
1182     *mask = 0;
1183     return 1;
1184 }
1185 
1186 #define DEBUG_Sd_1arg 0
1187 
1188 /* Test whether a Sampled function is monotonic. */
1189 private int /* 1 = monotonic, 0 = not or don't know, <0 = error. */
fn_Sd_is_monotonic_aux(const gs_function_Sd_t * const pfn,const float * lower,const float * upper,uint * mask)1190 fn_Sd_is_monotonic_aux(const gs_function_Sd_t *const pfn,
1191 		   const float *lower, const float *upper,
1192 		   uint *mask /* 1 bit per dimension : 1 - non-monotonic or don't know,
1193 		      0 - monotonic. */)
1194 {
1195     int i, code, ii = pfn->params.m - 1;
1196     int I[4];
1197     double T0[count_of(I)], T1[count_of(I)];
1198     double S0[count_of(I)], S1[count_of(I)];
1199     uint m, mm, m1;
1200 #   if DEBUG_Sd_1arg
1201     int code1, mask1;
1202 #   endif
1203 
1204     if (ii >= count_of(T0)) {
1205 	 /* Unimplemented. We don't know practical cases,
1206 	    because currently it is only called while decomposing a shading.  */
1207 	return_error(gs_error_limitcheck);
1208     }
1209     for (i = 0; i <= ii; i++) {
1210 	float w0, w1;
1211 
1212 	code = get_scaled_range(pfn, lower, upper, i, &w0, &w1);
1213 	if (code < 0)
1214 	    return code;
1215 	T0[i] = w0;
1216 	T1[i] = w1;
1217     }
1218     if (pfn->params.m == 1 && pfn->params.Order == 1 && pfn->params.n <= MAX_FAST_COMPS) {
1219 	code = fn_Sd_1arg_linear_monotonic(pfn, T0[0], T1[0], mask);
1220 #	if !DEBUG_Sd_1arg
1221 	    return code;
1222 #	else
1223 	    mask1 = *mask;
1224 	    code1 = code;
1225 #	endif
1226     }
1227     m1 = (1 << pfn->params.m )- 1;
1228     code = make_interpolation_nodes(pfn, T0, T1, I, S0, 0, 0, ii);
1229     if (code < 0)
1230 	return code;
1231     mm = 0;
1232     for (i = 0; i < pfn->params.n; i++) {
1233 	code = is_lattice_monotonic(pfn, T0, T1, I, S0, S1, i, &m);
1234 	if (code < 0)
1235 	    return code;
1236 	mm |= m;
1237 	if (mm == m1) /* Don't return early - shadings need to know about all dimensions. */
1238 	    break;
1239     }
1240 #   if DEBUG_Sd_1arg
1241 	if (mask1 != mm)
1242 	    return_error(gs_error_unregistered);
1243 	if (code1 != !mm)
1244 	    return_error(gs_error_unregistered);
1245 #   endif
1246     *mask = mm;
1247     return !mm;
1248 }
1249 
1250 /* Test whether a Sampled function is monotonic. */
1251 /* 1 = monotonic, 0 = don't know, <0 = error. */
1252 private int
fn_Sd_is_monotonic(const gs_function_t * pfn_common,const float * lower,const float * upper,uint * mask)1253 fn_Sd_is_monotonic(const gs_function_t * pfn_common,
1254 		   const float *lower, const float *upper, uint *mask)
1255 {
1256     const gs_function_Sd_t *const pfn =
1257 	(const gs_function_Sd_t *)pfn_common;
1258 
1259     return fn_Sd_is_monotonic_aux(pfn, lower, upper, mask);
1260 }
1261 
1262 /* Return Sampled function information. */
1263 private void
fn_Sd_get_info(const gs_function_t * pfn_common,gs_function_info_t * pfi)1264 fn_Sd_get_info(const gs_function_t *pfn_common, gs_function_info_t *pfi)
1265 {
1266     const gs_function_Sd_t *const pfn =
1267 	(const gs_function_Sd_t *)pfn_common;
1268     long size;
1269     int i;
1270 
1271     gs_function_get_info_default(pfn_common, pfi);
1272     pfi->DataSource = &pfn->params.DataSource;
1273     for (i = 0, size = 1; i < pfn->params.m; ++i)
1274 	size *= pfn->params.Size[i];
1275     pfi->data_size =
1276 	(size * pfn->params.n * pfn->params.BitsPerSample + 7) >> 3;
1277 }
1278 
1279 /* Write Sampled function parameters on a parameter list. */
1280 private int
fn_Sd_get_params(const gs_function_t * pfn_common,gs_param_list * plist)1281 fn_Sd_get_params(const gs_function_t *pfn_common, gs_param_list *plist)
1282 {
1283     const gs_function_Sd_t *const pfn =
1284 	(const gs_function_Sd_t *)pfn_common;
1285     int ecode = fn_common_get_params(pfn_common, plist);
1286     int code;
1287 
1288     if (pfn->params.Order != 1) {
1289 	if ((code = param_write_int(plist, "Order", &pfn->params.Order)) < 0)
1290 	    ecode = code;
1291     }
1292     if ((code = param_write_int(plist, "BitsPerSample",
1293 				&pfn->params.BitsPerSample)) < 0)
1294 	ecode = code;
1295     if (pfn->params.Encode) {
1296 	if ((code = param_write_float_values(plist, "Encode",
1297 					     pfn->params.Encode,
1298 					     2 * pfn->params.m, false)) < 0)
1299 	    ecode = code;
1300     }
1301     if (pfn->params.Decode) {
1302 	if ((code = param_write_float_values(plist, "Decode",
1303 					     pfn->params.Decode,
1304 					     2 * pfn->params.n, false)) < 0)
1305 	    ecode = code;
1306     }
1307     if (pfn->params.Size) {
1308 	if ((code = param_write_int_values(plist, "Size", pfn->params.Size,
1309 					   pfn->params.m, false)) < 0)
1310 	    ecode = code;
1311     }
1312     return ecode;
1313 }
1314 
1315 /* Make a scaled copy of a Sampled function. */
1316 private int
fn_Sd_make_scaled(const gs_function_Sd_t * pfn,gs_function_Sd_t ** ppsfn,const gs_range_t * pranges,gs_memory_t * mem)1317 fn_Sd_make_scaled(const gs_function_Sd_t *pfn, gs_function_Sd_t **ppsfn,
1318 		  const gs_range_t *pranges, gs_memory_t *mem)
1319 {
1320     gs_function_Sd_t *psfn =
1321 	gs_alloc_struct(mem, gs_function_Sd_t, &st_function_Sd,
1322 			"fn_Sd_make_scaled");
1323     int code;
1324 
1325     if (psfn == 0)
1326 	return_error(gs_error_VMerror);
1327     psfn->params = pfn->params;
1328     psfn->params.Encode = 0;		/* in case of failure */
1329     psfn->params.Decode = 0;
1330     psfn->params.Size =
1331 	fn_copy_values(pfn->params.Size, pfn->params.m, sizeof(int), mem);
1332     if ((code = (psfn->params.Size == 0 ?
1333 		 gs_note_error(gs_error_VMerror) : 0)) < 0 ||
1334 	(code = fn_common_scale((gs_function_t *)psfn,
1335 				(const gs_function_t *)pfn,
1336 				pranges, mem)) < 0 ||
1337 	(code = fn_scale_pairs(&psfn->params.Encode, pfn->params.Encode,
1338 			       pfn->params.m, NULL, mem)) < 0 ||
1339 	(code = fn_scale_pairs(&psfn->params.Decode, pfn->params.Decode,
1340 			       pfn->params.n, pranges, mem)) < 0) {
1341 	gs_function_free((gs_function_t *)psfn, true, mem);
1342     } else
1343 	*ppsfn = psfn;
1344     return code;
1345 }
1346 
1347 /* Free the parameters of a Sampled function. */
1348 void
gs_function_Sd_free_params(gs_function_Sd_params_t * params,gs_memory_t * mem)1349 gs_function_Sd_free_params(gs_function_Sd_params_t * params, gs_memory_t * mem)
1350 {
1351     gs_free_const_object(mem, params->Size, "Size");
1352     gs_free_const_object(mem, params->Decode, "Decode");
1353     gs_free_const_object(mem, params->Encode, "Encode");
1354     fn_common_free_params((gs_function_params_t *) params, mem);
1355     gs_free_object(mem, params->pole, "gs_function_Sd_free_params");
1356     gs_free_object(mem, params->array_step, "gs_function_Sd_free_params");
1357     gs_free_object(mem, params->stream_step, "gs_function_Sd_free_params");
1358 }
1359 
1360 /* aA helper for gs_function_Sd_serialize. */
serialize_array(const float * a,int half_size,stream * s)1361 private int serialize_array(const float *a, int half_size, stream *s)
1362 {
1363     uint n;
1364     const float dummy[2] = {0, 0};
1365     int i, code;
1366 
1367     if (a != NULL)
1368 	return sputs(s, (const byte *)a, sizeof(a[0]) * half_size * 2, &n);
1369     for (i = 0; i < half_size; i++) {
1370 	code = sputs(s, (const byte *)dummy, sizeof(dummy), &n);
1371 	if (code < 0)
1372 	    return code;
1373     }
1374     return 0;
1375 }
1376 
1377 /* Serialize. */
1378 private int
gs_function_Sd_serialize(const gs_function_t * pfn,stream * s)1379 gs_function_Sd_serialize(const gs_function_t * pfn, stream *s)
1380 {
1381     uint n;
1382     const gs_function_Sd_params_t * p = (const gs_function_Sd_params_t *)&pfn->params;
1383     gs_function_info_t info;
1384     int code = fn_common_serialize(pfn, s);
1385     ulong pos;
1386     uint count;
1387     byte buf[100];
1388     const byte *ptr;
1389 
1390     if (code < 0)
1391 	return code;
1392     code = sputs(s, (const byte *)&p->Order, sizeof(p->Order), &n);
1393     if (code < 0)
1394 	return code;
1395     code = sputs(s, (const byte *)&p->BitsPerSample, sizeof(p->BitsPerSample), &n);
1396     if (code < 0)
1397 	return code;
1398     code = serialize_array(p->Encode, p->m, s);
1399     if (code < 0)
1400 	return code;
1401     code = serialize_array(p->Decode, p->n, s);
1402     if (code < 0)
1403 	return code;
1404     gs_function_get_info(pfn, &info);
1405     code = sputs(s, (const byte *)&info.data_size, sizeof(info.data_size), &n);
1406     if (code < 0)
1407 	return code;
1408     for (pos = 0; pos < info.data_size; pos += count) {
1409 	count = min(sizeof(buf), info.data_size - pos);
1410 	data_source_access_only(info.DataSource, pos, count, buf, &ptr);
1411 	code = sputs(s, ptr, count, &n);
1412 	if (code < 0)
1413 	    return code;
1414     }
1415     return 0;
1416 }
1417 
1418 /* Allocate and initialize a Sampled function. */
1419 int
gs_function_Sd_init(gs_function_t ** ppfn,const gs_function_Sd_params_t * params,gs_memory_t * mem)1420 gs_function_Sd_init(gs_function_t ** ppfn,
1421 		  const gs_function_Sd_params_t * params, gs_memory_t * mem)
1422 {
1423     static const gs_function_head_t function_Sd_head = {
1424 	function_type_Sampled,
1425 	{
1426 	    (fn_evaluate_proc_t) fn_Sd_evaluate,
1427 	    (fn_is_monotonic_proc_t) fn_Sd_is_monotonic,
1428 	    (fn_get_info_proc_t) fn_Sd_get_info,
1429 	    (fn_get_params_proc_t) fn_Sd_get_params,
1430 	    (fn_make_scaled_proc_t) fn_Sd_make_scaled,
1431 	    (fn_free_params_proc_t) gs_function_Sd_free_params,
1432 	    fn_common_free,
1433 	    (fn_serialize_proc_t) gs_function_Sd_serialize,
1434 	}
1435     };
1436     int code;
1437     int i;
1438 
1439     *ppfn = 0;			/* in case of error */
1440     code = fn_check_mnDR((const gs_function_params_t *)params,
1441 			 params->m, params->n);
1442     if (code < 0)
1443 	return code;
1444     if (params->m > max_Sd_m)
1445 	return_error(gs_error_limitcheck);
1446     switch (params->Order) {
1447 	case 0:		/* use default */
1448 	case 1:
1449 	case 3:
1450 	    break;
1451 	default:
1452 	    return_error(gs_error_rangecheck);
1453     }
1454     switch (params->BitsPerSample) {
1455 	case 1:
1456 	case 2:
1457 	case 4:
1458 	case 8:
1459 	case 12:
1460 	case 16:
1461 	case 24:
1462 	case 32:
1463 	    break;
1464 	default:
1465 	    return_error(gs_error_rangecheck);
1466     }
1467     for (i = 0; i < params->m; ++i)
1468 	if (params->Size[i] <= 0)
1469 	    return_error(gs_error_rangecheck);
1470     {
1471 	gs_function_Sd_t *pfn =
1472 	    gs_alloc_struct(mem, gs_function_Sd_t, &st_function_Sd,
1473 			    "gs_function_Sd_init");
1474 	int bps, sa, ss, i, order;
1475 
1476 	if (pfn == 0)
1477 	    return_error(gs_error_VMerror);
1478 	pfn->params = *params;
1479 	if (params->Order == 0)
1480 	    pfn->params.Order = 1;	/* default */
1481 	pfn->params.pole = NULL;
1482 	pfn->params.array_step = NULL;
1483 	pfn->params.stream_step = NULL;
1484 	pfn->head = function_Sd_head;
1485 	pfn->params.array_size = 0;
1486 	if (pfn->params.m == 1 && pfn->params.Order == 1 && pfn->params.n <= MAX_FAST_COMPS && !DEBUG_Sd_1arg) {
1487 	    /* Won't use pole cache. Call fn_Sd_1arg_linear_monotonic instead. */
1488 	} else {
1489 	    pfn->params.array_step = (int *)gs_alloc_byte_array(mem,
1490 				    max_Sd_m, sizeof(int), "gs_function_Sd_init");
1491 	    pfn->params.stream_step = (int *)gs_alloc_byte_array(mem,
1492 				    max_Sd_m, sizeof(int), "gs_function_Sd_init");
1493 	    if (pfn->params.array_step == NULL || pfn->params.stream_step == NULL)
1494 		return_error(gs_error_VMerror);
1495 	    bps = pfn->params.BitsPerSample;
1496 	    sa = pfn->params.n;
1497 	    ss = pfn->params.n * bps;
1498 	    order = pfn->params.Order;
1499 	    for (i = 0; i < pfn->params.m; i++) {
1500 		pfn->params.array_step[i] = sa * order;
1501 		sa = (pfn->params.Size[i] * order - (order - 1)) * sa;
1502 		pfn->params.stream_step[i] = ss;
1503 		ss = pfn->params.Size[i] * ss;
1504 	    }
1505 	    pfn->params.pole = (double *)gs_alloc_byte_array(mem,
1506 				    sa, sizeof(double), "gs_function_Sd_init");
1507 	    if (pfn->params.pole == NULL)
1508 		return_error(gs_error_VMerror);
1509 	    for (i = 0; i < sa; i++)
1510 		pfn->params.pole[i] = double_stub;
1511 	    pfn->params.array_size = sa;
1512 	}
1513 	*ppfn = (gs_function_t *) pfn;
1514     }
1515     return 0;
1516 }
1517