/* Copyright (C) 1997, 2000 Aladdin Enterprises. All rights reserved.

  This software is provided AS-IS with no warranty, either express or
  implied.

  This software is distributed under license and may not be copied,
  modified or distributed except as expressly authorized under the terms
  of the license contained in the file LICENSE in this distribution.

  For more information about licensing, please refer to
  http://www.ghostscript.com/licensing/. For information on
  commercial licensing, go to http://www.artifex.com/licensing/ or
  contact Artifex Software, Inc., 101 Lucas Valley Road #110,
  San Rafael, CA 94903, U.S.A., +1(415)492-9861.
*/

/* $Id: gsfunc0.c,v 1.27 2005/07/15 03:36:03 dan Exp $ */
/* Implementation of FunctionType 0 (Sampled) Functions */
#include "math_.h"
#include "gx.h"
#include "gserrors.h"
#include "gsfunc0.h"
#include "gsparam.h"
#include "gxfarith.h"
#include "gxfunc.h"
#include "stream.h"

#define POLE_CACHE_DEBUG 0      /* A temporary development aid.
                                   Remove after the beta testing. */
#define POLE_CACHE_GENERIC_1D 1 /* A temporary development aid.
                                   Not decided yet - see fn_Sd_evaluate_cubic_cached_1d. */
#define POLE_CACHE_IGNORE 0     /* A temporary development aid.
                                   Remove after the beta testing. */

#if POLE_CACHE_DEBUG
#  include <assert.h>
#endif

#define MAX_FAST_COMPS 8

typedef struct gs_function_Sd_s {
    gs_function_head_t head;
    gs_function_Sd_params_t params;
} gs_function_Sd_t;

/* GC descriptor */
private_st_function_Sd();
private
ENUM_PTRS_WITH(function_Sd_enum_ptrs, gs_function_Sd_t *pfn)
{
    index -= 6;
    if (index < st_data_source_max_ptrs)
        return ENUM_USING(st_data_source, &pfn->params.DataSource,
                          sizeof(pfn->params.DataSource), index);
    return ENUM_USING_PREFIX(st_function, st_data_source_max_ptrs);
}
ENUM_PTR3(0, gs_function_Sd_t, params.Encode, params.Decode, params.Size);
ENUM_PTR3(3, gs_function_Sd_t, params.pole, params.array_step, params.stream_step);
ENUM_PTRS_END
private
RELOC_PTRS_WITH(function_Sd_reloc_ptrs, gs_function_Sd_t *pfn)
{
    RELOC_PREFIX(st_function);
    RELOC_USING(st_data_source, &pfn->params.DataSource,
                sizeof(pfn->params.DataSource));
    RELOC_PTR3(gs_function_Sd_t, params.Encode, params.Decode, params.Size);
    RELOC_PTR3(gs_function_Sd_t, params.pole, params.array_step, params.stream_step);
}
RELOC_PTRS_END

/* Define the maximum plausible number of inputs and outputs */
/* for a Sampled function. */
#define max_Sd_m 16
#define max_Sd_n 16

/* Get one set of sample values. */
#define SETUP_SAMPLES(bps, nbytes)\
    int n = pfn->params.n;\
    byte buf[max_Sd_n * ((bps + 7) >> 3)];\
    const byte *p;\
    int i;\
\
    data_source_access(&pfn->params.DataSource, offset >> 3,\
                       nbytes, buf, &p)

private int
fn_gets_1(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(1, ((offset & 7) + n + 7) >> 3);
    for (i = 0; i < n; ++i) {
        samples[i] = (*p >> (~offset & 7)) & 1;
        if (!(++offset & 7))
            p++;
    }
    return 0;
}
private int
fn_gets_2(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(2, (((offset & 7) >> 1) + n + 3) >> 2);
    for (i = 0; i < n; ++i) {
        samples[i] = (*p >> (6 - (offset & 7))) & 3;
        if (!((offset += 2) & 7))
            p++;
    }
    return 0;
}
private int
fn_gets_4(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(4, (((offset & 7) >> 2) + n + 1) >> 1);
    for (i = 0; i < n; ++i) {
        samples[i] = ((offset ^= 4) & 4 ? *p >> 4 : *p++ & 0xf);
    }
    return 0;
}
private int
fn_gets_8(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(8, n);
    for (i = 0; i < n; ++i) {
        samples[i] = *p++;
    }
    return 0;
}
private int
fn_gets_12(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(12, (((offset & 7) >> 2) + 3 * n + 1) >> 1);
    for (i = 0; i < n; ++i) {
        if (offset & 4)
            samples[i] = ((*p & 0xf) << 8) + p[1], p += 2;
        else
            samples[i] = (*p << 4) + (p[1] >> 4), p++;
        offset ^= 4;
    }
    return 0;
}
private int
fn_gets_16(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(16, n * 2);
    for (i = 0; i < n; ++i) {
        samples[i] = (*p << 8) + p[1];
        p += 2;
    }
    return 0;
}
private int
fn_gets_24(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(24, n * 3);
    for (i = 0; i < n; ++i) {
        samples[i] = (*p << 16) + (p[1] << 8) + p[2];
        p += 3;
    }
    return 0;
}
private int
fn_gets_32(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(32, n * 4);
    for (i = 0; i < n; ++i) {
        samples[i] = (*p << 24) + (p[1] << 16) + (p[2] << 8) + p[3];
        p += 4;
    }
    return 0;
}

private int (*const fn_get_samples[]) (const gs_function_Sd_t * pfn,
                                       ulong offset, uint * samples) =
{
    0, fn_gets_1, fn_gets_2, 0, fn_gets_4, 0, 0, 0,
    fn_gets_8, 0, 0, 0, fn_gets_12, 0, 0, 0,
    fn_gets_16, 0, 0, 0, 0, 0, 0, 0,
    fn_gets_24, 0, 0, 0, 0, 0, 0, 0,
    fn_gets_32
};

/*
 * Compute a value by cubic interpolation.
 * f[] = f(0), f(1), f(2), f(3); 1 < x < 2.
 * The formula is derived from those presented in
 * http://www.cs.uwa.edu.au/undergraduate/units/233.413/Handouts/Lecture04.html
 * (thanks to Raph Levien for the reference).
 */
private double
interpolate_cubic(floatp x, floatp f0, floatp f1, floatp f2, floatp f3)
{
    /*
     * The parameter 'a' affects the contribution of the high-frequency
     * components.  The abovementioned source suggests a = -0.5.
     */
#define a (-0.5)
#define SQR(v) ((v) * (v))
#define CUBE(v) ((v) * (v) * (v))
    const double xm1 = x - 1, m2x = 2 - x, m3x = 3 - x;
    const double c =
        (a * CUBE(x) - 5 * a * SQR(x) + 8 * a * x - 4 * a) * f0 +
        ((a+2) * CUBE(xm1) - (a+3) * SQR(xm1) + 1) * f1 +
        ((a+2) * CUBE(m2x) - (a+3) * SQR(m2x) + 1) * f2 +
        (a * CUBE(m3x) - 5 * a * SQR(m3x) + 8 * a * m3x - 4 * a) * f3;

    if_debug6('~', "[~](%g, %g, %g, %g)order3(%g) => %g\n",
              f0, f1, f2, f3, x, c);
    return c;
#undef a
#undef SQR
#undef CUBE
}
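
/*
 * A small self-check sketch (not part of the original code; guarded out so
 * it cannot affect the build).  It only illustrates that the kernel above
 * reproduces the node values at the cell ends, and shows one midpoint value
 * for concrete example nodes.
 */
#if 0
private void
interpolate_cubic_sketch(void)
{
    const double f0 = 0, f1 = 10, f2 = 20, f3 = 40;
    double at1 = interpolate_cubic(1.0, f0, f1, f2, f3);  /* == f1 == 10 */
    double at2 = interpolate_cubic(2.0, f0, f1, f2, f3);  /* == f2 == 20 */
    double mid = interpolate_cubic(1.5, f0, f1, f2, f3);  /* == 14.375 for these nodes */

    (void)at1; (void)at2; (void)mid;
}
#endif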

/*
 * Compute a value by quadratic interpolation.
 * f[] = f(0), f(1), f(2); 0 < x < 1.
 *
 * We used to use a quadratic formula for this, derived from
 * f(0) = f0, f(1) = f1, f'(1) = (f2 - f0) / 2, but now we
 * match what we believe is Acrobat Reader's behavior.
 */
inline private double
interpolate_quadratic(floatp x, floatp f0, floatp f1, floatp f2)
{
    return interpolate_cubic(x + 1, f0, f0, f1, f2);
}

/* Calculate a result by multicubic interpolation. */
private void
fn_interpolate_cubic(const gs_function_Sd_t *pfn, const float *fparts,
                     const int *iparts, const ulong *factors,
                     float *samples, ulong offset, int m)
{
    int j;

top:
    if (m == 0) {
        uint sdata[max_Sd_n];

        (*fn_get_samples[pfn->params.BitsPerSample])(pfn, offset, sdata);
        for (j = pfn->params.n - 1; j >= 0; --j)
            samples[j] = (float)sdata[j];
    } else {
        float fpart = *fparts++;
        int ipart = *iparts++;
        ulong delta = *factors++;
        int size = pfn->params.Size[pfn->params.m - m];
        float samples1[max_Sd_n], samplesm1[max_Sd_n], samples2[max_Sd_n];

        --m;
        if (is_fzero(fpart))
            goto top;
        fn_interpolate_cubic(pfn, fparts, iparts, factors, samples,
                             offset, m);
        fn_interpolate_cubic(pfn, fparts, iparts, factors, samples1,
                             offset + delta, m);
        /* Ensure we don't try to access out of bounds. */
        /*
         * If size == 1, the only possible value for ipart and fpart is
         * 0, so we've already handled this case.
         */
        if (size == 2) {        /* ipart = 0 */
            /* Use linear interpolation. */
            for (j = pfn->params.n - 1; j >= 0; --j)
                samples[j] += (samples1[j] - samples[j]) * fpart;
            return;
        }
        if (ipart == 0) {
            /* Use quadratic interpolation. */
            fn_interpolate_cubic(pfn, fparts, iparts, factors,
                                 samples2, offset + delta * 2, m);
            for (j = pfn->params.n - 1; j >= 0; --j)
                samples[j] =
                    interpolate_quadratic(fpart, samples[j],
                                          samples1[j], samples2[j]);
            return;
        }
        /* At this point we know ipart > 0, size >= 3. */
        fn_interpolate_cubic(pfn, fparts, iparts, factors, samplesm1,
                             offset - delta, m);
        if (ipart == size - 2) {
            /* Use quadratic interpolation. */
            for (j = pfn->params.n - 1; j >= 0; --j)
                samples[j] =
                    interpolate_quadratic(1 - fpart, samples1[j],
                                          samples[j], samplesm1[j]);
            return;
        }
        /* Now we know 0 < ipart < size - 2, size > 3. */
        fn_interpolate_cubic(pfn, fparts, iparts, factors,
                             samples2, offset + delta * 2, m);
        for (j = pfn->params.n - 1; j >= 0; --j)
            samples[j] =
                interpolate_cubic(fpart + 1, samplesm1[j], samples[j],
                                  samples1[j], samples2[j]);
    }
}

/* Calculate a result by multilinear interpolation. */
private void
fn_interpolate_linear(const gs_function_Sd_t *pfn, const float *fparts,
                      const ulong *factors, float *samples, ulong offset, int m)
{
    int j;

top:
    if (m == 0) {
        uint sdata[max_Sd_n];

        (*fn_get_samples[pfn->params.BitsPerSample])(pfn, offset, sdata);
        for (j = pfn->params.n - 1; j >= 0; --j)
            samples[j] = (float)sdata[j];
    } else {
        float fpart = *fparts++;
        float samples1[max_Sd_n];

        if (is_fzero(fpart)) {
            ++factors;
            --m;
            goto top;
        }
        fn_interpolate_linear(pfn, fparts, factors + 1, samples,
                              offset, m - 1);
        fn_interpolate_linear(pfn, fparts, factors + 1, samples1,
                              offset + *factors, m - 1);
        for (j = pfn->params.n - 1; j >= 0; --j)
            samples[j] += (samples1[j] - samples[j]) * fpart;
    }
}
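
/*
 * A worked illustration (not from the original comments): for m = 2 with
 * fractional parts fx = fparts[0], fy = fparts[1] and cell corner samples
 * S00, S10, S01, S11 (the first index is the first input dimension), the
 * recursion above computes ordinary bilinear interpolation:
 *
 *     A = S00 + (S01 - S00) * fy        (recursive call at 'offset')
 *     B = S10 + (S11 - S10) * fy        (recursive call at 'offset + factors[0]')
 *     result = A + (B - A) * fx
 *            = (1-fx)(1-fy)*S00 + fx(1-fy)*S10 + (1-fx)fy*S01 + fx*fy*S11.
 */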

private inline double
fn_Sd_encode(const gs_function_Sd_t *pfn, int i, double sample)
{
    float d0, d1, r0, r1;
    double value;
    int bps = pfn->params.BitsPerSample;
    /* x86 machines have problems with shifts if bps >= 32 */
    uint max_samp = (bps < (sizeof(uint) * 8)) ? ((1 << bps) - 1) : max_uint;

    if (pfn->params.Range)
        r0 = pfn->params.Range[2 * i], r1 = pfn->params.Range[2 * i + 1];
    else
        r0 = 0, r1 = (float)((1 << bps) - 1);
    if (pfn->params.Decode)
        d0 = pfn->params.Decode[2 * i], d1 = pfn->params.Decode[2 * i + 1];
    else
        d0 = r0, d1 = r1;

    value = sample * (d1 - d0) / max_samp + d0;
    if (value < r0)
        value = r0;
    else if (value > r1)
        value = r1;
    return value;
}
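
/*
 * A worked example (illustrative, not from the original comments): with
 * BitsPerSample = 8, Decode = [0 1] and Range = [0 1], max_samp is 255,
 * so a raw sample of 255 maps to 255 * (1 - 0) / 255 + 0 = 1.0 and a raw
 * sample of 64 maps to 64 / 255 ~= 0.251; the result is then clamped to
 * the Range interval.
 */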

/* Evaluate a Sampled function. */
/* A generic algorithm with a recursion by dimensions. */
private int
fn_Sd_evaluate_general(const gs_function_t * pfn_common, const float *in, float *out)
{
    const gs_function_Sd_t *pfn = (const gs_function_Sd_t *)pfn_common;
    int bps = pfn->params.BitsPerSample;
    ulong offset = 0;
    int i;
    float encoded[max_Sd_m];
    int iparts[max_Sd_m];       /* only needed for cubic interpolation */
    ulong factors[max_Sd_m];
    float samples[max_Sd_n];

    /* Encode the input values. */

    for (i = 0; i < pfn->params.m; ++i) {
        float d0 = pfn->params.Domain[2 * i],
            d1 = pfn->params.Domain[2 * i + 1];
        float arg = in[i], enc;

        if (arg < d0)
            arg = d0;
        else if (arg > d1)
            arg = d1;
        if (pfn->params.Encode) {
            float e0 = pfn->params.Encode[2 * i];
            float e1 = pfn->params.Encode[2 * i + 1];

            enc = (arg - d0) * (e1 - e0) / (d1 - d0) + e0;
            if (enc < 0)
                encoded[i] = 0;
            else if (enc >= pfn->params.Size[i] - 1)
                encoded[i] = (float)pfn->params.Size[i] - 1;
            else
                encoded[i] = enc;
        } else {
            /* arg is guaranteed to be in bounds, ergo so is enc */
            encoded[i] = (arg - d0) * (pfn->params.Size[i] - 1) / (d1 - d0);
        }
    }

    /* Look up and interpolate the output values. */

    {
        ulong factor = bps * pfn->params.n;

        for (i = 0; i < pfn->params.m; factor *= pfn->params.Size[i++]) {
            int ipart = (int)encoded[i];

            offset += (factors[i] = factor) * ipart;
            iparts[i] = ipart;  /* only needed for cubic interpolation */
            encoded[i] -= ipart;
        }
    }
    if (pfn->params.Order == 3)
        fn_interpolate_cubic(pfn, encoded, iparts, factors, samples,
                             offset, pfn->params.m);
    else
        fn_interpolate_linear(pfn, encoded, factors, samples, offset,
                              pfn->params.m);

    /* Encode the output values. */

    for (i = 0; i < pfn->params.n; ++i)
        out[i] = (float)fn_Sd_encode(pfn, i, samples[i]);

    return 0;
}
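
/*
 * A small worked example of the factors/offset computation above (not part
 * of the original comments): with m = 2, n = 3, BitsPerSample = 8 and
 * Size = [4 5], the loop sets factors[0] = 8 * 3 = 24 and
 * factors[1] = 24 * 4 = 96, so the sample vector for grid node (i0, i1)
 * starts at bit offset 24 * i0 + 96 * i1 in the data stream; the first
 * input dimension varies fastest.
 */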

static const double double_stub = 1e90;

private inline void
fn_make_cubic_poles(double *p, double f0, double f1, double f2, double f3,
                    const int pole_step_minor)
{   /* The following are the poles of the polynomial that
       represents interpolate_cubic on [1,2]. */
    const double a = -0.5;

    p[pole_step_minor * 1] = (a*f0 + 3*f1 - a*f2)/3.0;
    p[pole_step_minor * 2] = (-a*f1 + 3*f2 + a*f3)/3.0;
}
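
/*
 * Worked out (an added note, not from the original comments): with a = -0.5
 * the two assignments above reduce to
 *
 *     p1 = f1 + (f2 - f0) / 6,   p2 = f2 - (f3 - f1) / 6,
 *
 * i.e. the inner Bezier poles of a cubic from f1 to f2 whose end tangents
 * are the central differences (f2 - f0) / 2 and (f3 - f1) / 2, the same
 * curve that interpolate_cubic evaluates on [1,2].
 */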

private void
fn_make_poles(double *p, const int pole_step, int power, int bias)
{
    const int pole_step_minor = pole_step / 3;

    switch (power) {
        case 1: /* A linear segment, represented as a degree 3 curve. */
            /* bias must be 0. */
            p[pole_step_minor * 1] = (2 * p[pole_step * 0] + 1 * p[pole_step * 1]) / 3;
            p[pole_step_minor * 2] = (1 * p[pole_step * 0] + 2 * p[pole_step * 1]) / 3;
            break;
        case 2:
            /* bias may be 0 or 1. */
            /* Duplicate the beginning or the ending pole (compatible with the old code). */
            fn_make_cubic_poles(p + pole_step * bias,
                                p[pole_step * 0], p[pole_step * bias],
                                p[pole_step * (1 + bias)], p[pole_step * 2],
                                pole_step_minor);
            break;
        case 3:
            /* bias must be 1. */
            fn_make_cubic_poles(p + pole_step * bias,
                                p[pole_step * 0], p[pole_step * 1], p[pole_step * 2], p[pole_step * 3],
                                pole_step_minor);
            break;
        default: /* Must not happen. */
            DO_NOTHING;
    }
}

/* Evaluate a Sampled function.
   A cubic interpolation with a pole cache.
   Allows a fast check for suspected extrema. */
/* This implementation handles the particular case of 1 dimension.
   It may be useful later as an optimisation of the generic case,
   so keep it for a while. */
private int
fn_Sd_evaluate_cubic_cached_1d(const gs_function_Sd_t *pfn, const float *in, float *out)
{
    float d0 = pfn->params.Domain[2 * 0];
    float d1 = pfn->params.Domain[2 * 0 + 1];
    float x0 = in[0];
    const int pole_step_minor = pfn->params.n;
    const int pole_step = 3 * pole_step_minor;
    int i0;                     /* A cell index. */
    int ib, ie, i, k;
    double *p, t0, t1, tt;

    if (x0 < d0)
        x0 = d0;
    if (x0 > d1)
        x0 = d1;
    tt = (in[0] - d0) * (pfn->params.Size[0] - 1) / (d1 - d0);
    i0 = (int)floor(tt);
    ib = max(i0 - 1, 0);
    ie = min(pfn->params.Size[0], i0 + 3);
    for (i = ib; i < ie; i++) {
        if (pfn->params.pole[i * pole_step] == double_stub) {
            uint sdata[max_Sd_n];
            int bps = pfn->params.BitsPerSample;

            p = &pfn->params.pole[i * pole_step];
            fn_get_samples[pfn->params.BitsPerSample](pfn, i * bps * pfn->params.n, sdata);
            for (k = 0; k < pfn->params.n; k++, p++)
                *p = fn_Sd_encode(pfn, k, (double)sdata[k]);
        }
    }
    p = &pfn->params.pole[i0 * pole_step];
    t0 = tt - i0;
    if (t0 == 0) {
        for (k = 0; k < pfn->params.n; k++, p++)
            out[k] = *p;
    } else {
        if (p[1 * pole_step_minor] == double_stub) {
            for (k = 0; k < pfn->params.n; k++)
                fn_make_poles(&pfn->params.pole[ib * pole_step + k], pole_step,
                              ie - ib - 1, i0 - ib);
        }
        t1 = 1 - t0;
        for (k = 0; k < pfn->params.n; k++, p++) {
            double y = p[0 * pole_step_minor] * t1 * t1 * t1 +
                       p[1 * pole_step_minor] * t1 * t1 * t0 * 3 +
                       p[2 * pole_step_minor] * t1 * t0 * t0 * 3 +
                       p[3 * pole_step_minor] * t0 * t0 * t0;

            if (y < pfn->params.Range[0])
                y = pfn->params.Range[0];
            if (y > pfn->params.Range[1])
                y = pfn->params.Range[1];
            out[k] = y;
        }
    }
    return 0;
}

private inline void
decode_argument(const gs_function_Sd_t *pfn, const float *in, double T[max_Sd_m], int I[max_Sd_m])
{
    int i;

    for (i = 0; i < pfn->params.m; i++) {
        float xi = in[i];
        float d0 = pfn->params.Domain[2 * i + 0];
        float d1 = pfn->params.Domain[2 * i + 1];
        double t;

        if (xi < d0)
            xi = d0;
        if (xi > d1)
            xi = d1;
        t = (xi - d0) * (pfn->params.Size[i] - 1) / (d1 - d0);
        I[i] = (int)floor(t);
        T[i] = t - I[i];
    }
}

private inline void
index_span(const gs_function_Sd_t *pfn, int *I, double *T, int ii, int *Ii, int *ib, int *ie)
{
    *Ii = I[ii];
    if (T[ii] != 0) {
        *ib = max(*Ii - 1, 0);
        *ie = min(pfn->params.Size[ii], *Ii + 3);
    } else {
        *ib = *Ii;
        *ie = *Ii + 1;
    }
}

private inline int
load_vector_to(const gs_function_Sd_t *pfn, int s_offset, double *V)
{
    uint sdata[max_Sd_n];
    int k, code;

    code = fn_get_samples[pfn->params.BitsPerSample](pfn, s_offset, sdata);
    if (code < 0)
        return code;
    for (k = 0; k < pfn->params.n; k++)
        V[k] = fn_Sd_encode(pfn, k, (double)sdata[k]);
    return 0;
}

private inline int
load_vector(const gs_function_Sd_t *pfn, int a_offset, int s_offset)
{
    if (*(pfn->params.pole + a_offset) == double_stub) {
        uint sdata[max_Sd_n];
        int k, code;

        code = fn_get_samples[pfn->params.BitsPerSample](pfn, s_offset, sdata);
        if (code < 0)
            return code;
        for (k = 0; k < pfn->params.n; k++)
            *(pfn->params.pole + a_offset + k) = fn_Sd_encode(pfn, k, (double)sdata[k]);
    }
    return 0;
}

private inline void
interpolate_vector(const gs_function_Sd_t *pfn, int offset, int pole_step, int power, int bias)
{
    int k;

    for (k = 0; k < pfn->params.n; k++)
        fn_make_poles(pfn->params.pole + offset + k, pole_step, power, bias);
}

private inline void
interpolate_tensors(const gs_function_Sd_t *pfn, int *I, double *T,
                    int offset, int pole_step, int power, int bias, int ii)
{
    if (ii < 0)
        interpolate_vector(pfn, offset, pole_step, power, bias);
    else {
        int s = pfn->params.array_step[ii];
        int Ii = I[ii];

        if (T[ii] == 0) {
            interpolate_tensors(pfn, I, T, offset + Ii * s, pole_step, power, bias, ii - 1);
        } else {
            int l;

            for (l = 0; l < 4; l++)
                interpolate_tensors(pfn, I, T, offset + Ii * s + l * s / 3, pole_step, power, bias, ii - 1);
        }
    }
}

private inline bool
is_tensor_done(const gs_function_Sd_t *pfn, int *I, double *T, int a_offset, int ii)
{
    /* Check an inner pole of the cell. */
    int i, o = 0;

    for (i = ii; i >= 0; i--) {
        o += I[i] * pfn->params.array_step[i];
        if (T[i] != 0)
            o += pfn->params.array_step[i] / 3;
    }
    if (*(pfn->params.pole + a_offset + o) != double_stub)
        return true;
    return false;
}

/* Creates a tensor of Bezier coefficients by node interpolation. */
private inline int
make_interpolation_tensor(const gs_function_Sd_t *pfn, int *I, double *T,
                          int a_offset, int s_offset, int ii)
{
    /* Well, this function isn't obvious.  Trying to explain what it does.

       Suppose we have a 4x4x4...x4 hypercube of nodes, and we want to build
       a multicubic interpolation function for the inner 2x2x2...x2 hypercube.
       We represent the multicubic function with a tensor of Bezier poles,
       and the size of the tensor is 4x4x....x4.  Note that the outer corners
       of the tensor are equal to the corners of the 2x2x...x2 hypercube.

       We organize the 'pole' array so that the tensor of a cell
       occupies the cell, and tensors for neighbouring cells share a
       common hyperplane.

       For the 1-dimensional case, let the nodes be p0, p1, p2, p3,
       and let the tensor coefficients be q0, q1, q2, q3.
       We choose a cubic approximation in which the tangents at nodes p1, p2
       are parallel to (p2 - p0) and (p3 - p1) respectively.
       (This doesn't give the minimal curvature, but it is likely what
       Adobe implementations do - see bug 687352 - and we consider it
       reasonable.)

       For the 2-dimensional case we apply the 1-dimensional construction
       along the first dimension, and then build a surface by varying the
       second coordinate as a parameter.  This gives a bicubic surface,
       and the result doesn't depend on the order of the coordinates
       (the latter was verified with Mathematica 3.0).
       We also use the fact that interpolation along one coordinate and
       differentiation along another are interchangeable operators.
       Therefore the poles of the interpolated function are the same as
       the interpolated poles of the function (we didn't produce a
       rigorous proof, but the fact was confirmed by testing the
       implementation with POLE_CACHE_DEBUG).

       Then we apply the 2-dimensional considerations recursively
       to all dimensions.  This is exactly what this function does.

       When the source node array has an insufficient number of nodes
       along a dimension, we duplicate the ending nodes.  This choice
       keeps compatibility with the old code, but a better one certainly
       exists.

       (A small 1-dimensional layout example follows this function.)
     */
    int code;

    if (ii < 0) {
        if (POLE_CACHE_IGNORE || *(pfn->params.pole + a_offset) == double_stub) {
            code = load_vector(pfn, a_offset, s_offset);
            if (code < 0)
                return code;
        }
    } else {
        int Ii, ib, ie, i;
        int sa = pfn->params.array_step[ii];
        int ss = pfn->params.stream_step[ii];

        index_span(pfn, I, T, ii, &Ii, &ib, &ie);
        if (POLE_CACHE_IGNORE || !is_tensor_done(pfn, I, T, a_offset, ii)) {
            for (i = ib; i < ie; i++) {
                code = make_interpolation_tensor(pfn, I, T,
                                                 a_offset + i * sa, s_offset + i * ss, ii - 1);
                if (code < 0)
                    return code;
            }
            if (T[ii] != 0)
                interpolate_tensors(pfn, I, T, a_offset + ib * sa, sa, ie - ib - 1,
                                    Ii - ib, ii - 1);
        }
    }
    return 0;
}
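
/*
 * The layout example promised above (illustrative, not part of the original
 * comments): for m = 1, n = 1, Order = 3 and Size = [4],
 * gs_function_Sd_init allocates (4 * 3 - 2) = 10 poles with
 * array_step[0] = 3.  The sample nodes are cached at pole[0], pole[3],
 * pole[6] and pole[9]; the two inner Bezier poles of cell i are written by
 * fn_make_poles to pole[3*i + 1] and pole[3*i + 2], so neighbouring cells
 * share their boundary pole.
 */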

/* Creates a subarray of samples. */
private inline int
make_interpolation_nodes(const gs_function_Sd_t *pfn, double *T0, double *T1,
                         int *I, double *T,
                         int a_offset, int s_offset, int ii)
{
    int code;

    if (ii < 0) {
        if (POLE_CACHE_IGNORE || *(pfn->params.pole + a_offset) == double_stub) {
            code = load_vector(pfn, a_offset, s_offset);
            if (code < 0)
                return code;
        }
        if (pfn->params.Order == 3) {
            code = make_interpolation_tensor(pfn, I, T, 0, 0, pfn->params.m - 1);
        }
    } else {
        int i;
        int i0 = (int)floor(T0[ii]);
        int i1 = (int)ceil(T1[ii]);
        int sa = pfn->params.array_step[ii];
        int ss = pfn->params.stream_step[ii];

        if (i0 < 0 || i0 >= pfn->params.Size[ii])
            return_error(gs_error_unregistered); /* Must not happen. */
        if (i1 < 0 || i1 >= pfn->params.Size[ii])
            return_error(gs_error_unregistered); /* Must not happen. */
        I[ii] = i0;
        T[ii] = (i1 > i0 ? 1 : 0);
        for (i = i0; i <= i1; i++) {
            code = make_interpolation_nodes(pfn, T0, T1, I, T,
                                            a_offset + i * sa, s_offset + i * ss, ii - 1);
            if (code < 0)
                return code;
        }
    }
    return 0;
}

private inline int
evaluate_from_tenzor(const gs_function_Sd_t *pfn, int *I, double *T, int offset, int ii, double *y)
{
    int k, l, code;

    if (ii < 0) {
        for (k = 0; k < pfn->params.n; k++)
            y[k] = *(pfn->params.pole + offset + k);
    } else if (T[ii] == 0) {
        return evaluate_from_tenzor(pfn, I, T, offset + pfn->params.array_step[ii] * I[ii], ii - 1, y);
    } else {
        /* Read array_step[ii] only here, after the ii < 0 case has been excluded. */
        int s = pfn->params.array_step[ii];
        double t0 = T[ii], t1 = 1 - t0;
        double p[4][max_Sd_n];

        for (l = 0; l < 4; l++) {
            code = evaluate_from_tenzor(pfn, I, T, offset + s * I[ii] + l * (s / 3), ii - 1, p[l]);
            if (code < 0)
                return code;
        }
        for (k = 0; k < pfn->params.n; k++)
            y[k] = p[0][k] * t1 * t1 * t1 +
                   p[1][k] * t1 * t1 * t0 * 3 +
                   p[2][k] * t1 * t0 * t0 * 3 +
                   p[3][k] * t0 * t0 * t0;
    }
    return 0;
}

/* Evaluate a Sampled function. */
/* A cubic interpolation with a pole cache. */
/* Allows a fast check for suspected extrema with is_tensor_monotonic. */
private int
fn_Sd_evaluate_multicubic_cached(const gs_function_Sd_t *pfn, const float *in, float *out)
{
    double T[max_Sd_m], y[max_Sd_n];
    int I[max_Sd_m], k, code;

    decode_argument(pfn, in, T, I);
    code = make_interpolation_tensor(pfn, I, T, 0, 0, pfn->params.m - 1);
    if (code < 0)
        return code;
    evaluate_from_tenzor(pfn, I, T, 0, pfn->params.m - 1, y);
    for (k = 0; k < pfn->params.n; k++) {
        double yk = y[k];

        if (yk < pfn->params.Range[k * 2 + 0])
            yk = pfn->params.Range[k * 2 + 0];
        if (yk > pfn->params.Range[k * 2 + 1])
            yk = pfn->params.Range[k * 2 + 1];
        out[k] = yk;
    }
    return 0;
}

/* Evaluate a Sampled function. */
private int
fn_Sd_evaluate(const gs_function_t * pfn_common, const float *in, float *out)
{
    const gs_function_Sd_t *pfn = (const gs_function_Sd_t *)pfn_common;
    int code;

    if (pfn->params.Order == 3) {
        if (POLE_CACHE_GENERIC_1D || pfn->params.m > 1)
            code = fn_Sd_evaluate_multicubic_cached(pfn, in, out);
        else
            code = fn_Sd_evaluate_cubic_cached_1d(pfn, in, out);
#if POLE_CACHE_DEBUG
        {
            float y[max_Sd_n];
            int k, code1;

            code1 = fn_Sd_evaluate_general(pfn_common, in, y);
            assert(code == code1);
            for (k = 0; k < pfn->params.n; k++)
                assert(any_abs(y[k] - out[k]) <= 1e-6 * (pfn->params.Range[k * 2 + 1] - pfn->params.Range[k * 2 + 0]));
        }
#endif
    } else
        code = fn_Sd_evaluate_general(pfn_common, in, out);
    return code;
}

/* Map a function subdomain to the sample index subdomain. */
private inline int
get_scaled_range(const gs_function_Sd_t *const pfn,
                 const float *lower, const float *upper,
                 int i, float *pw0, float *pw1)
{
    float d0 = pfn->params.Domain[i * 2 + 0], d1 = pfn->params.Domain[i * 2 + 1];
    float v0 = lower[i], v1 = upper[i];
    float e0, e1, w0, w1, w;
    const float small_noize = (float)1e-6;

    if (v0 < d0 || v0 > d1)
        return gs_error_rangecheck;
    if (pfn->params.Encode)
        e0 = pfn->params.Encode[i * 2 + 0], e1 = pfn->params.Encode[i * 2 + 1];
    else
        e0 = 0, e1 = (float)pfn->params.Size[i] - 1;
    w0 = (v0 - d0) * (e1 - e0) / (d1 - d0) + e0;
    if (w0 < 0)
        w0 = 0;
    else if (w0 >= pfn->params.Size[i] - 1)
        w0 = (float)pfn->params.Size[i] - 1;
    w1 = (v1 - d0) * (e1 - e0) / (d1 - d0) + e0;
    if (w1 < 0)
        w1 = 0;
    else if (w1 >= pfn->params.Size[i] - 1)
        w1 = (float)pfn->params.Size[i] - 1;
    if (w0 > w1) {
        w = w0; w0 = w1; w1 = w;
    }
    if (floor(w0 + 1) - w0 < small_noize * any_abs(e1 - e0))
        w0 = (floor(w0) + 1);
    if (w1 - floor(w1) < small_noize * any_abs(e1 - e0))
        w1 = floor(w1);
    if (w0 > w1)
        w0 = w1;
    *pw0 = w0;
    *pw1 = w1;
    return 0;
}
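
/*
 * A small worked example (not from the original comments): with
 * Domain = [0 1], Size = 4 and no Encode array, lower = 0.3 and upper = 0.7
 * map to w0 = 0.9 and w1 = 2.1.  The small_noize tests then snap w0 up to
 * the next integer sample index and w1 down to the previous one whenever
 * they lie within small_noize * |e1 - e0| of it, trimming numerical noise
 * at cell boundaries.
 */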

/* Copy a tensor to a differently indexed pole array. */
private int
copy_poles(const gs_function_Sd_t *pfn, int *I, double *T0, double *T1, int a_offset,
           int ii, double *pole, int p_offset, int pole_step)
{
    int i, ei, sa, code;
    int order = pfn->params.Order;

    if (pole_step <= 0)
        return_error(gs_error_limitcheck); /* Too small buffer. */
    ei = (T0[ii] == T1[ii] ? 1 : order + 1);
    sa = pfn->params.array_step[ii];
    if (ii == 0) {
        for (i = 0; i < ei; i++)
            *(pole + p_offset + i * pole_step) =
                *(pfn->params.pole + a_offset + I[ii] * sa + i * (sa / order));
    } else {
        for (i = 0; i < ei; i++) {
            code = copy_poles(pfn, I, T0, T1, a_offset + I[ii] * sa + i * (sa / order), ii - 1,
                              pole, p_offset + i * pole_step, pole_step / 4);
            if (code < 0)
                return code;
        }
    }
    return 0;
}

private inline void
subcurve(double *pole, int pole_step, double t0, double t1)
{
    /* Generated with subcurve.nb using Mathematica 3.0. */
    double q0 = pole[pole_step * 0];
    double q1 = pole[pole_step * 1];
    double q2 = pole[pole_step * 2];
    double q3 = pole[pole_step * 3];
    double t01 = t0 - 1, t11 = t1 - 1;
    double small = 1e-13;

#define Power2(a) ((a) * (a))
#define Power3(a) ((a) * (a) * (a))
    pole[pole_step * 0] = t0*(t0*(q3*t0 - 3*q2*t01) + 3*q1*Power2(t01)) - q0*Power3(t01);
    pole[pole_step * 1] = q1*t01*(-2*t0 - t1 + 3*t0*t1) + t0*(q2*t0 + 2*q2*t1 -
                          3*q2*t0*t1 + q3*t0*t1) - q0*t11*Power2(t01);
    pole[pole_step * 2] = t1*(2*q2*t0 + q2*t1 - 3*q2*t0*t1 + q3*t0*t1) +
                          q1*(-t0 - 2*t1 + 3*t0*t1)*t11 - q0*t01*Power2(t11);
    pole[pole_step * 3] = t1*(t1*(3*q2 - 3*q2*t1 + q3*t1) +
                          3*q1*Power2(t11)) - q0*Power3(t11);
#undef Power2
#undef Power3
    if (any_abs(pole[pole_step * 1] - pole[pole_step * 0]) < small)
        pole[pole_step * 1] = pole[pole_step * 0];
    if (any_abs(pole[pole_step * 2] - pole[pole_step * 3]) < small)
        pole[pole_step * 2] = pole[pole_step * 3];
}
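
/*
 * A worked check (illustrative, not from the original comments): subcurve
 * reparameterizes the cubic Bezier segment with poles q0..q3 to the
 * parameter interval [t0, t1].  With t0 = 0, t1 = 1 the poles are left
 * unchanged, and with t0 = 0, t1 = 0.5 they become the left half of the
 * de Casteljau subdivision:
 *
 *     q0,  (q0 + q1)/2,  (q0 + 2*q1 + q2)/4,  (q0 + 3*q1 + 3*q2 + q3)/8.
 */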

private inline void
subline(double *pole, int pole_step, double t0, double t1)
{
    double q0 = pole[pole_step * 0];
    double q1 = pole[pole_step * 1];

    pole[pole_step * 0] = (1 - t0) * q0 + t0 * q1;
    pole[pole_step * 1] = (1 - t1) * q0 + t1 * q1;
}

private void
clamp_poles(double *T0, double *T1, int ii, int i, double *pole,
            int p_offset, int pole_step, int pole_step_i, int order)
{
    if (ii < 0) {
        if (order == 3)
            subcurve(pole + p_offset, pole_step_i, T0[i], T1[i]);
        else
            subline(pole + p_offset, pole_step_i, T0[i], T1[i]);
    } else if (i == ii) {
        clamp_poles(T0, T1, ii - 1, i, pole, p_offset, pole_step / 4, pole_step, order);
    } else {
        int j, ei = (T0[ii] == T1[ii] ? 1 : order + 1);

        for (j = 0; j < ei; j++)
            clamp_poles(T0, T1, ii - 1, i, pole, p_offset + j * pole_step,
                        pole_step / 4, pole_step_i, order);
    }
}

private inline int /* 3 - don't know, 2 - decreasing, 0 - constant, 1 - increasing. */
curve_monotonity(double *pole, int pole_step)
{
    double p0 = pole[pole_step * 0];
    double p1 = pole[pole_step * 1];
    double p2 = pole[pole_step * 2];
    double p3 = pole[pole_step * 3];

    if (p0 == p1 && any_abs(p1 - p2) < 1e-13 && p2 == p3)
        return 0;
    if (p0 <= p1 && p1 <= p2 && p2 <= p3)
        return 1;
    if (p0 >= p1 && p1 >= p2 && p2 >= p3)
        return 2;
    /* Maybe not monotonic.
       We don't want to solve quadratic equations, so return "don't know".
       This case should be rare.
     */
    return 3;
}
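
/*
 * Justification (an added note, not from the original comments): if the
 * poles p0..p3 are non-decreasing, the derivative of the Bezier curve,
 * 3*((p1-p0)*B0(t) + (p2-p1)*B1(t) + (p3-p2)*B2(t)) with non-negative
 * Bernstein weights Bk(t), is non-negative, so the curve itself is
 * non-decreasing (and symmetrically for non-increasing poles).  The
 * converse does not hold, which is why unordered poles yield "don't know"
 * rather than "not monotonic".
 */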

private inline int /* 2 - decreasing, 0 - constant, 1 - increasing. */
line_monotonity(double *pole, int pole_step)
{
    double p0 = pole[pole_step * 0];
    double p1 = pole[pole_step * 1];

    if (p1 - p0 > 1e-13)
        return 1;
    if (p0 - p1 > 1e-13)
        return 2;
    return 0;
}

private int /* 3 bits per guide : 3 - non-monotonic or don't know,
               2 - decreasing, 0 - constant, 1 - increasing.
               The number of guides is order+1. */
tensor_dimension_monotonity(const double *T0, const double *T1, int ii, int i0, double *pole,
                            int p_offset, int pole_step, int pole_step_i, int order)
{
    if (ii < 0) {
        if (order == 3)
            return curve_monotonity(pole + p_offset, pole_step_i);
        else
            return line_monotonity(pole + p_offset, pole_step_i);
    } else if (i0 == ii) {
        /* Delay this dimension till the end, and adjust pole_step. */
        return tensor_dimension_monotonity(T0, T1, ii - 1, i0, pole, p_offset,
                                           pole_step / 4, pole_step, order);
    } else {
        int j, ei = (T0[ii] == T1[ii] ? 1 : order + 1), m = 0, mm;

        for (j = 0; j < ei; j++) {
            mm = tensor_dimension_monotonity(T0, T1, ii - 1, i0, pole, p_offset + j * pole_step,
                                             pole_step / 4, pole_step_i, order);
            m |= mm << (j * 3);
            if (mm == 3) {
                /* If one guide is not monotonic, the dimension is not monotonic.
                   We can return early. */
                break;
            }
        }
        return m;
    }
}

private inline int
is_tensor_monotonic_by_dimension(const gs_function_Sd_t *pfn, int *I, double *T0, double *T1, int i0, int k,
                                 uint *mask /* 3 bits per guide : 3 - non-monotonic or don't know,
                                               2 - decreasing, 0 - constant, 1 - increasing.
                                               The number of guides is order+1. */)
{
    double pole[4*4*4];         /* For now restricted to cubic functions of at most 3 inputs.
                                   More inputs need a bigger buffer, but the rest of
                                   the code is the same. */
    int i, code, ii = pfn->params.m - 1;
    double TT0[3], TT1[3];

    *mask = 0;
    if (ii >= 3) {
        /* Unimplemented.  We know of no practical cases,
           because currently this is only called while decomposing a shading. */
        return_error(gs_error_limitcheck);
    }
    code = copy_poles(pfn, I, T0, T1, k, ii, pole, 0, count_of(pole) / 4);
    if (code < 0)
        return code;
    for (i = ii; i >= 0; i--) {
        TT0[i] = 0;
        if (T0[i] != T1[i]) {
            if (T0[i] != 0 || T1[i] != 1)
                clamp_poles(T0, T1, ii, i, pole, 0, count_of(pole) / 4, -1, pfn->params.Order);
            TT1[i] = 1;
        } else
            TT1[i] = 0;
    }
    *mask = tensor_dimension_monotonity(TT0, TT1, ii, i0, pole, 0,
                                        count_of(pole) / 4, -1, pfn->params.Order);
    return 0;
}

private int /* error code */
is_lattice_monotonic_by_dimension(const gs_function_Sd_t *pfn, const double *T0, const double *T1,
                                  int *I, double *S0, double *S1, int ii, int i0, int k,
                                  uint *mask /* 3 bits per guide : 1 - non-monotonic or don't know, 0 - monotonic;
                                                The number of guides is order+1. */)
{
    if (ii == -1) {
        /* fixme : could cache the cell monotonicity to avoid redundant evaluation. */
        return is_tensor_monotonic_by_dimension(pfn, I, S0, S1, i0, k, mask);
    } else {
        int i1 = (ii > i0 ? ii : ii == 0 ? i0 : ii - 1); /* Delay the dimension i0 till the end of the recursion. */
        int j, code;
        int bi = (int)floor(T0[i1]);
        int ei = (int)floor(T1[i1]);
        uint m, mm, m1 = 0x49249249 & ((1 << ((pfn->params.Order + 1) * 3)) - 1);

        if (floor(T1[i1]) == T1[i1])
            ei--;
        m = 0;
        for (j = bi; j <= ei; j++) {
            /* fixme : better performance may be obtained by comparing central nodes with side ones. */
            I[i1] = j;
            S0[i1] = max(T0[i1] - j, 0);
            S1[i1] = min(T1[i1] - j, 1);
            code = is_lattice_monotonic_by_dimension(pfn, T0, T1, I, S0, S1, ii - 1, i0, k, &mm);
            if (code < 0)
                return code;
            m |= mm;
            if (m == m1) /* Don't return early - shadings need to know about all dimensions. */
                break;
        }
        if (ii == 0) {
            /* Detect non-monotonic guides. */
            m = m & (m >> 1);
        }
        *mask = m;
        return 0;
    }
}

private inline int /* error code */
is_lattice_monotonic(const gs_function_Sd_t *pfn, const double *T0, const double *T1,
                     int *I, double *S0, double *S1,
                     int k, uint *mask /* 1 bit per dimension : 1 - non-monotonic or don't know,
                                          0 - monotonic. */)
{
    uint m, mm = 0;
    int i, code;

    for (i = 0; i < pfn->params.m; i++) {
        if (T0[i] != T1[i]) {
            code = is_lattice_monotonic_by_dimension(pfn, T0, T1, I, S0, S1, pfn->params.m - 1, i, k, &m);
            if (code < 0)
                return code;
            if (m)
                mm |= 1 << i;
        }
    }
    *mask = mm;
    return 0;
}

private int /* 3 bits per result component : 3 - non-monotonic or don't know,
               2 - decreasing, 0 - constant, 1 - increasing,
               <0 - error. */
fn_Sd_1arg_linear_monotonic_rec(const gs_function_Sd_t *const pfn, int i0, int i1,
                                const double *V0, const double *V1)
{
    if (i1 - i0 <= 1) {
        int code = 0, i;

        for (i = 0; i < pfn->params.n; i++) {
            if (V0[i] < V1[i])
                code |= 1 << (i * 3);
            else if (V0[i] > V1[i])
                code |= 2 << (i * 3);
        }
        return code;
    } else {
        double VV[MAX_FAST_COMPS];
        int ii = (i0 + i1) / 2, code, cod1;

        code = load_vector_to(pfn, ii * pfn->params.n * pfn->params.BitsPerSample, VV);
        if (code < 0)
            return code;
        if (code & (code >> 1))
            return code; /* Not monotonic by some component of the result. */
        code = fn_Sd_1arg_linear_monotonic_rec(pfn, i0, ii, V0, VV);
        if (code < 0)
            return code;
        cod1 = fn_Sd_1arg_linear_monotonic_rec(pfn, ii, i1, VV, V1);
        if (cod1 < 0)
            return cod1;
        return code | cod1;
    }
}

private int
fn_Sd_1arg_linear_monotonic(const gs_function_Sd_t *const pfn, double T0, double T1,
                            uint *mask /* 1 - non-monotonic or don't know, 0 - monotonic. */)
{
    int i0 = (int)floor(T0);
    int i1 = (int)ceil(T1), code;
    double V0[MAX_FAST_COMPS], V1[MAX_FAST_COMPS];

    if (i1 - i0 > 1) {
        code = load_vector_to(pfn, i0 * pfn->params.n * pfn->params.BitsPerSample, V0);
        if (code < 0)
            return code;
        code = load_vector_to(pfn, i1 * pfn->params.n * pfn->params.BitsPerSample, V1);
        if (code < 0)
            return code;
        code = fn_Sd_1arg_linear_monotonic_rec(pfn, i0, i1, V0, V1);
        if (code < 0)
            return code;
        if (code & (code >> 1)) {
            *mask = 1;
            return 0;
        }
    }
    *mask = 0;
    return 1;
}

#define DEBUG_Sd_1arg 0

/* Test whether a Sampled function is monotonic. */
private int /* 1 = monotonic, 0 = not or don't know, <0 = error. */
fn_Sd_is_monotonic_aux(const gs_function_Sd_t *const pfn,
                       const float *lower, const float *upper,
                       uint *mask /* 1 bit per dimension : 1 - non-monotonic or don't know,
                                     0 - monotonic. */)
{
    int i, code, ii = pfn->params.m - 1;
    int I[4];
    double T0[count_of(I)], T1[count_of(I)];
    double S0[count_of(I)], S1[count_of(I)];
    uint m, mm, m1;
#if DEBUG_Sd_1arg
    int code1, mask1;
#endif

    if (ii >= count_of(T0)) {
        /* Unimplemented.  We know of no practical cases,
           because currently this is only called while decomposing a shading. */
        return_error(gs_error_limitcheck);
    }
    for (i = 0; i <= ii; i++) {
        float w0, w1;

        code = get_scaled_range(pfn, lower, upper, i, &w0, &w1);
        if (code < 0)
            return code;
        T0[i] = w0;
        T1[i] = w1;
    }
    if (pfn->params.m == 1 && pfn->params.Order == 1 && pfn->params.n <= MAX_FAST_COMPS) {
        code = fn_Sd_1arg_linear_monotonic(pfn, T0[0], T1[0], mask);
#if !DEBUG_Sd_1arg
        return code;
#else
        mask1 = *mask;
        code1 = code;
#endif
    }
    m1 = (1 << pfn->params.m) - 1;
    code = make_interpolation_nodes(pfn, T0, T1, I, S0, 0, 0, ii);
    if (code < 0)
        return code;
    mm = 0;
    for (i = 0; i < pfn->params.n; i++) {
        code = is_lattice_monotonic(pfn, T0, T1, I, S0, S1, i, &m);
        if (code < 0)
            return code;
        mm |= m;
        if (mm == m1) /* Don't return early - shadings need to know about all dimensions. */
            break;
    }
#if DEBUG_Sd_1arg
    if (mask1 != mm)
        return_error(gs_error_unregistered);
    if (code1 != !mm)
        return_error(gs_error_unregistered);
#endif
    *mask = mm;
    return !mm;
}

/* Test whether a Sampled function is monotonic. */
/* 1 = monotonic, 0 = don't know, <0 = error. */
private int
fn_Sd_is_monotonic(const gs_function_t * pfn_common,
                   const float *lower, const float *upper, uint *mask)
{
    const gs_function_Sd_t *const pfn =
        (const gs_function_Sd_t *)pfn_common;

    return fn_Sd_is_monotonic_aux(pfn, lower, upper, mask);
}

/* Return Sampled function information. */
private void
fn_Sd_get_info(const gs_function_t *pfn_common, gs_function_info_t *pfi)
{
    const gs_function_Sd_t *const pfn =
        (const gs_function_Sd_t *)pfn_common;
    long size;
    int i;

    gs_function_get_info_default(pfn_common, pfi);
    pfi->DataSource = &pfn->params.DataSource;
    for (i = 0, size = 1; i < pfn->params.m; ++i)
        size *= pfn->params.Size[i];
    pfi->data_size =
        (size * pfn->params.n * pfn->params.BitsPerSample + 7) >> 3;
}

/* Write Sampled function parameters on a parameter list. */
private int
fn_Sd_get_params(const gs_function_t *pfn_common, gs_param_list *plist)
{
    const gs_function_Sd_t *const pfn =
        (const gs_function_Sd_t *)pfn_common;
    int ecode = fn_common_get_params(pfn_common, plist);
    int code;

    if (pfn->params.Order != 1) {
        if ((code = param_write_int(plist, "Order", &pfn->params.Order)) < 0)
            ecode = code;
    }
    if ((code = param_write_int(plist, "BitsPerSample",
                                &pfn->params.BitsPerSample)) < 0)
        ecode = code;
    if (pfn->params.Encode) {
        if ((code = param_write_float_values(plist, "Encode",
                                             pfn->params.Encode,
                                             2 * pfn->params.m, false)) < 0)
            ecode = code;
    }
    if (pfn->params.Decode) {
        if ((code = param_write_float_values(plist, "Decode",
                                             pfn->params.Decode,
                                             2 * pfn->params.n, false)) < 0)
            ecode = code;
    }
    if (pfn->params.Size) {
        if ((code = param_write_int_values(plist, "Size", pfn->params.Size,
                                           pfn->params.m, false)) < 0)
            ecode = code;
    }
    return ecode;
}

/* Make a scaled copy of a Sampled function. */
private int
fn_Sd_make_scaled(const gs_function_Sd_t *pfn, gs_function_Sd_t **ppsfn,
                  const gs_range_t *pranges, gs_memory_t *mem)
{
    gs_function_Sd_t *psfn =
        gs_alloc_struct(mem, gs_function_Sd_t, &st_function_Sd,
                        "fn_Sd_make_scaled");
    int code;

    if (psfn == 0)
        return_error(gs_error_VMerror);
    psfn->params = pfn->params;
    psfn->params.Encode = 0;    /* in case of failure */
    psfn->params.Decode = 0;
    psfn->params.Size =
        fn_copy_values(pfn->params.Size, pfn->params.m, sizeof(int), mem);
    if ((code = (psfn->params.Size == 0 ?
                 gs_note_error(gs_error_VMerror) : 0)) < 0 ||
        (code = fn_common_scale((gs_function_t *)psfn,
                                (const gs_function_t *)pfn,
                                pranges, mem)) < 0 ||
        (code = fn_scale_pairs(&psfn->params.Encode, pfn->params.Encode,
                               pfn->params.m, NULL, mem)) < 0 ||
        (code = fn_scale_pairs(&psfn->params.Decode, pfn->params.Decode,
                               pfn->params.n, pranges, mem)) < 0) {
        gs_function_free((gs_function_t *)psfn, true, mem);
    } else
        *ppsfn = psfn;
    return code;
}

/* Free the parameters of a Sampled function. */
void
gs_function_Sd_free_params(gs_function_Sd_params_t * params, gs_memory_t * mem)
{
    gs_free_const_object(mem, params->Size, "Size");
    gs_free_const_object(mem, params->Decode, "Decode");
    gs_free_const_object(mem, params->Encode, "Encode");
    fn_common_free_params((gs_function_params_t *) params, mem);
    gs_free_object(mem, params->pole, "gs_function_Sd_free_params");
    gs_free_object(mem, params->array_step, "gs_function_Sd_free_params");
    gs_free_object(mem, params->stream_step, "gs_function_Sd_free_params");
}

/* A helper for gs_function_Sd_serialize. */
private int
serialize_array(const float *a, int half_size, stream *s)
{
    uint n;
    const float dummy[2] = {0, 0};
    int i, code;

    if (a != NULL)
        return sputs(s, (const byte *)a, sizeof(a[0]) * half_size * 2, &n);
    for (i = 0; i < half_size; i++) {
        code = sputs(s, (const byte *)dummy, sizeof(dummy), &n);
        if (code < 0)
            return code;
    }
    return 0;
}

/* Serialize. */
private int
gs_function_Sd_serialize(const gs_function_t * pfn, stream *s)
{
    uint n;
    const gs_function_Sd_params_t * p = (const gs_function_Sd_params_t *)&pfn->params;
    gs_function_info_t info;
    int code = fn_common_serialize(pfn, s);
    ulong pos;
    uint count;
    byte buf[100];
    const byte *ptr;

    if (code < 0)
        return code;
    code = sputs(s, (const byte *)&p->Order, sizeof(p->Order), &n);
    if (code < 0)
        return code;
    code = sputs(s, (const byte *)&p->BitsPerSample, sizeof(p->BitsPerSample), &n);
    if (code < 0)
        return code;
    code = serialize_array(p->Encode, p->m, s);
    if (code < 0)
        return code;
    code = serialize_array(p->Decode, p->n, s);
    if (code < 0)
        return code;
    gs_function_get_info(pfn, &info);
    code = sputs(s, (const byte *)&info.data_size, sizeof(info.data_size), &n);
    if (code < 0)
        return code;
    for (pos = 0; pos < info.data_size; pos += count) {
        count = min(sizeof(buf), info.data_size - pos);
        data_source_access_only(info.DataSource, pos, count, buf, &ptr);
        code = sputs(s, ptr, count, &n);
        if (code < 0)
            return code;
    }
    return 0;
}

/* Allocate and initialize a Sampled function. */
int
gs_function_Sd_init(gs_function_t ** ppfn,
                    const gs_function_Sd_params_t * params, gs_memory_t * mem)
{
    static const gs_function_head_t function_Sd_head = {
        function_type_Sampled,
        {
            (fn_evaluate_proc_t) fn_Sd_evaluate,
            (fn_is_monotonic_proc_t) fn_Sd_is_monotonic,
            (fn_get_info_proc_t) fn_Sd_get_info,
            (fn_get_params_proc_t) fn_Sd_get_params,
            (fn_make_scaled_proc_t) fn_Sd_make_scaled,
            (fn_free_params_proc_t) gs_function_Sd_free_params,
            fn_common_free,
            (fn_serialize_proc_t) gs_function_Sd_serialize,
        }
    };
    int code;
    int i;

    *ppfn = 0;                  /* in case of error */
    code = fn_check_mnDR((const gs_function_params_t *)params,
                         params->m, params->n);
    if (code < 0)
        return code;
    if (params->m > max_Sd_m)
        return_error(gs_error_limitcheck);
    switch (params->Order) {
        case 0:                 /* use default */
        case 1:
        case 3:
            break;
        default:
            return_error(gs_error_rangecheck);
    }
    switch (params->BitsPerSample) {
        case 1:
        case 2:
        case 4:
        case 8:
        case 12:
        case 16:
        case 24:
        case 32:
            break;
        default:
            return_error(gs_error_rangecheck);
    }
    for (i = 0; i < params->m; ++i)
        if (params->Size[i] <= 0)
            return_error(gs_error_rangecheck);
    {
        gs_function_Sd_t *pfn =
            gs_alloc_struct(mem, gs_function_Sd_t, &st_function_Sd,
                            "gs_function_Sd_init");
        int bps, sa, ss, i, order;

        if (pfn == 0)
            return_error(gs_error_VMerror);
        pfn->params = *params;
        if (params->Order == 0)
            pfn->params.Order = 1;      /* default */
        pfn->params.pole = NULL;
        pfn->params.array_step = NULL;
        pfn->params.stream_step = NULL;
        pfn->head = function_Sd_head;
        pfn->params.array_size = 0;
        if (pfn->params.m == 1 && pfn->params.Order == 1 && pfn->params.n <= MAX_FAST_COMPS && !DEBUG_Sd_1arg) {
            /* Won't use pole cache.  Call fn_Sd_1arg_linear_monotonic instead. */
        } else {
            pfn->params.array_step = (int *)gs_alloc_byte_array(mem,
                                            max_Sd_m, sizeof(int), "gs_function_Sd_init");
            pfn->params.stream_step = (int *)gs_alloc_byte_array(mem,
                                            max_Sd_m, sizeof(int), "gs_function_Sd_init");
            if (pfn->params.array_step == NULL || pfn->params.stream_step == NULL)
                return_error(gs_error_VMerror);
            bps = pfn->params.BitsPerSample;
            sa = pfn->params.n;
            ss = pfn->params.n * bps;
            order = pfn->params.Order;
            for (i = 0; i < pfn->params.m; i++) {
                pfn->params.array_step[i] = sa * order;
                sa = (pfn->params.Size[i] * order - (order - 1)) * sa;
                pfn->params.stream_step[i] = ss;
                ss = pfn->params.Size[i] * ss;
            }
            pfn->params.pole = (double *)gs_alloc_byte_array(mem,
                                            sa, sizeof(double), "gs_function_Sd_init");
            if (pfn->params.pole == NULL)
                return_error(gs_error_VMerror);
            for (i = 0; i < sa; i++)
                pfn->params.pole[i] = double_stub;
            pfn->params.array_size = sa;
        }
        *ppfn = (gs_function_t *) pfn;
    }
    return 0;
}
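
/*
 * A minimal usage sketch (not part of this file; guarded out so it cannot
 * affect the build).  It assumes the data_source_init_bytes helper from
 * gsdsrc.h, the gs_function_evaluate macro from gsfunc.h, and the field
 * names of gs_function_Sd_params_t as used above; check those headers
 * before enabling.  It builds a 1-input, 1-output, 8-bit, 4-sample linear
 * table over [0,1] and evaluates it at 0.5.
 */
#if 0
static int
sampled_function_usage_sketch(gs_memory_t *mem)
{
    static const byte table[4] = { 0, 85, 170, 255 };   /* the sample data */
    static const float domain[2] = { 0, 1 };
    static const float range[2] = { 0, 1 };
    static const int size[1] = { 4 };
    gs_function_Sd_params_t params;
    gs_function_t *pfn;
    float in = 0.5, out;
    int code;

    params.m = 1;
    params.n = 1;
    params.Domain = domain;
    params.Range = range;
    params.Size = size;
    params.BitsPerSample = 8;
    params.Order = 1;           /* linear interpolation */
    params.Encode = 0;          /* use the defaults */
    params.Decode = 0;
    data_source_init_bytes(&params.DataSource, table, sizeof(table));
    code = gs_function_Sd_init(&pfn, &params, mem);
    if (code < 0)
        return code;
    code = gs_function_evaluate(pfn, &in, &out);        /* out should be ~0.5 */
    gs_function_free(pfn, false /* params are static here */, mem);
    return code;
}
#endif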