1*a28cd43dSSascha Wildner /* ******************************************************************
2*a28cd43dSSascha Wildner * hist : Histogram functions
3*a28cd43dSSascha Wildner * part of Finite State Entropy project
4*a28cd43dSSascha Wildner * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5*a28cd43dSSascha Wildner *
6*a28cd43dSSascha Wildner * You can contact the author at :
7*a28cd43dSSascha Wildner * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
8*a28cd43dSSascha Wildner * - Public forum : https://groups.google.com/forum/#!forum/lz4c
9*a28cd43dSSascha Wildner *
10*a28cd43dSSascha Wildner * This source code is licensed under both the BSD-style license (found in the
11*a28cd43dSSascha Wildner * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12*a28cd43dSSascha Wildner * in the COPYING file in the root directory of this source tree).
13*a28cd43dSSascha Wildner * You may select, at your option, one of the above-listed licenses.
14*a28cd43dSSascha Wildner ****************************************************************** */
15*a28cd43dSSascha Wildner
16*a28cd43dSSascha Wildner /* --- dependencies --- */
17*a28cd43dSSascha Wildner #include "../common/mem.h" /* U32, BYTE, etc. */
18*a28cd43dSSascha Wildner #include "../common/debug.h" /* assert, DEBUGLOG */
19*a28cd43dSSascha Wildner #include "../common/error_private.h" /* ERROR */
20*a28cd43dSSascha Wildner #include "hist.h"
21*a28cd43dSSascha Wildner
22*a28cd43dSSascha Wildner
23*a28cd43dSSascha Wildner /* --- Error management --- */
HIST_isError(size_t code)24*a28cd43dSSascha Wildner unsigned HIST_isError(size_t code) { return ERR_isError(code); }
25*a28cd43dSSascha Wildner
26*a28cd43dSSascha Wildner /*-**************************************************************
27*a28cd43dSSascha Wildner * Histogram functions
28*a28cd43dSSascha Wildner ****************************************************************/
HIST_count_simple(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)29*a28cd43dSSascha Wildner unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
30*a28cd43dSSascha Wildner const void* src, size_t srcSize)
31*a28cd43dSSascha Wildner {
32*a28cd43dSSascha Wildner const BYTE* ip = (const BYTE*)src;
33*a28cd43dSSascha Wildner const BYTE* const end = ip + srcSize;
34*a28cd43dSSascha Wildner unsigned maxSymbolValue = *maxSymbolValuePtr;
35*a28cd43dSSascha Wildner unsigned largestCount=0;
36*a28cd43dSSascha Wildner
37*a28cd43dSSascha Wildner ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
38*a28cd43dSSascha Wildner if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
39*a28cd43dSSascha Wildner
40*a28cd43dSSascha Wildner while (ip<end) {
41*a28cd43dSSascha Wildner assert(*ip <= maxSymbolValue);
42*a28cd43dSSascha Wildner count[*ip++]++;
43*a28cd43dSSascha Wildner }
44*a28cd43dSSascha Wildner
45*a28cd43dSSascha Wildner while (!count[maxSymbolValue]) maxSymbolValue--;
46*a28cd43dSSascha Wildner *maxSymbolValuePtr = maxSymbolValue;
47*a28cd43dSSascha Wildner
48*a28cd43dSSascha Wildner { U32 s;
49*a28cd43dSSascha Wildner for (s=0; s<=maxSymbolValue; s++)
50*a28cd43dSSascha Wildner if (count[s] > largestCount) largestCount = count[s];
51*a28cd43dSSascha Wildner }
52*a28cd43dSSascha Wildner
53*a28cd43dSSascha Wildner return largestCount;
54*a28cd43dSSascha Wildner }
55*a28cd43dSSascha Wildner
56*a28cd43dSSascha Wildner typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
57*a28cd43dSSascha Wildner
58*a28cd43dSSascha Wildner /* HIST_count_parallel_wksp() :
59*a28cd43dSSascha Wildner * store histogram into 4 intermediate tables, recombined at the end.
60*a28cd43dSSascha Wildner * this design makes better use of OoO cpus,
61*a28cd43dSSascha Wildner * and is noticeably faster when some values are heavily repeated.
62*a28cd43dSSascha Wildner * But it needs some additional workspace for intermediate tables.
63*a28cd43dSSascha Wildner * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
64*a28cd43dSSascha Wildner * @return : largest histogram frequency,
65*a28cd43dSSascha Wildner * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
HIST_count_parallel_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,HIST_checkInput_e check,U32 * const workSpace)66*a28cd43dSSascha Wildner static size_t HIST_count_parallel_wksp(
67*a28cd43dSSascha Wildner unsigned* count, unsigned* maxSymbolValuePtr,
68*a28cd43dSSascha Wildner const void* source, size_t sourceSize,
69*a28cd43dSSascha Wildner HIST_checkInput_e check,
70*a28cd43dSSascha Wildner U32* const workSpace)
71*a28cd43dSSascha Wildner {
72*a28cd43dSSascha Wildner const BYTE* ip = (const BYTE*)source;
73*a28cd43dSSascha Wildner const BYTE* const iend = ip+sourceSize;
74*a28cd43dSSascha Wildner size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
75*a28cd43dSSascha Wildner unsigned max=0;
76*a28cd43dSSascha Wildner U32* const Counting1 = workSpace;
77*a28cd43dSSascha Wildner U32* const Counting2 = Counting1 + 256;
78*a28cd43dSSascha Wildner U32* const Counting3 = Counting2 + 256;
79*a28cd43dSSascha Wildner U32* const Counting4 = Counting3 + 256;
80*a28cd43dSSascha Wildner
81*a28cd43dSSascha Wildner /* safety checks */
82*a28cd43dSSascha Wildner assert(*maxSymbolValuePtr <= 255);
83*a28cd43dSSascha Wildner if (!sourceSize) {
84*a28cd43dSSascha Wildner ZSTD_memset(count, 0, countSize);
85*a28cd43dSSascha Wildner *maxSymbolValuePtr = 0;
86*a28cd43dSSascha Wildner return 0;
87*a28cd43dSSascha Wildner }
88*a28cd43dSSascha Wildner ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
89*a28cd43dSSascha Wildner
90*a28cd43dSSascha Wildner /* by stripes of 16 bytes */
91*a28cd43dSSascha Wildner { U32 cached = MEM_read32(ip); ip += 4;
92*a28cd43dSSascha Wildner while (ip < iend-15) {
93*a28cd43dSSascha Wildner U32 c = cached; cached = MEM_read32(ip); ip += 4;
94*a28cd43dSSascha Wildner Counting1[(BYTE) c ]++;
95*a28cd43dSSascha Wildner Counting2[(BYTE)(c>>8) ]++;
96*a28cd43dSSascha Wildner Counting3[(BYTE)(c>>16)]++;
97*a28cd43dSSascha Wildner Counting4[ c>>24 ]++;
98*a28cd43dSSascha Wildner c = cached; cached = MEM_read32(ip); ip += 4;
99*a28cd43dSSascha Wildner Counting1[(BYTE) c ]++;
100*a28cd43dSSascha Wildner Counting2[(BYTE)(c>>8) ]++;
101*a28cd43dSSascha Wildner Counting3[(BYTE)(c>>16)]++;
102*a28cd43dSSascha Wildner Counting4[ c>>24 ]++;
103*a28cd43dSSascha Wildner c = cached; cached = MEM_read32(ip); ip += 4;
104*a28cd43dSSascha Wildner Counting1[(BYTE) c ]++;
105*a28cd43dSSascha Wildner Counting2[(BYTE)(c>>8) ]++;
106*a28cd43dSSascha Wildner Counting3[(BYTE)(c>>16)]++;
107*a28cd43dSSascha Wildner Counting4[ c>>24 ]++;
108*a28cd43dSSascha Wildner c = cached; cached = MEM_read32(ip); ip += 4;
109*a28cd43dSSascha Wildner Counting1[(BYTE) c ]++;
110*a28cd43dSSascha Wildner Counting2[(BYTE)(c>>8) ]++;
111*a28cd43dSSascha Wildner Counting3[(BYTE)(c>>16)]++;
112*a28cd43dSSascha Wildner Counting4[ c>>24 ]++;
113*a28cd43dSSascha Wildner }
114*a28cd43dSSascha Wildner ip-=4;
115*a28cd43dSSascha Wildner }
116*a28cd43dSSascha Wildner
117*a28cd43dSSascha Wildner /* finish last symbols */
118*a28cd43dSSascha Wildner while (ip<iend) Counting1[*ip++]++;
119*a28cd43dSSascha Wildner
120*a28cd43dSSascha Wildner { U32 s;
121*a28cd43dSSascha Wildner for (s=0; s<256; s++) {
122*a28cd43dSSascha Wildner Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
123*a28cd43dSSascha Wildner if (Counting1[s] > max) max = Counting1[s];
124*a28cd43dSSascha Wildner } }
125*a28cd43dSSascha Wildner
126*a28cd43dSSascha Wildner { unsigned maxSymbolValue = 255;
127*a28cd43dSSascha Wildner while (!Counting1[maxSymbolValue]) maxSymbolValue--;
128*a28cd43dSSascha Wildner if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
129*a28cd43dSSascha Wildner *maxSymbolValuePtr = maxSymbolValue;
130*a28cd43dSSascha Wildner ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
131*a28cd43dSSascha Wildner }
132*a28cd43dSSascha Wildner return (size_t)max;
133*a28cd43dSSascha Wildner }
134*a28cd43dSSascha Wildner
135*a28cd43dSSascha Wildner /* HIST_countFast_wksp() :
136*a28cd43dSSascha Wildner * Same as HIST_countFast(), but using an externally provided scratch buffer.
137*a28cd43dSSascha Wildner * `workSpace` is a writable buffer which must be 4-bytes aligned,
138*a28cd43dSSascha Wildner * `workSpaceSize` must be >= HIST_WKSP_SIZE
139*a28cd43dSSascha Wildner */
HIST_countFast_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)140*a28cd43dSSascha Wildner size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
141*a28cd43dSSascha Wildner const void* source, size_t sourceSize,
142*a28cd43dSSascha Wildner void* workSpace, size_t workSpaceSize)
143*a28cd43dSSascha Wildner {
144*a28cd43dSSascha Wildner if (sourceSize < 1500) /* heuristic threshold */
145*a28cd43dSSascha Wildner return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
146*a28cd43dSSascha Wildner if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
147*a28cd43dSSascha Wildner if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
148*a28cd43dSSascha Wildner return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
149*a28cd43dSSascha Wildner }
150*a28cd43dSSascha Wildner
151*a28cd43dSSascha Wildner /* HIST_count_wksp() :
152*a28cd43dSSascha Wildner * Same as HIST_count(), but using an externally provided scratch buffer.
153*a28cd43dSSascha Wildner * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
HIST_count_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)154*a28cd43dSSascha Wildner size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
155*a28cd43dSSascha Wildner const void* source, size_t sourceSize,
156*a28cd43dSSascha Wildner void* workSpace, size_t workSpaceSize)
157*a28cd43dSSascha Wildner {
158*a28cd43dSSascha Wildner if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
159*a28cd43dSSascha Wildner if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
160*a28cd43dSSascha Wildner if (*maxSymbolValuePtr < 255)
161*a28cd43dSSascha Wildner return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
162*a28cd43dSSascha Wildner *maxSymbolValuePtr = 255;
163*a28cd43dSSascha Wildner return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
164*a28cd43dSSascha Wildner }
165*a28cd43dSSascha Wildner
166*a28cd43dSSascha Wildner #ifndef ZSTD_NO_UNUSED_FUNCTIONS
167*a28cd43dSSascha Wildner /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
HIST_countFast(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize)168*a28cd43dSSascha Wildner size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
169*a28cd43dSSascha Wildner const void* source, size_t sourceSize)
170*a28cd43dSSascha Wildner {
171*a28cd43dSSascha Wildner unsigned tmpCounters[HIST_WKSP_SIZE_U32];
172*a28cd43dSSascha Wildner return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
173*a28cd43dSSascha Wildner }
174*a28cd43dSSascha Wildner
HIST_count(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)175*a28cd43dSSascha Wildner size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
176*a28cd43dSSascha Wildner const void* src, size_t srcSize)
177*a28cd43dSSascha Wildner {
178*a28cd43dSSascha Wildner unsigned tmpCounters[HIST_WKSP_SIZE_U32];
179*a28cd43dSSascha Wildner return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
180*a28cd43dSSascha Wildner }
181*a28cd43dSSascha Wildner #endif
182