1*3117ece4Schristos /* ****************************************************************** 2*3117ece4Schristos * hist : Histogram functions 3*3117ece4Schristos * part of Finite State Entropy project 4*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 5*3117ece4Schristos * 6*3117ece4Schristos * You can contact the author at : 7*3117ece4Schristos * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 8*3117ece4Schristos * - Public forum : https://groups.google.com/forum/#!forum/lz4c 9*3117ece4Schristos * 10*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 11*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 12*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 13*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 14*3117ece4Schristos ****************************************************************** */ 15*3117ece4Schristos 16*3117ece4Schristos /* --- dependencies --- */ 17*3117ece4Schristos #include "../common/mem.h" /* U32, BYTE, etc. */ 18*3117ece4Schristos #include "../common/debug.h" /* assert, DEBUGLOG */ 19*3117ece4Schristos #include "../common/error_private.h" /* ERROR */ 20*3117ece4Schristos #include "hist.h" 21*3117ece4Schristos 22*3117ece4Schristos 23*3117ece4Schristos /* --- Error management --- */ 24*3117ece4Schristos unsigned HIST_isError(size_t code) { return ERR_isError(code); } 25*3117ece4Schristos 26*3117ece4Schristos /*-************************************************************** 27*3117ece4Schristos * Histogram functions 28*3117ece4Schristos ****************************************************************/ 29*3117ece4Schristos unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 30*3117ece4Schristos const void* src, size_t srcSize) 31*3117ece4Schristos { 32*3117ece4Schristos const BYTE* ip = (const BYTE*)src; 33*3117ece4Schristos const BYTE* const end = ip + srcSize; 34*3117ece4Schristos unsigned maxSymbolValue = *maxSymbolValuePtr; 35*3117ece4Schristos unsigned largestCount=0; 36*3117ece4Schristos 37*3117ece4Schristos ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); 38*3117ece4Schristos if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } 39*3117ece4Schristos 40*3117ece4Schristos while (ip<end) { 41*3117ece4Schristos assert(*ip <= maxSymbolValue); 42*3117ece4Schristos count[*ip++]++; 43*3117ece4Schristos } 44*3117ece4Schristos 45*3117ece4Schristos while (!count[maxSymbolValue]) maxSymbolValue--; 46*3117ece4Schristos *maxSymbolValuePtr = maxSymbolValue; 47*3117ece4Schristos 48*3117ece4Schristos { U32 s; 49*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) 50*3117ece4Schristos if (count[s] > largestCount) largestCount = count[s]; 51*3117ece4Schristos } 52*3117ece4Schristos 53*3117ece4Schristos return largestCount; 54*3117ece4Schristos } 55*3117ece4Schristos 56*3117ece4Schristos typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; 57*3117ece4Schristos 58*3117ece4Schristos /* HIST_count_parallel_wksp() : 59*3117ece4Schristos * store histogram into 4 intermediate tables, recombined at the end. 60*3117ece4Schristos * this design makes better use of OoO cpus, 61*3117ece4Schristos * and is noticeably faster when some values are heavily repeated. 62*3117ece4Schristos * But it needs some additional workspace for intermediate tables. 63*3117ece4Schristos * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. 64*3117ece4Schristos * @return : largest histogram frequency, 65*3117ece4Schristos * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ 66*3117ece4Schristos static size_t HIST_count_parallel_wksp( 67*3117ece4Schristos unsigned* count, unsigned* maxSymbolValuePtr, 68*3117ece4Schristos const void* source, size_t sourceSize, 69*3117ece4Schristos HIST_checkInput_e check, 70*3117ece4Schristos U32* const workSpace) 71*3117ece4Schristos { 72*3117ece4Schristos const BYTE* ip = (const BYTE*)source; 73*3117ece4Schristos const BYTE* const iend = ip+sourceSize; 74*3117ece4Schristos size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); 75*3117ece4Schristos unsigned max=0; 76*3117ece4Schristos U32* const Counting1 = workSpace; 77*3117ece4Schristos U32* const Counting2 = Counting1 + 256; 78*3117ece4Schristos U32* const Counting3 = Counting2 + 256; 79*3117ece4Schristos U32* const Counting4 = Counting3 + 256; 80*3117ece4Schristos 81*3117ece4Schristos /* safety checks */ 82*3117ece4Schristos assert(*maxSymbolValuePtr <= 255); 83*3117ece4Schristos if (!sourceSize) { 84*3117ece4Schristos ZSTD_memset(count, 0, countSize); 85*3117ece4Schristos *maxSymbolValuePtr = 0; 86*3117ece4Schristos return 0; 87*3117ece4Schristos } 88*3117ece4Schristos ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); 89*3117ece4Schristos 90*3117ece4Schristos /* by stripes of 16 bytes */ 91*3117ece4Schristos { U32 cached = MEM_read32(ip); ip += 4; 92*3117ece4Schristos while (ip < iend-15) { 93*3117ece4Schristos U32 c = cached; cached = MEM_read32(ip); ip += 4; 94*3117ece4Schristos Counting1[(BYTE) c ]++; 95*3117ece4Schristos Counting2[(BYTE)(c>>8) ]++; 96*3117ece4Schristos Counting3[(BYTE)(c>>16)]++; 97*3117ece4Schristos Counting4[ c>>24 ]++; 98*3117ece4Schristos c = cached; cached = MEM_read32(ip); ip += 4; 99*3117ece4Schristos Counting1[(BYTE) c ]++; 100*3117ece4Schristos Counting2[(BYTE)(c>>8) ]++; 101*3117ece4Schristos Counting3[(BYTE)(c>>16)]++; 102*3117ece4Schristos Counting4[ c>>24 ]++; 103*3117ece4Schristos c = cached; cached = MEM_read32(ip); ip += 4; 104*3117ece4Schristos Counting1[(BYTE) c ]++; 105*3117ece4Schristos Counting2[(BYTE)(c>>8) ]++; 106*3117ece4Schristos Counting3[(BYTE)(c>>16)]++; 107*3117ece4Schristos Counting4[ c>>24 ]++; 108*3117ece4Schristos c = cached; cached = MEM_read32(ip); ip += 4; 109*3117ece4Schristos Counting1[(BYTE) c ]++; 110*3117ece4Schristos Counting2[(BYTE)(c>>8) ]++; 111*3117ece4Schristos Counting3[(BYTE)(c>>16)]++; 112*3117ece4Schristos Counting4[ c>>24 ]++; 113*3117ece4Schristos } 114*3117ece4Schristos ip-=4; 115*3117ece4Schristos } 116*3117ece4Schristos 117*3117ece4Schristos /* finish last symbols */ 118*3117ece4Schristos while (ip<iend) Counting1[*ip++]++; 119*3117ece4Schristos 120*3117ece4Schristos { U32 s; 121*3117ece4Schristos for (s=0; s<256; s++) { 122*3117ece4Schristos Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 123*3117ece4Schristos if (Counting1[s] > max) max = Counting1[s]; 124*3117ece4Schristos } } 125*3117ece4Schristos 126*3117ece4Schristos { unsigned maxSymbolValue = 255; 127*3117ece4Schristos while (!Counting1[maxSymbolValue]) maxSymbolValue--; 128*3117ece4Schristos if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); 129*3117ece4Schristos *maxSymbolValuePtr = maxSymbolValue; 130*3117ece4Schristos ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ 131*3117ece4Schristos } 132*3117ece4Schristos return (size_t)max; 133*3117ece4Schristos } 134*3117ece4Schristos 135*3117ece4Schristos /* HIST_countFast_wksp() : 136*3117ece4Schristos * Same as HIST_countFast(), but using an externally provided scratch buffer. 137*3117ece4Schristos * `workSpace` is a writable buffer which must be 4-bytes aligned, 138*3117ece4Schristos * `workSpaceSize` must be >= HIST_WKSP_SIZE 139*3117ece4Schristos */ 140*3117ece4Schristos size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 141*3117ece4Schristos const void* source, size_t sourceSize, 142*3117ece4Schristos void* workSpace, size_t workSpaceSize) 143*3117ece4Schristos { 144*3117ece4Schristos if (sourceSize < 1500) /* heuristic threshold */ 145*3117ece4Schristos return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); 146*3117ece4Schristos if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 147*3117ece4Schristos if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 148*3117ece4Schristos return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); 149*3117ece4Schristos } 150*3117ece4Schristos 151*3117ece4Schristos /* HIST_count_wksp() : 152*3117ece4Schristos * Same as HIST_count(), but using an externally provided scratch buffer. 153*3117ece4Schristos * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 154*3117ece4Schristos size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 155*3117ece4Schristos const void* source, size_t sourceSize, 156*3117ece4Schristos void* workSpace, size_t workSpaceSize) 157*3117ece4Schristos { 158*3117ece4Schristos if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 159*3117ece4Schristos if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 160*3117ece4Schristos if (*maxSymbolValuePtr < 255) 161*3117ece4Schristos return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); 162*3117ece4Schristos *maxSymbolValuePtr = 255; 163*3117ece4Schristos return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); 164*3117ece4Schristos } 165*3117ece4Schristos 166*3117ece4Schristos #ifndef ZSTD_NO_UNUSED_FUNCTIONS 167*3117ece4Schristos /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ 168*3117ece4Schristos size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 169*3117ece4Schristos const void* source, size_t sourceSize) 170*3117ece4Schristos { 171*3117ece4Schristos unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 172*3117ece4Schristos return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); 173*3117ece4Schristos } 174*3117ece4Schristos 175*3117ece4Schristos size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, 176*3117ece4Schristos const void* src, size_t srcSize) 177*3117ece4Schristos { 178*3117ece4Schristos unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 179*3117ece4Schristos return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); 180*3117ece4Schristos } 181*3117ece4Schristos #endif 182