Lines Matching +full:zero +full:- +full:point
1 //===- FakeQuantSupport.cpp - Support utilities for FakeQuant ops ---------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
19 // Hard-coded type mapping from TFLite.
23 qmin = -128;
32 qmin = -32768;
60 // to include 0.0, but the range width size (rmax-rmin) isn't changed. The zero
61 // point is derived from the shifted range, and the scale isn't changed. As
72 scale = (rmax - rmin) / (qmaxDouble - qminDouble);
74 // Zero point computation.
78 // The arithmetic error on the zero point computed from either pair will be
81 const double zeroPointFromMin = qminDouble - rmin / scale;
84 const double zeroPointFromMax = qmaxDouble - rmax / scale;
92 // Now nudge the zero point to be an integer.
102 // By construction, the nudged zero point should always be in range.
123 // 0.0s, so the scale is set to 1.0 and the tensor can be quantized to zero
125 if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
145 return (emitError(loc, "mismatched per-axis min and max size: ")
167 if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {