src/std/numeric.d

// Written in the D programming language.

/**
This module is a port of a growing fragment of the $(D_PARAM numeric)
header in Alexander Stepanov's $(LINK2 https://en.wikipedia.org/wiki/Standard_Template_Library,
Standard Template Library), with a few additions.

Macros:
Copyright: Copyright Andrei Alexandrescu 2008 - 2009.
License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors:   $(HTTP erdani.org, Andrei Alexandrescu),
                   Don Clugston, Robert Jacques, Ilya Yaroshenko
Source:    $(PHOBOSSRC std/numeric.d)
*/
/*
         Copyright Andrei Alexandrescu 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
181254a7Smrgmodule std.numeric;
181254a7Smrg
181254a7Smrgimport std.complex;
181254a7Smrgimport std.math;
*b1e83836Smrgimport core.math : fabs, ldexp, sin, sqrt;
181254a7Smrgimport std.range.primitives;
181254a7Smrgimport std.traits;
181254a7Smrgimport std.typecons;
181254a7Smrg
/**
 * Format flags for CustomFloat.
 *
 * Every basic flag is a distinct power of two, so flags can be combined and
 * tested with the bitwise operators. `allowDenormZeroOnly` and `ieee` are
 * pre-combined sets of the basic flags.
 */
public enum CustomFloatFlags
{
    /// Adds a sign bit to allow for signed numbers.
    signed = 1,

    /**
     * Store values in normalized form by default. The actual precision of the
     * significand is extended by 1 bit by assuming an implicit leading bit of 1
     * instead of 0. i.e. `1.nnnn` instead of `0.nnnn`.
     * True for all $(LINK2 https://en.wikipedia.org/wiki/IEEE_floating_point, IEEE754) types
     */
    storeNormalized = 2,

    /**
     * Stores the significand in $(LINK2 https://en.wikipedia.org/wiki/IEEE_754-1985#Denormalized_numbers,
     * IEEE754 denormalized) form when the exponent is 0. Required to express the value 0.
     */
    allowDenorm = 4,

    /**
      * Allows the storage of $(LINK2 https://en.wikipedia.org/wiki/IEEE_754-1985#Positive_and_negative_infinity,
      * IEEE754 _infinity) values.
      */
    infinity = 8,

    /// Allows the storage of $(LINK2 https://en.wikipedia.org/wiki/NaN, IEEE754 Not a Number) values.
    nan = 16,

    /**
     * If set, select an exponent bias such that max_exp = 1.
     * i.e. so that the maximum value is >= 1.0 and < 2.0.
     * Ignored if the exponent bias is manually specified.
     */
    probability = 32,

    /// If set, unsigned custom floats are assumed to be negative.
    negativeUnsigned = 64,

    /**If set, 0 is the only allowed $(LINK2 https://en.wikipedia.org/wiki/IEEE_754-1985#Denormalized_numbers,
     * IEEE754 denormalized) number.
     * Requires allowDenorm and storeNormalized.
     */
    allowDenormZeroOnly = 128 | allowDenorm | storeNormalized,

    /// Include _all of the $(LINK2 https://en.wikipedia.org/wiki/IEEE_floating_point, IEEE754) options.
    ieee = signed | storeNormalized | allowDenorm | infinity | nan,

    /// Include none of the above options.
    none = 0
}
181254a7Smrg
// Maps a total storage width in bits (8, 16, 32, 64 or 80) onto the parameter
// sequence produced by the three-argument CustomFloatParams overload.
// For any other width no eponymous alias is declared.
private template CustomFloatParams(uint bits)
{
    // All supported widths start from the IEEE flag set. For the 80-bit
    // format `storeNormalized` is XOR-ed out again (it is part of `ieee`),
    // so that format stores its leading significand bit explicitly.
    enum CustomFloatFlags flags = CustomFloatFlags.ieee
                ^ ((bits == 80) ? CustomFloatFlags.storeNormalized : CustomFloatFlags.none);

    // (precision, exponentWidth) per supported width. The cases are mutually
    // exclusive, hence the else-chain.
    static if (bits == 8)
        alias CustomFloatParams = CustomFloatParams!( 4,  3, flags);
    else static if (bits == 16)
        alias CustomFloatParams = CustomFloatParams!(10,  5, flags);
    else static if (bits == 32)
        alias CustomFloatParams = CustomFloatParams!(23,  8, flags);
    else static if (bits == 64)
        alias CustomFloatParams = CustomFloatParams!(52, 11, flags);
    else static if (bits == 80)
        alias CustomFloatParams = CustomFloatParams!(64, 15, flags);
}
181254a7Smrg
// Expands to the sequence (precision, exponentWidth, flags, bias), where
// `bias` is the default exponent bias derived from the other three values.
private template CustomFloatParams(uint precision, uint exponentWidth, CustomFloatFlags flags)
{
    import std.meta : AliasSeq;
    alias CustomFloatParams =
        AliasSeq!(
            precision,
            exponentWidth,
            flags,
            // Default bias:
            //  - without `probability`: 2^(exponentWidth - 1), minus 1 when
            //    `nan` or `infinity` is set;
            //  - with `probability`:    2^exponentWidth, minus 1 when `nan`
            //    or `infinity` is set, minus a further 1.
            (1 << (exponentWidth - ((flags & flags.probability) == 0)))
             - ((flags & (flags.nan | flags.infinity)) != 0) - ((flags & flags.probability) != 0)
        );
}
181254a7Smrg
/**
 * Allows user code to define custom floating-point formats. These formats are
 * for storage only; all operations on them are performed by first implicitly
 * extracting them to `real`. After the operation is completed the
 * result can be stored in a custom floating-point value via assignment.
 */
template CustomFloat(uint bits)
if (bits == 8 || bits == 16 || bits == 32 || bits == 64 || bits == 80)
{
    // Forward to the four-parameter struct, using the parameter sequence
    // that CustomFloatParams yields for this total width.
    alias CustomFloat = CustomFloat!(CustomFloatParams!(bits));
}
181254a7Smrg
/// ditto
template CustomFloat(uint precision, uint exponentWidth, CustomFloatFlags flags = CustomFloatFlags.ieee)
// The sign bit (1 if `signed` is set, else 0) plus the significand and
// exponent bits must add up to a non-empty whole number of bytes.
if (((flags & flags.signed) + precision + exponentWidth) % 8 == 0 && precision + exponentWidth > 0)
{
    // Forward to the four-parameter struct; CustomFloatParams appends the
    // default exponent bias to the three values given here.
    alias CustomFloat = CustomFloat!(CustomFloatParams!(precision, exponentWidth, flags));
}
181254a7Smrg
///
@safe unittest
{
    import std.math.trigonometry : sin, cos;

    // Define 16-bit floating point values
    CustomFloat!16                                x;     // Using the number of bits
    CustomFloat!(10, 5)                           y;     // Using the precision and exponent width
    CustomFloat!(10, 5,CustomFloatFlags.ieee)     z;     // Using the precision, exponent width and format flags
    CustomFloat!(10, 5,CustomFloatFlags.ieee, 15) w;     // Using the precision, exponent width, format flags and exponent offset bias

    // Use the 16-bit floats mostly like normal numbers
    w = x*y - 1;

    // Function calls require conversion
    z = sin(+x)           + cos(+y);                     // Use unary plus to concisely convert to a real
    z = sin(x.get!float)  + cos(y.get!float);            // Or use get!T
    z = sin(cast(float) x) + cos(cast(float) y);           // Or use cast(T) to explicitly convert

    // Define an 8-bit custom float for storing probabilities
    alias Probability = CustomFloat!(4, 4, CustomFloatFlags.ieee^CustomFloatFlags.probability^CustomFloatFlags.signed );
    auto p = Probability(0.5);
}
181254a7Smrg
// Facilitate converting numeric types to custom float.
//
// The union overlays a value of the built-in type `F` (`set`) with a
// CustomFloat (`get`) built from the CustomFloatParams for F's width, so that
// writing one member and reading the other reinterprets the same bytes.
private union ToBinary(F)
if (is(typeof(CustomFloatParams!(F.sizeof*8))) || is(F == real))
{
    F set;

    // If on Linux or Mac, where 80-bit reals are padded, ignore the
    // padding: the CustomFloat view is capped at 80 bits.
    import std.algorithm.comparison : min;
    CustomFloat!(CustomFloatParams!(min(F.sizeof*8, 80))) get;

    // Convert F to the correct binary type.
    static typeof(get) opCall(F value)
    {
        ToBinary r;
        r.set = value;
        return r.get;
    }

    // Lets a ToBinary be used wherever its CustomFloat view is expected.
    alias get this;
}
*b1e83836Smrg
// Storage type behind every CustomFloat alias. The value is kept in bit
// fields (`significand`, `exponent` and, when `signed` is set, `sign`); all
// arithmetic is carried out by converting to `real` and, where a result is
// stored, converting back.
/// ditto
struct CustomFloat(uint             precision,  // fraction bits (23 for float)
                   uint             exponentWidth,  // exponent bits (8 for float)  Exponent width
                   CustomFloatFlags flags,
                   uint             bias)
if (isCorrectCustomFloat(precision, exponentWidth, flags))
{
    import std.bitmanip : bitfields;
    import std.meta : staticIndexOf;
private:
    // get the correct unsigned bitfield type to support > 32 bits
    template uType(uint bits)
    {
        static if (bits <= size_t.sizeof*8)  alias uType = size_t;
        else                                alias uType = ulong ;
    }

    // get the correct signed   bitfield type to support > 32 bits
    template sType(uint bits)
    {
        static if (bits <= ptrdiff_t.sizeof*8-1) alias sType = ptrdiff_t;
        else                                    alias sType = long;
    }

    alias T_sig = uType!precision;
    alias T_exp = uType!exponentWidth;
    alias T_signed_exp = sType!exponentWidth;

    alias Flags = CustomFloatFlags;

    // Perform IEEE rounding with round to nearest detection.
    // Shifts `sig` right by `shift` bits in place, rounding to nearest and
    // breaking exact ties towards an even result. Both call sites in this
    // struct pass a shift of at least 1.
    void roundedShift(T,U)(ref T sig, U shift)
    {
        if (shift >= T.sizeof*8)
        {
            // avoid illegal shift
            sig = 0;
        }
        else if (sig << (T.sizeof*8 - shift) == cast(T) 1uL << (T.sizeof*8 - 1))
        {
            // The discarded bits are exactly one half (1000...0):
            // round to even
            sig >>= shift;
            sig  += sig & 1;
        }
        else
        {
            // Keep one extra low bit, round on it, then drop it.
            sig >>= shift - 1;
            sig  += sig & 1;
            // Perform standard rounding
            sig >>= 1;
        }
    }

    // Convert the current value to signed exponent, normalized form.
    // On return `sig` holds the significand left-aligned in T and `exp` the
    // exponent with the bias removed. Two sentinel exponents are used:
    // `exp.max` for infinity/NaN and `exp.min` for zero.
    void toNormalized(T,U)(ref T sig, ref U exp)
    {
        sig = significand;
        auto shift = (T.sizeof*8) - precision;
        exp = exponent;
        static if (flags&(Flags.infinity|Flags.nan))
        {
            // Handle inf or nan
            if (exp == exponent_max)
            {
                exp = exp.max;
                sig <<= shift;
                static if (flags&Flags.storeNormalized)
                {
                    // Save inf/nan in denormalized format
                    // (make room for, and set, an explicit top bit)
                    sig >>= 1;
                    sig  += cast(T) 1uL << (T.sizeof*8 - 1);
                }
                return;
            }
        }
        if ((~flags&Flags.storeNormalized) ||
            // Convert denormalized form to normalized form
            ((flags&Flags.allowDenorm) && exp == 0))
        {
            if (sig > 0)
            {
                import core.bitop : bsr;
                // Distance of the highest set bit from the top of the
                // stored significand; the extra shift moves that bit out.
                auto shift2 = precision - bsr(sig);
                exp  -= shift2-1;
                shift += shift2;
            }
            else                                // value = 0.0
            {
                exp = exp.min;
                return;
            }
        }
        sig <<= shift;
        exp -= bias;
    }

    // Set the current value from signed exponent, normalized form.
    // Inverse of toNormalized: rewrites `sig` and `exp` in place into the
    // values that belong in this format's `significand` and `exponent`
    // fields. The caller performs the actual field assignment.
    void fromNormalized(T,U)(ref T sig, ref U exp)
    {
        auto shift = (T.sizeof*8) - precision;
        if (exp == exp.max)
        {
            // infinity or nan
            exp = exponent_max;
            // convert back to normalized form (drop the explicit top bit)
            static if (flags & Flags.storeNormalized)
                sig <<= 1;

            static if (~flags & Flags.infinity)
                // No infinity support?
                assert(sig != 0, "Infinity floating point value assigned to a "
                        ~ typeof(this).stringof ~ " (no infinity support).");

            static if (~flags & Flags.nan)  // No NaN support?
                assert(sig == 0, "NaN floating point value assigned to a " ~
                        typeof(this).stringof ~ " (no nan support).");
            sig >>= shift;
            return;
        }
        if (exp == exp.min)     // 0.0
        {
             exp = 0;
             sig = 0;
             return;
        }

        exp += bias;
        if (exp <= 0)
        {
            static if ((flags&Flags.allowDenorm) ||
                       // Convert from normalized form to denormalized
                       (~flags&Flags.storeNormalized))
            {
                // The further below zero the biased exponent is, the more
                // the significand has to be shifted right.
                shift += -exp;
                roundedShift(sig,1);
                // Add the leading 1
                sig   += cast(T) 1uL << (T.sizeof*8 - 1);
                exp    = 0;
            }
            else
                assert((flags&Flags.storeNormalized) && exp == 0,
                    "Underflow occured assigning to a " ~
                    typeof(this).stringof ~ " (no denormal support).");
        }
        else
        {
            static if (~flags&Flags.storeNormalized)
            {
                // Convert from normalized form to denormalized
                roundedShift(sig,1);
                // Add the leading 1
                sig  += cast(T) 1uL << (T.sizeof*8 - 1);
            }
        }

        // Bring the left-aligned significand down to `precision` bits.
        if (shift > 0)
            roundedShift(sig,shift);
        if (sig > significand_max)
        {
            // handle significand overflow (should only be 1 bit)
            static if (~flags&Flags.storeNormalized)
            {
                sig >>= 1;
            }
            else
                sig &= significand_max;
            exp++;
        }
        static if ((flags&Flags.allowDenormZeroOnly)==Flags.allowDenormZeroOnly)
        {
            // disallow non-zero denormals
            if (exp == 0)
            {
                sig <<= 1;
                if (sig > significand_max && (sig&significand_max) > 0)
                    // Check and round to even
                    exp++;
                sig = 0;
            }
        }

        if (exp >= exponent_max)
        {
            static if (flags&(Flags.infinity|Flags.nan))
            {
                // Saturate to the all-ones exponent with a zero significand.
                sig         = 0;
                exp         = exponent_max;
                static if (~flags&(Flags.infinity))
                    assert(0, "Overflow occured assigning to a " ~
                        typeof(this).stringof ~ " (no infinity support).");
            }
            else
                assert(exp == exponent_max, "Overflow occured assigning to a "
                    ~ typeof(this).stringof ~ " (no infinity support).");
        }
    }

public:
    // Storage layout. The width given for `sign` is `flags & flags.signed`,
    // i.e. 1 bit when `signed` is set and 0 bits otherwise.
    // `exponent_max` / `significand_max`, used throughout this struct, are
    // not declared here by hand except in the 64-bit case below — presumably
    // they come from the `bitfields` mixin (TODO confirm against std.bitmanip).
    static if (precision == 64) // CustomFloat!80 support hack
    {
        ulong significand;
        enum ulong significand_max = ulong.max;
        mixin(bitfields!(
            T_exp , "exponent", exponentWidth,
            bool  , "sign"    , flags & flags.signed ));
    }
    else
    {
        mixin(bitfields!(
            T_sig, "significand", precision,
            T_exp, "exponent"   , exponentWidth,
            bool , "sign"       , flags & flags.signed ));
    }

    /// Returns: infinity value
    static if (flags & Flags.infinity)
        static @property CustomFloat infinity()
        {
            CustomFloat value;
            static if (flags & Flags.signed)
                value.sign          = 0;
            value.significand   = 0;
            value.exponent      = exponent_max;
            return value;
        }

    /// Returns: NaN value
    static if (flags & Flags.nan)
        static @property CustomFloat nan()
        {
            CustomFloat value;
            static if (flags & Flags.signed)
                value.sign          = 0;
            // Maximum exponent with the top significand bit set.
            value.significand   = cast(typeof(significand_max)) 1L << (precision-1);
            value.exponent      = exponent_max;
            return value;
        }

    /// Returns: number of decimal digits of precision
    static @property size_t dig()
    {
        // One bit less when the leading bit is stored explicitly.
        auto shiftcnt = precision - ((flags&Flags.storeNormalized) == 0);
        // A 64-bit shift of a ulong is not allowed, so that case is answered
        // directly.
        return shiftcnt == 64 ? 19 : cast(size_t) log10(1uL << shiftcnt);
    }

    /// Returns: smallest increment to the value 1
    static @property CustomFloat epsilon()
    {
        CustomFloat one = CustomFloat(1);
        CustomFloat onePlusEpsilon = one;
        // Set the lowest significand bit of 1; the difference to 1 is epsilon.
        onePlusEpsilon.significand = onePlusEpsilon.significand | 1; // |= does not work here

        return CustomFloat(onePlusEpsilon - one);
    }

    /// the number of bits in mantissa
    enum mant_dig = precision + ((flags&Flags.storeNormalized) != 0);

    /// Returns: maximum int value such that 10<sup>max_10_exp</sup> is representable
    static @property int max_10_exp(){ return cast(int) log10( +max ); }

    /// maximum int value such that 2<sup>max_exp-1</sup> is representable
    enum max_exp = exponent_max - bias - ((flags & (Flags.infinity | Flags.nan)) != 0) + 1;

    /// Returns: minimum int value such that 10<sup>min_10_exp</sup> is representable
    static @property int min_10_exp(){ return cast(int) log10( +min_normal ); }

    /// minimum int value such that 2<sup>min_exp-1</sup> is representable as a normalized value
    enum min_exp = cast(T_signed_exp) -(cast(long) bias) + 1 + ((flags & Flags.allowDenorm) != 0);

    /// Returns: largest representable value that's not infinity
    static @property CustomFloat max()
    {
        CustomFloat value;
        static if (flags & Flags.signed)
            value.sign        = 0;
        // The top exponent value is reserved when infinity or NaN is enabled.
        value.exponent    = exponent_max - ((flags&(flags.infinity|flags.nan)) != 0);
        value.significand = significand_max;
        return value;
    }

    /// Returns: smallest representable normalized value that's not 0
    static @property CustomFloat min_normal()
    {
        CustomFloat value;
        static if (flags & Flags.signed)
            value.sign = 0;
        // Exponent 0 is used for denormals when they are allowed, so the
        // smallest normal exponent is then 1.
        value.exponent = (flags & Flags.allowDenorm) != 0;
        static if (flags & Flags.storeNormalized)
            value.significand = 0;
        else
            // The leading 1 is stored explicitly.
            value.significand = cast(T_sig) 1uL << (precision - 1);
        return value;
    }

    /// Returns: real part
    @property CustomFloat re() { return this; }

    /// Returns: imaginary part
    static @property CustomFloat im() { return CustomFloat(0.0f); }

    /// Initialize from any `real` compatible type.
    this(F)(F input) if (__traits(compiles, cast(real) input ))
    {
        this = input;
    }

    /// Self assignment
    void opAssign(F:CustomFloat)(F input)
    {
        static if (flags & Flags.signed)
            sign        = input.sign;
        exponent    = input.exponent;
        significand = input.significand;
    }

    /// Assigns from any `real` compatible type.
    void opAssign(F)(F input)
        if (__traits(compiles, cast(real) input))
    {
        import std.conv : text;

        // float/double/real are reinterpreted directly; everything else is
        // first converted to real.
        static if (staticIndexOf!(immutable F, immutable float, immutable double, immutable real) >= 0)
            auto value = ToBinary!(Unqual!F)(input);
        else
            auto value = ToBinary!(real    )(input);

        // Assign the sign bit
        static if (~flags & Flags.signed)
            assert((!value.sign) ^ ((flags&flags.negativeUnsigned) > 0),
                "Incorrectly signed floating point value assigned to a " ~
                typeof(this).stringof ~ " (no sign support).");
        else
            sign = value.sign;

        // Work in types wide enough for both the source and this format.
        CommonType!(T_signed_exp ,value.T_signed_exp) exp = value.exponent;
        CommonType!(T_sig,        value.T_sig       ) sig = value.significand;

        // Source format -> normalized form -> this format.
        value.toNormalized(sig,exp);
        fromNormalized(sig,exp);

        assert(exp <= exponent_max,    text(typeof(this).stringof ~
            " exponent too large: "   ,exp," > ",exponent_max,   "\t",input,"\t",sig));
        assert(sig <= significand_max, text(typeof(this).stringof ~
            " significand too large: ",sig," > ",significand_max,
            "\t",input,"\t",exp," ",exponent_max));
        exponent    = cast(T_exp) exp;
        significand = cast(T_sig) sig;
    }

    /// Fetches the stored value either as a `float`, `double` or `real`.
    @property F get(F)()
        if (staticIndexOf!(immutable F, immutable float, immutable double, immutable real) >= 0)
    {
        import std.conv : text;

        ToBinary!F result;

        static if (flags&Flags.signed)
            result.sign = sign;
        else
            result.sign = (flags&flags.negativeUnsigned) > 0;

        CommonType!(T_signed_exp ,result.get.T_signed_exp ) exp = exponent; // Assign the exponent and fraction
        CommonType!(T_sig,        result.get.T_sig        ) sig = significand;

        // This format -> normalized form -> F's binary format.
        toNormalized(sig,exp);
        result.fromNormalized(sig,exp);
        assert(exp <= result.exponent_max,    text("get exponent too large: "   ,exp," > ",result.exponent_max) );
        assert(sig <= result.significand_max, text("get significand too large: ",sig," > ",result.significand_max) );
        result.exponent     = cast(result.get.T_exp) exp;
        result.significand  = cast(result.get.T_sig) sig;
        return result.set;
    }

    ///ditto
    alias opCast = get;

    /// Convert the CustomFloat to a real and perform the relevant operator on the result
    real opUnary(string op)()
        if (__traits(compiles, mixin(op~`(get!real)`)) || op=="++" || op=="--")
    {
        static if (op=="++" || op=="--")
        {
            // Apply the operator to a real copy, store the updated value
            // back into this object and return it.
            auto result = get!real;
            this = mixin(op~`result`);
            return result;
        }
        else
            return mixin(op~`get!real`);
    }

    /// ditto
    // Define an opBinary `CustomFloat op CustomFloat` so that those below
    // do not match equally, which is disallowed by the spec:
    // https://dlang.org/spec/operatoroverloading.html#binary
    real opBinary(string op,T)(T b)
         if (__traits(compiles, mixin(`get!real`~op~`b.get!real`)))
     {
         return mixin(`get!real`~op~`b.get!real`);
     }

    /// ditto
    // Right-hand operand is usable with a real directly and has no
    // `get!real` of its own.
    real opBinary(string op,T)(T b)
        if ( __traits(compiles, mixin(`get!real`~op~`b`)) &&
            !__traits(compiles, mixin(`get!real`~op~`b.get!real`)))
    {
        return mixin(`get!real`~op~`b`);
    }

    /// ditto
    // NOTE(review): this function's parameter is `a`; no `b` is declared in
    // this signature. The two negated `__traits(compiles, ...)` clauses below
    // therefore look like they can never compile their argument and are
    // always true — confirm the intended constraint before changing it.
    real opBinaryRight(string op,T)(T a)
        if ( __traits(compiles, mixin(`a`~op~`get!real`)) &&
            !__traits(compiles, mixin(`get!real`~op~`b`)) &&
            !__traits(compiles, mixin(`get!real`~op~`b.get!real`)))
    {
        return mixin(`a`~op~`get!real`);
    }

    /// Compares the value, converted to `real`, with `b` cast to `real`.
    int opCmp(T)(auto ref T b)
        if (__traits(compiles, cast(real) b))
    {
        auto x = get!real;
        auto y = cast(real) b;
        // 1 if x > y, -1 if x < y, 0 if equal. Also 0 when both comparisons
        // are false.
        return  (x >= y)-(x <= y);
    }

    /// Applies `op` to the value and `b` (cast to `real`) and stores the result in `this`.
    void opOpAssign(string op, T)(auto ref T b)
        if (__traits(compiles, mixin(`get!real`~op~`cast(real) b`)))
    {
        return mixin(`this = this `~op~` cast(real) b`);
    }

    /// Formats the value, converted to `real`, into `sink` according to `fmt`.
    template toString()
    {
        import std.format.spec : FormatSpec;
        import std.format.write : formatValue;
        // Needs to be a template because of https://issues.dlang.org/show_bug.cgi?id=13737.
        void toString()(scope void delegate(const(char)[]) sink, scope const ref FormatSpec!char fmt)
        {
            sink.formatValue(get!real, fmt);
        }
    }
}
181254a7Smrg
// Round-trips and compound assignment on several custom formats. All values
// used are powers of two.
@safe unittest
{
    import std.meta;
    // NOTE(review): in the last entry `^` binds tighter than `|`, so the flags
    // are `ieee | (probability ^ signed)`, which still contains `signed`.
    // With the sign bit, 4 + 3 + 1 = 8 bits satisfies the whole-byte
    // constraint on CustomFloat; without it the type would be rejected.
    alias FPTypes =
        AliasSeq!(
            CustomFloat!(5, 10),
            CustomFloat!(5, 11, CustomFloatFlags.ieee ^ CustomFloatFlags.signed),
            CustomFloat!(1, 7, CustomFloatFlags.ieee ^ CustomFloatFlags.signed),
            CustomFloat!(4, 3, CustomFloatFlags.ieee | CustomFloatFlags.probability ^ CustomFloatFlags.signed)
        );

    foreach (F; FPTypes)
    {
        // construction and get!T
        auto x = F(0.125);
        assert(x.get!float == 0.125F);
        assert(x.get!double == 0.125);

        // opOpAssign with -, *, /
        x -= 0.0625;
        assert(x.get!float == 0.0625F);
        assert(x.get!double == 0.0625);

        x *= 2;
        assert(x.get!float == 0.125F);
        assert(x.get!double == 0.125);

        x /= 4;
        assert(x.get!float == 0.03125);
        assert(x.get!double == 0.03125);

        // assignment followed by ^^=
        x = 0.5;
        x ^^= 4;
        assert(x.get!float == 1 / 16.0F);
        assert(x.get!double == 1 / 16.0);
    }
}
181254a7Smrg
// String conversion goes through CustomFloat's toString.
@system unittest
{
    // @system due to to!string(CustomFloat)
    import std.conv;
    CustomFloat!(5, 10) y = CustomFloat!(5, 10)(0.125);
    assert(y.to!string == "0.125");
}
181254a7Smrg
// Checks the static properties of a small signed format: 5 significand bits,
// 2 exponent bits and, with the default `ieee` flags, a sign bit.
@safe unittest
{
    alias cf = CustomFloat!(5, 2);

    // infinity: maximum exponent, zero significand
    auto a = cf.infinity;
    assert(a.sign == 0);
    assert(a.exponent == 3);
    assert(a.significand == 0);

    // nan: maximum exponent, non-zero significand
    auto b = cf.nan;
    assert(b.exponent == 3);
    assert(b.significand != 0);

    assert(cf.dig == 1);

    auto c = cf.epsilon;
    assert(c.sign == 0);
    assert(c.exponent == 0);
    assert(c.significand == 1);

    assert(cf.mant_dig == 6);

    assert(cf.max_10_exp == 0);
    assert(cf.max_exp == 2);
    assert(cf.min_10_exp == 0);
    assert(cf.min_exp == 1);

    auto d = cf.max;
    assert(d.sign == 0);
    assert(d.exponent == 2);
    assert(d.significand == 31);

    auto e = cf.min_normal;
    assert(e.sign == 0);
    assert(e.exponent == 1);
    assert(e.significand == 0);

    assert(e.re == e);
    assert(e.im == cf(0.0));
}
*b1e83836Smrg
// check whether CustomFloats identical to float/double behave like float/double
@safe unittest
{
    import std.conv : to;

    // Same layout parameters as float: 23 fraction bits, 8 exponent bits.
    alias myFloat = CustomFloat!(23, 8);

    static assert(myFloat.dig == float.dig);
    static assert(myFloat.mant_dig == float.mant_dig);
    assert(myFloat.max_10_exp == float.max_10_exp);
    static assert(myFloat.max_exp == float.max_exp);
    assert(myFloat.min_10_exp == float.min_10_exp);
    static assert(myFloat.min_exp == float.min_exp);
    assert(to!float(myFloat.epsilon) == float.epsilon);
    assert(to!float(myFloat.max) == float.max);
    assert(to!float(myFloat.min_normal) == float.min_normal);

    // Same layout parameters as double: 52 fraction bits, 11 exponent bits.
    alias myDouble = CustomFloat!(52, 11);

    static assert(myDouble.dig == double.dig);
    static assert(myDouble.mant_dig == double.mant_dig);
    assert(myDouble.max_10_exp == double.max_10_exp);
    static assert(myDouble.max_exp == double.max_exp);
    assert(myDouble.min_10_exp == double.min_10_exp);
    static assert(myDouble.min_exp == double.min_exp);
    assert(to!double(myDouble.epsilon) == double.epsilon);
    assert(to!double(myDouble.max) == double.max);
    assert(to!double(myDouble.min_normal) == double.min_normal);
}
*b1e83836Smrg
// testing .dig
@safe unittest
{
    alias Flags = CustomFloatFlags;

    // default flags
    static assert(CustomFloat!(1, 6).dig == 0);
    static assert(CustomFloat!(9, 6).dig == 2);
    static assert(CustomFloat!(10, 5).dig == 3);
    static assert(CustomFloat!(64, 7).dig == 19);

    // no flags set
    static assert(CustomFloat!(10, 6, Flags.none).dig == 2);
    static assert(CustomFloat!(11, 5, Flags.none).dig == 3);
}
*b1e83836Smrg
// testing .mant_dig
@safe unittest
{
    alias WithDefaults = CustomFloat!(10, 5);
    alias WithoutFlags = CustomFloat!(10, 6, CustomFloatFlags.none);

    static assert(WithDefaults.mant_dig == 11);
    static assert(WithoutFlags.mant_dig == 10);
}
*b1e83836Smrg
// testing .max_exp
@safe unittest
{
    alias Flags = CustomFloatFlags;

    // 6-bit exponent
    static assert(CustomFloat!(1, 6).max_exp == 32);
    static assert(CustomFloat!(2, 6, Flags.none).max_exp == 32);
    static assert(CustomFloat!(2, 6, Flags.nan).max_exp == 32);

    // 10-bit exponent
    static assert(CustomFloat!(5, 10).max_exp == 512);
    static assert(CustomFloat!(6, 10, Flags.none).max_exp == 512);
    static assert(CustomFloat!(6, 10, Flags.nan).max_exp == 512);
}
*b1e83836Smrg
// testing .min_exp
@safe unittest
{
    alias Flags = CustomFloatFlags;

    // 6-bit exponent
    static assert(CustomFloat!(1, 6).min_exp == 3 - 2^^5);
    static assert(CustomFloat!(2, 6, Flags.none).min_exp == 1 - 2^^5);
    static assert(CustomFloat!(2, 6, Flags.nan).min_exp == 2 - 2^^5);
    static assert(CustomFloat!(2, 6, Flags.allowDenorm).min_exp == 2 - 2^^5);

    // 10-bit exponent
    static assert(CustomFloat!(5, 10).min_exp == 3 - 2^^9);
    static assert(CustomFloat!(6, 10, Flags.none).min_exp == 1 - 2^^9);
    static assert(CustomFloat!(6, 10, Flags.nan).min_exp == 2 - 2^^9);
    static assert(CustomFloat!(6, 10, Flags.allowDenorm).min_exp == 2 - 2^^9);
}
*b1e83836Smrg
// testing .max_10_exp
@safe unittest
{
    alias Flags = CustomFloatFlags;

    // 6-bit exponent
    assert(CustomFloat!(1, 6).max_10_exp == 9);
    assert(CustomFloat!(2, 6, Flags.none).max_10_exp == 9);
    assert(CustomFloat!(2, 6, Flags.nan).max_10_exp == 9);

    // 10-bit exponent
    assert(CustomFloat!(5, 10).max_10_exp == 154);
    assert(CustomFloat!(6, 10, Flags.none).max_10_exp == 154);
    assert(CustomFloat!(6, 10, Flags.nan).max_10_exp == 154);
}
*b1e83836Smrg
// testing .min_10_exp
@safe unittest
{
    alias Flags = CustomFloatFlags;

    // 6-bit exponent
    assert(CustomFloat!(1, 6).min_10_exp == -9);
    assert(CustomFloat!(2, 6, Flags.none).min_10_exp == -9);
    assert(CustomFloat!(2, 6, Flags.nan).min_10_exp == -9);
    assert(CustomFloat!(2, 6, Flags.allowDenorm).min_10_exp == -9);

    // 10-bit exponent
    assert(CustomFloat!(5, 10).min_10_exp == -153);
    assert(CustomFloat!(6, 10, Flags.none).min_10_exp == -154);
    assert(CustomFloat!(6, 10, Flags.nan).min_10_exp == -153);
    assert(CustomFloat!(6, 10, Flags.allowDenorm).min_10_exp == -153);
}
*b1e83836Smrg
// testing .epsilon
@safe unittest
{
    auto eps1x6 = CustomFloat!(1,6).epsilon;
    assert(eps1x6.sign == 0);
    assert(eps1x6.exponent == 30);
    assert(eps1x6.significand == 0);

    auto eps2x5 = CustomFloat!(2,5).epsilon;
    assert(eps2x5.sign == 0);
    assert(eps2x5.exponent == 13);
    assert(eps2x5.significand == 0);

    auto eps3x4 = CustomFloat!(3,4).epsilon;
    assert(eps3x4.sign == 0);
    assert(eps3x4.exponent == 4);
    assert(eps3x4.significand == 0);

    // the following epsilons are only available when denormalized numbers are allowed:
    auto eps4x3 = CustomFloat!(4,3).epsilon;
    assert(eps4x3.sign == 0);
    assert(eps4x3.exponent == 0);
    assert(eps4x3.significand == 4);

    auto eps5x2 = CustomFloat!(5,2).epsilon;
    assert(eps5x2.sign == 0);
    assert(eps5x2.exponent == 0);
    assert(eps5x2.significand == 1);
}
*b1e83836Smrg
// testing .max
@safe unittest
{
    alias F5x2 = CustomFloat!(5,2);
    static assert(F5x2.max.sign == 0);
    static assert(F5x2.max.exponent == 2);
    static assert(F5x2.max.significand == 31);

    alias F4x3 = CustomFloat!(4,3);
    static assert(F4x3.max.sign == 0);
    static assert(F4x3.max.exponent == 6);
    static assert(F4x3.max.significand == 15);

    alias F3x4 = CustomFloat!(3,4);
    static assert(F3x4.max.sign == 0);
    static assert(F3x4.max.exponent == 14);
    static assert(F3x4.max.significand == 7);

    alias F2x5 = CustomFloat!(2,5);
    static assert(F2x5.max.sign == 0);
    static assert(F2x5.max.exponent == 30);
    static assert(F2x5.max.significand == 3);

    alias F1x6 = CustomFloat!(1,6);
    static assert(F1x6.max.sign == 0);
    static assert(F1x6.max.exponent == 62);
    static assert(F1x6.max.significand == 1);

    // no flags set
    alias Plain = CustomFloat!(3,5, CustomFloatFlags.none);
    static assert(Plain.max.exponent == 31);
    static assert(Plain.max.significand == 7);
}
*b1e83836Smrg
// testing .min_normal
@safe unittest
{
    alias F5x2 = CustomFloat!(5,2);
    static assert(F5x2.min_normal.sign == 0);
    static assert(F5x2.min_normal.exponent == 1);
    static assert(F5x2.min_normal.significand == 0);

    alias F4x3 = CustomFloat!(4,3);
    static assert(F4x3.min_normal.sign == 0);
    static assert(F4x3.min_normal.exponent == 1);
    static assert(F4x3.min_normal.significand == 0);

    alias F3x4 = CustomFloat!(3,4);
    static assert(F3x4.min_normal.sign == 0);
    static assert(F3x4.min_normal.exponent == 1);
    static assert(F3x4.min_normal.significand == 0);

    alias F2x5 = CustomFloat!(2,5);
    static assert(F2x5.min_normal.sign == 0);
    static assert(F2x5.min_normal.exponent == 1);
    static assert(F2x5.min_normal.significand == 0);

    alias F1x6 = CustomFloat!(1,6);
    static assert(F1x6.min_normal.sign == 0);
    static assert(F1x6.min_normal.exponent == 1);
    static assert(F1x6.min_normal.significand == 0);

    // no flags set
    alias Plain = CustomFloat!(3,5, CustomFloatFlags.none);
    static assert(Plain.min_normal.exponent == 0);
    static assert(Plain.min_normal.significand == 4);
}
*b1e83836Smrg
@safe unittest
{
    import std.math.traits : isNaN;

    alias Tiny = CustomFloat!(5, 2);

    // NaN survives the conversion to float.
    auto asFloat = Tiny.nan.get!float();
    assert(isNaN(asFloat));

    Tiny value;

    // Assigning real.max yields infinity.
    value = real.max;
    assert(value == Tiny.infinity);

    // 0.015625 is stored with all-zero exponent and significand fields.
    value = 0.015625;
    assert(value.exponent == 0);
    assert(value.significand == 0);

    // 0.984375 is stored with exponent field 1 and an empty significand.
    value = 0.984375;
    assert(value.exponent == 1);
    assert(value.significand == 0);
}
*b1e83836Smrg
@system unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown;

    // With no flags set, assigning real.max must trip an assertion.
    alias Plain = CustomFloat!(3, 5, CustomFloatFlags.none);

    Plain target;
    assertThrown!AssertError(target = real.max);
}
*b1e83836Smrg
@system unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown;

    // With only the nan flag set, assigning real.max must trip an assertion.
    alias NanOnly = CustomFloat!(3, 5, CustomFloatFlags.nan);

    NanOnly target;
    assertThrown!AssertError(target = real.max);
}
*b1e83836Smrg
@system unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown;

    // With no flags set, assigning float.infinity must trip an assertion.
    alias Plain = CustomFloat!(24, 8, CustomFloatFlags.none);

    Plain target;
    assertThrown!AssertError(target = float.infinity);
}
*b1e83836Smrg
/*
 * Checks whether `precision`, `exponentWidth` and `flags` describe a
 * supported CustomFloat layout.
 *
 * Params:
 *     precision     = number of significand bits
 *     exponentWidth = number of exponent bits
 *     flags         = format flags of the type
 *
 * Returns: `true` only if every check below passes: the total bit count is
 * 8, 16, 32 or 64; the significand has at least one bit and fits into the
 * significand of `real`; the exponent has at least one bit (two when
 * denormals, infinity or NaN are enabled) and fits into the exponent range
 * of `real`.
 */
private bool isCorrectCustomFloat(uint precision, uint exponentWidth, CustomFloatFlags flags) @safe pure nothrow @nogc
{
    // Restrictions from bitfield
    // due to CustomFloat!80 support hack precision with 64 bits is handled specially
    immutable totalBits = (flags & flags.signed) + exponentWidth + ((precision == 64) ? 0 : precision);
    if (totalBits != 8 && totalBits != 16 && totalBits != 32 && totalBits != 64) return false;

    // mantissa should have at least one bit
    if (precision == 0) return false;

    // mantissa needs to fit into real mantissa
    if (precision > real.mant_dig - 1 && precision != 64) return false;

    // exponent should have at least one bit, in some cases two
    immutable uint minExponentWidth =
        ((flags & (flags.allowDenorm | flags.infinity | flags.nan)) != 0) ? 2 : 1;
    if (exponentWidth < minExponentWidth) return false;

    // exponent needs to fit into real exponent.
    // The width is validated first so the shift count below is always in
    // 0 .. 62: a zero width would wrap `exponentWidth - 1` to uint.max, and a
    // 64-bit width would overflow the shift to a negative value that slips
    // past the comparison.
    if (exponentWidth > 63 || (1L << (exponentWidth - 1)) > real.max_exp) return false;

    return true;
}

@safe pure nothrow @nogc unittest
{
    assert(isCorrectCustomFloat(3,4,CustomFloatFlags.ieee));
    assert(isCorrectCustomFloat(3,5,CustomFloatFlags.none));
    assert(!isCorrectCustomFloat(3,3,CustomFloatFlags.ieee));
    assert(isCorrectCustomFloat(64,7,CustomFloatFlags.ieee));
    assert(!isCorrectCustomFloat(64,4,CustomFloatFlags.ieee));
    assert(!isCorrectCustomFloat(508,3,CustomFloatFlags.ieee));
    assert(!isCorrectCustomFloat(3,100,CustomFloatFlags.ieee));
    assert(!isCorrectCustomFloat(0,7,CustomFloatFlags.ieee));
    assert(!isCorrectCustomFloat(6,1,CustomFloatFlags.ieee));
    assert(isCorrectCustomFloat(7,1,CustomFloatFlags.none));
    assert(!isCorrectCustomFloat(8,0,CustomFloatFlags.none));

    // a zero-width exponent must be rejected at compile time as well,
    // without evaluating an out-of-range shift
    static assert(!isCorrectCustomFloat(8,0,CustomFloatFlags.none));
    // a 64-bit exponent can never fit into the exponent range of real
    assert(!isCorrectCustomFloat(64,64,CustomFloatFlags.none));
}
*b1e83836Smrg
/**
Yields the fastest type for holding the temporaries of a calculation
whose final result has type `F` (where `F` must be one of `float`,
`double`, or `real`). In a multi-step computation, intermediate
results may be stored as `FPTemporary!F`.

`FPTemporary` exists because of the optimized floating-point
operations and registers found in virtually all processors. When
adding numbers in the example below, the addition may in fact be done
in `real` precision internally. In that case, storing the
intermediate `result` in `double` format is not only less precise, it
is also (surprisingly) slower, because a conversion from `real` to
`double` is performed on every pass through the loop. Since that is a
lose-lose situation, `FPTemporary!F` is defined as the $(I fastest)
type to use for calculations at precision `F`. No separate type is
needed for the $(I most accurate) calculations, as that is always
`real`.

On X86 targets the alias is `real`; on every other target it is `F`
with its type qualifiers removed.

Finally, there is no guarantee that using `FPTemporary!F` will always
be fastest, as the speed of floating-point calculations depends on
very many factors.
 */
template FPTemporary(F)
if (isFloatingPoint!F)
{
    version (X86)
    {
        alias FPTemporary = real;
    }
    else
    {
        alias FPTemporary = Unqual!F;
    }
}
181254a7Smrg
///
@safe unittest
{
    import std.math.operations : isClose;

    // Arithmetic mean of the values in a slice (0 for an empty slice)
    double mean(in double[] values)
    {
        if (values.length == 0)
            return 0;

        // accumulate in the fastest temporary type for double
        FPTemporary!double result = 0;
        foreach (value; values)
            result += value;
        return result / values.length;
    }

    auto samples = [1.0, 2.0, 3.0];
    assert(isClose(mean(samples), 2));
}
181254a7Smrg
/**
Implements the $(HTTP tinyurl.com/2zb9yr, secant method) for finding a
root of the function `fun`, starting from the two points `xn_1` and
`xn` (ideally close to the root). `Num` may be `float`, `double`,
or `real`.

Iteration stops as soon as the step between successive estimates is
within `1e-5` of zero or is no longer finite; the most recent estimate
is returned.
*/
template secantMethod(alias fun)
{
    import std.functional : unaryFun;
    Num secantMethod(Num)(Num xn_1, Num xn)
    {
        auto fCurrent = unaryFun!(fun)(xn_1);
        auto step = xn_1 - xn;
        typeof(fCurrent) fPrevious;

        xn = xn_1;
        for (;;)
        {
            // converged, or the step degenerated to infinity/NaN
            if (isClose(step, 0, 0.0, 1e-5) || !isFinite(step))
                break;

            xn_1 = xn;
            xn -= step;
            fPrevious = fCurrent;
            fCurrent = unaryFun!(fun)(xn);
            // secant update of the step from the last two function values
            step *= -fCurrent / (fCurrent - fPrevious);
        }
        return xn;
    }
}
181254a7Smrg
///
@safe unittest
{
    import std.math.operations : isClose;
    import std.math.trigonometry : cos;

    // cos(x) - x^3 has a root near 0.865474
    float target(float x)
    {
        return cos(x) - x*x*x;
    }

    auto root = secantMethod!(target)(0f, 1f);
    assert(isClose(root, 0.865474));
}
181254a7Smrg
@system unittest
{
    // @system because of __gshared stderr
    import std.stdio : stderr;
    scope(failure) stderr.writeln("Failure testing secantMethod");

    float target(float x)
    {
        return cos(x) - x*x*x;
    }

    // pass the nested function itself
    immutable viaFunction = secantMethod!(target)(0f, 1f);
    assert(isClose(viaFunction, 0.865474));

    // pass a delegate that points at the same function
    auto dg = &target;
    immutable viaDelegate = secantMethod!(dg)(0f, 1f);
    assert(isClose(viaDelegate, 0.865474));
}
181254a7Smrg
181254a7Smrg
/**
 * Tells whether the sign bits of `a` and `b` differ.
 *
 * Returns: `true` if `signbit(a)` and `signbit(b)` are not equal.
 */
private bool oppositeSigns(T1, T2)(T1 a, T2 b)
{
    immutable signOfA = signbit(a);
    immutable signOfB = signbit(b);
    return signOfA != signOfB;
}
181254a7Smrg
181254a7Smrgpublic:
181254a7Smrg
/**  Find a real root of a real function f(x) via bracketing.
 *
 * Given a function `f` and a range `[a .. b]` such that `f(a)`
 * and `f(b)` have opposite signs or at least one of them equals ±0,
 * returns the value of `x` in the range which is closest to a root
 * of `f(x)`.  If `f(x)` has more than one root in the range, one
 * will be chosen arbitrarily.  If `f(x)` returns NaN, NaN will be
 * returned; otherwise, this algorithm is guaranteed to succeed.
 *
 * If `f(a)` or `f(b)` is exactly zero, that endpoint is returned
 * immediately without any further evaluation of `f`.
 *
 * Uses an algorithm based on TOMS748, which uses inverse cubic
 * interpolation whenever possible, otherwise reverting to parabolic
 * or secant interpolation. Compared to TOMS748, this implementation
 * improves worst-case performance by a factor of more than 100, and
 * typical performance by a factor of 2. For 80-bit reals, most
 * problems require 8 to 15 calls to `f(x)` to achieve full machine
 * precision. The worst-case performance (pathological cases) is
 * approximately twice the number of bits.
 *
 * References: "On Enclosing Simple Roots of Nonlinear Equations",
 * G. Alefeld, F.A. Potra, Yixun Shi, Mathematics of Computation 61,
 * pp733-744 (1993).  Fortran code available from $(HTTP
 * www.netlib.org,www.netlib.org) as algorithm TOMS748.
 *
 */
T findRoot(T, DF, DT)(scope DF f, const T a, const T b,
    scope DT tolerance) //= (T a, T b) => false)
if (
    isFloatingPoint!T &&
    is(typeof(tolerance(T.init, T.init)) : bool) &&
    is(typeof(f(T.init)) == R, R) && isFloatingPoint!R
    )
{
    // An endpoint that is already an exact root needs no search.
    immutable fa = f(a);
    if (fa == 0)
    {
        return a;
    }
    immutable fb = f(b);
    if (fb == 0)
    {
        return b;
    }

    // Elements 0 and 1 are the final bracket, 2 and 3 the function values there.
    immutable bracket = findRoot(f, a, b, fa, fb, tolerance);

    // Pick the second bound only when its function value is strictly smaller
    // in magnitude; a comparison involving NaN is false, so the first bound
    // is returned in that case.
    if (fabs(bracket[2]) > fabs(bracket[3]))
        return bracket[1];
    return bracket[0];
}
181254a7Smrg
///ditto
T findRoot(T, DF)(scope DF f, const T a, const T b)
{
    // A tolerance predicate that never accepts the current bounds.
    auto neverAccept = (T lo, T hi) => false;
    return findRoot(f, a, b, neverAccept);
}
181254a7Smrg
181254a7Smrg/** Find root of a real function f(x) by bracketing, allowing the
181254a7Smrg * termination condition to be specified.
181254a7Smrg *
181254a7Smrg * Params:
181254a7Smrg *
181254a7Smrg * f = Function to be analyzed
181254a7Smrg *
181254a7Smrg * ax = Left bound of initial range of `f` known to contain the
181254a7Smrg * root.
181254a7Smrg *
181254a7Smrg * bx = Right bound of initial range of `f` known to contain the
181254a7Smrg * root.
181254a7Smrg *
*b1e83836Smrg * fax = Value of `f(ax)`.
181254a7Smrg *
*b1e83836Smrg * fbx = Value of `f(bx)`. `fax` and `fbx` should have opposite signs.
*b1e83836Smrg * (`f(ax)` and `f(bx)` are commonly known in advance.)
181254a7Smrg *
181254a7Smrg *
181254a7Smrg * tolerance = Defines an early termination condition. Receives the
181254a7Smrg *             current upper and lower bounds on the root. The
*b1e83836Smrg *             delegate must return `true` when these bounds are
*b1e83836Smrg *             acceptable. If this function always returns `false`,
181254a7Smrg *             full machine precision will be achieved.
181254a7Smrg *
181254a7Smrg * Returns:
181254a7Smrg *
181254a7Smrg * A tuple consisting of two ranges. The first two elements are the
181254a7Smrg * range (in `x`) of the root, while the second pair of elements
181254a7Smrg * are the corresponding function values at those points. If an exact
181254a7Smrg * root was found, both of the first two elements will contain the
181254a7Smrg * root, and the second pair of elements will be 0.
181254a7Smrg */
*b1e83836SmrgTuple!(T, T, R, R) findRoot(T, R, DF, DT)(scope DF f,
*b1e83836Smrg    const T ax, const T bx, const R fax, const R fbx,
181254a7Smrg    scope DT tolerance) // = (T a, T b) => false)
181254a7Smrgif (
181254a7Smrg    isFloatingPoint!T &&
181254a7Smrg    is(typeof(tolerance(T.init, T.init)) : bool) &&
181254a7Smrg    is(typeof(f(T.init)) == R) && isFloatingPoint!R
181254a7Smrg    )
181254a7Smrgin
181254a7Smrg{
181254a7Smrg    assert(!ax.isNaN() && !bx.isNaN(), "Limits must not be NaN");
181254a7Smrg    assert(signbit(fax) != signbit(fbx), "Parameters must bracket the root.");
181254a7Smrg}
*b1e83836Smrgdo
181254a7Smrg{
181254a7Smrg    // Author: Don Clugston. This code is (heavily) modified from TOMS748
    // (www.netlib.org).  The changes to improve the worst-case performance are
181254a7Smrg    // entirely original.
181254a7Smrg
181254a7Smrg    T a, b, d;  // [a .. b] is our current bracket. d is the third best guess.
181254a7Smrg    R fa, fb, fd; // Values of f at a, b, d.
181254a7Smrg    bool done = false; // Has a root been found?
181254a7Smrg
181254a7Smrg    // Allow ax and bx to be provided in reverse order
181254a7Smrg    if (ax <= bx)
181254a7Smrg    {
181254a7Smrg        a = ax; fa = fax;
181254a7Smrg        b = bx; fb = fbx;
181254a7Smrg    }
181254a7Smrg    else
181254a7Smrg    {
181254a7Smrg        a = bx; fa = fbx;
181254a7Smrg        b = ax; fb = fax;
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    // Test the function at point c; update brackets accordingly
181254a7Smrg    void bracket(T c)
181254a7Smrg    {
181254a7Smrg        R fc = f(c);
181254a7Smrg        if (fc == 0 || fc.isNaN()) // Exact solution, or NaN
181254a7Smrg        {
181254a7Smrg            a = c;
181254a7Smrg            fa = fc;
181254a7Smrg            d = c;
181254a7Smrg            fd = fc;
181254a7Smrg            done = true;
181254a7Smrg            return;
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        // Determine new enclosing interval
181254a7Smrg        if (signbit(fa) != signbit(fc))
181254a7Smrg        {
181254a7Smrg            d = b;
181254a7Smrg            fd = fb;
181254a7Smrg            b = c;
181254a7Smrg            fb = fc;
181254a7Smrg        }
181254a7Smrg        else
181254a7Smrg        {
181254a7Smrg            d = a;
181254a7Smrg            fd = fa;
181254a7Smrg            a = c;
181254a7Smrg            fa = fc;
181254a7Smrg        }
181254a7Smrg    }
181254a7Smrg
181254a7Smrg   /* Perform a secant interpolation. If the result would lie on a or b, or if
181254a7Smrg     a and b differ so wildly in magnitude that the result would be meaningless,
181254a7Smrg     perform a bisection instead.
181254a7Smrg    */
181254a7Smrg    static T secant_interpolate(T a, T b, R fa, R fb)
181254a7Smrg    {
181254a7Smrg        if (( ((a - b) == a) && b != 0) || (a != 0 && ((b - a) == b)))
181254a7Smrg        {
181254a7Smrg            // Catastrophic cancellation
181254a7Smrg            if (a == 0)
181254a7Smrg                a = copysign(T(0), b);
181254a7Smrg            else if (b == 0)
181254a7Smrg                b = copysign(T(0), a);
181254a7Smrg            else if (signbit(a) != signbit(b))
181254a7Smrg                return 0;
181254a7Smrg            T c = ieeeMean(a, b);
181254a7Smrg            return c;
181254a7Smrg        }
181254a7Smrg        // avoid overflow
181254a7Smrg        if (b - a > T.max)
181254a7Smrg            return b / 2 + a / 2;
181254a7Smrg        if (fb - fa > R.max)
181254a7Smrg            return a - (b - a) / 2;
181254a7Smrg        T c = a - (fa / (fb - fa)) * (b - a);
181254a7Smrg        if (c == a || c == b)
181254a7Smrg            return (a + b) / 2;
181254a7Smrg        return c;
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    /* Uses 'numsteps' newton steps to approximate the zero in [a .. b] of the
181254a7Smrg       quadratic polynomial interpolating f(x) at a, b, and d.
181254a7Smrg       Returns:
181254a7Smrg         The approximate zero in [a .. b] of the quadratic polynomial.
181254a7Smrg    */
181254a7Smrg    T newtonQuadratic(int numsteps)
181254a7Smrg    {
181254a7Smrg        // Find the coefficients of the quadratic polynomial.
181254a7Smrg        immutable T a0 = fa;
181254a7Smrg        immutable T a1 = (fb - fa)/(b - a);
181254a7Smrg        immutable T a2 = ((fd - fb)/(d - b) - a1)/(d - a);
181254a7Smrg
181254a7Smrg        // Determine the starting point of newton steps.
181254a7Smrg        T c = oppositeSigns(a2, fa) ? a  : b;
181254a7Smrg
181254a7Smrg        // start the safeguarded newton steps.
181254a7Smrg        foreach (int i; 0 .. numsteps)
181254a7Smrg        {
181254a7Smrg            immutable T pc = a0 + (a1 + a2 * (c - b))*(c - a);
181254a7Smrg            immutable T pdc = a1 + a2*((2 * c) - (a + b));
181254a7Smrg            if (pdc == 0)
181254a7Smrg                return a - a0 / a1;
181254a7Smrg            else
181254a7Smrg                c = c - pc / pdc;
181254a7Smrg        }
181254a7Smrg        return c;
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    // On the first iteration we take a secant step:
181254a7Smrg    if (fa == 0 || fa.isNaN())
181254a7Smrg    {
181254a7Smrg        done = true;
181254a7Smrg        b = a;
181254a7Smrg        fb = fa;
181254a7Smrg    }
181254a7Smrg    else if (fb == 0 || fb.isNaN())
181254a7Smrg    {
181254a7Smrg        done = true;
181254a7Smrg        a = b;
181254a7Smrg        fa = fb;
181254a7Smrg    }
181254a7Smrg    else
181254a7Smrg    {
181254a7Smrg        bracket(secant_interpolate(a, b, fa, fb));
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    // Starting with the second iteration, higher-order interpolation can
181254a7Smrg    // be used.
181254a7Smrg    int itnum = 1;   // Iteration number
181254a7Smrg    int baditer = 1; // Num bisections to take if an iteration is bad.
181254a7Smrg    T c, e;  // e is our fourth best guess
181254a7Smrg    R fe;
181254a7Smrg
181254a7Smrgwhileloop:
181254a7Smrg    while (!done && (b != nextUp(a)) && !tolerance(a, b))
181254a7Smrg    {
181254a7Smrg        T a0 = a, b0 = b; // record the brackets
181254a7Smrg
181254a7Smrg        // Do two higher-order (cubic or parabolic) interpolation steps.
181254a7Smrg        foreach (int QQ; 0 .. 2)
181254a7Smrg        {
181254a7Smrg            // Cubic inverse interpolation requires that
181254a7Smrg            // all four function values fa, fb, fd, and fe are distinct;
181254a7Smrg            // otherwise use quadratic interpolation.
181254a7Smrg            bool distinct = (fa != fb) && (fa != fd) && (fa != fe)
181254a7Smrg                         && (fb != fd) && (fb != fe) && (fd != fe);
181254a7Smrg            // The first time, cubic interpolation is impossible.
181254a7Smrg            if (itnum<2) distinct = false;
181254a7Smrg            bool ok = distinct;
181254a7Smrg            if (distinct)
181254a7Smrg            {
181254a7Smrg                // Cubic inverse interpolation of f(x) at a, b, d, and e
181254a7Smrg                immutable q11 = (d - e) * fd / (fe - fd);
181254a7Smrg                immutable q21 = (b - d) * fb / (fd - fb);
181254a7Smrg                immutable q31 = (a - b) * fa / (fb - fa);
181254a7Smrg                immutable d21 = (b - d) * fd / (fd - fb);
181254a7Smrg                immutable d31 = (a - b) * fb / (fb - fa);
181254a7Smrg
181254a7Smrg                immutable q22 = (d21 - q11) * fb / (fe - fb);
181254a7Smrg                immutable q32 = (d31 - q21) * fa / (fd - fa);
181254a7Smrg                immutable d32 = (d31 - q21) * fd / (fd - fa);
181254a7Smrg                immutable q33 = (d32 - q22) * fa / (fe - fa);
181254a7Smrg                c = a + (q31 + q32 + q33);
181254a7Smrg                if (c.isNaN() || (c <= a) || (c >= b))
181254a7Smrg                {
181254a7Smrg                    // DAC: If the interpolation predicts a or b, it's
181254a7Smrg                    // probable that it's the actual root. Only allow this if
181254a7Smrg                    // we're already close to the root.
181254a7Smrg                    if (c == a && a - b != a)
181254a7Smrg                    {
181254a7Smrg                        c = nextUp(a);
181254a7Smrg                    }
181254a7Smrg                    else if (c == b && a - b != -b)
181254a7Smrg                    {
181254a7Smrg                        c = nextDown(b);
181254a7Smrg                    }
181254a7Smrg                    else
181254a7Smrg                    {
181254a7Smrg                        ok = false;
181254a7Smrg                    }
181254a7Smrg                }
181254a7Smrg            }
181254a7Smrg            if (!ok)
181254a7Smrg            {
181254a7Smrg                // DAC: Alefeld doesn't explain why the number of newton steps
181254a7Smrg                // should vary.
181254a7Smrg                c = newtonQuadratic(distinct ? 3 : 2);
181254a7Smrg                if (c.isNaN() || (c <= a) || (c >= b))
181254a7Smrg                {
181254a7Smrg                    // Failure, try a secant step:
181254a7Smrg                    c = secant_interpolate(a, b, fa, fb);
181254a7Smrg                }
181254a7Smrg            }
181254a7Smrg            ++itnum;
181254a7Smrg            e = d;
181254a7Smrg            fe = fd;
181254a7Smrg            bracket(c);
181254a7Smrg            if (done || ( b == nextUp(a)) || tolerance(a, b))
181254a7Smrg                break whileloop;
181254a7Smrg            if (itnum == 2)
181254a7Smrg                continue whileloop;
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        // Now we take a double-length secant step:
181254a7Smrg        T u;
181254a7Smrg        R fu;
181254a7Smrg        if (fabs(fa) < fabs(fb))
181254a7Smrg        {
181254a7Smrg            u = a;
181254a7Smrg            fu = fa;
181254a7Smrg        }
181254a7Smrg        else
181254a7Smrg        {
181254a7Smrg            u = b;
181254a7Smrg            fu = fb;
181254a7Smrg        }
181254a7Smrg        c = u - 2 * (fu / (fb - fa)) * (b - a);
181254a7Smrg
181254a7Smrg        // DAC: If the secant predicts a value equal to an endpoint, it's
181254a7Smrg        // probably false.
181254a7Smrg        if (c == a || c == b || c.isNaN() || fabs(c - u) > (b - a) / 2)
181254a7Smrg        {
181254a7Smrg            if ((a-b) == a || (b-a) == b)
181254a7Smrg            {
181254a7Smrg                if ((a>0 && b<0) || (a<0 && b>0))
181254a7Smrg                    c = 0;
181254a7Smrg                else
181254a7Smrg                {
181254a7Smrg                    if (a == 0)
181254a7Smrg                        c = ieeeMean(copysign(T(0), b), b);
181254a7Smrg                    else if (b == 0)
181254a7Smrg                        c = ieeeMean(copysign(T(0), a), a);
181254a7Smrg                    else
181254a7Smrg                        c = ieeeMean(a, b);
181254a7Smrg                }
181254a7Smrg            }
181254a7Smrg            else
181254a7Smrg            {
181254a7Smrg                c = a + (b - a) / 2;
181254a7Smrg            }
181254a7Smrg        }
181254a7Smrg        e = d;
181254a7Smrg        fe = fd;
181254a7Smrg        bracket(c);
181254a7Smrg        if (done || (b == nextUp(a)) || tolerance(a, b))
181254a7Smrg            break;
181254a7Smrg
181254a7Smrg        // IMPROVE THE WORST-CASE PERFORMANCE
181254a7Smrg        // We must ensure that the bounds reduce by a factor of 2
181254a7Smrg        // in binary space! every iteration. If we haven't achieved this
181254a7Smrg        // yet, or if we don't yet know what the exponent is,
181254a7Smrg        // perform a binary chop.
181254a7Smrg
181254a7Smrg        if ((a == 0 || b == 0 ||
181254a7Smrg            (fabs(a) >= T(0.5) * fabs(b) && fabs(b) >= T(0.5) * fabs(a)))
181254a7Smrg            &&  (b - a) < T(0.25) * (b0 - a0))
181254a7Smrg        {
181254a7Smrg            baditer = 1;
181254a7Smrg            continue;
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        // DAC: If this happens on consecutive iterations, we probably have a
181254a7Smrg        // pathological function. Perform a number of bisections equal to the
181254a7Smrg        // total number of consecutive bad iterations.
181254a7Smrg
181254a7Smrg        if ((b - a) < T(0.25) * (b0 - a0))
181254a7Smrg            baditer = 1;
181254a7Smrg        foreach (int QQ; 0 .. baditer)
181254a7Smrg        {
181254a7Smrg            e = d;
181254a7Smrg            fe = fd;
181254a7Smrg
181254a7Smrg            T w;
181254a7Smrg            if ((a>0 && b<0) || (a<0 && b>0))
181254a7Smrg                w = 0;
181254a7Smrg            else
181254a7Smrg            {
181254a7Smrg                T usea = a;
181254a7Smrg                T useb = b;
181254a7Smrg                if (a == 0)
181254a7Smrg                    usea = copysign(T(0), b);
181254a7Smrg                else if (b == 0)
181254a7Smrg                    useb = copysign(T(0), a);
181254a7Smrg                w = ieeeMean(usea, useb);
181254a7Smrg            }
181254a7Smrg            bracket(w);
181254a7Smrg        }
181254a7Smrg        ++baditer;
181254a7Smrg    }
181254a7Smrg    return Tuple!(T, T, R, R)(a, b, fa, fb);
181254a7Smrg}
181254a7Smrg
///ditto
Tuple!(T, T, R, R) findRoot(T, R, DF)(scope DF f,
    const T ax, const T bx, const R fax, const R fbx)
{
    // No termination criterion was supplied by the caller: forward to the
    // six-argument form with a predicate that always answers `false`.
    auto neverSatisfied = (T lo, T hi) => false;
    return findRoot(f, ax, bx, fax, fbx, neverSatisfied);
}
181254a7Smrg
///ditto
T findRoot(T, R)(scope R delegate(T) f, const T a, const T b,
    scope bool delegate(T lo, T hi) tolerance = (T a, T b) => false)
{
    // Three explicit template arguments cannot match this two-parameter
    // overload, so the call below resolves to a sibling `findRoot` template.
    alias Callback = R delegate(T);
    alias Criterion = bool delegate(T lo, T hi);
    return findRoot!(T, Callback, Criterion)(f, a, b, tolerance);
}
181254a7Smrg
@safe nothrow unittest
{
    int numProblems = 0;
    int numCalls;

    // Brackets a root of `f` on [x1, x2] using a tolerance predicate that
    // never fires, then checks that the returned bracket still encloses a
    // sign change unless `f` is exactly zero at its lower end.
    void testFindRoot(real delegate(real) @nogc @safe nothrow pure f , real x1, real x2) @nogc @safe nothrow pure
    {
        //numCalls=0;
        //++numProblems;
        assert(!x1.isNaN() && !x2.isNaN());
        assert(signbit(f(x1)) != signbit(f(x2)));
        auto enclosure = findRoot(f, x1, x2, f(x1), f(x2),
            (real lo, real hi) => false);

        auto flo = f(enclosure[0]);
        auto fhi = f(enclosure[1]);
        if (flo != 0)
            assert(oppositeSigns(flo, fhi));
    }

    // Test functions
    real cubicfn(real x) @nogc @safe nothrow pure
    {
        //++numCalls;
        // Clamp the argument into [-float.max, float.max].
        if (x > float.max)
            x = float.max;
        if (x < -float.max)
            x = -float.max;
        // This has a single real root at -59.286543284815
        return 0.386*x*x*x + 23*x*x + 15.7*x + 525.2;
    }

    // Test a function with more than one root.
    real multisine(real x)
    {
        ++numCalls;
        return sin(x);
    }

    testFindRoot(&multisine, 6, 90);
    testFindRoot(&cubicfn, -100, 100);
    testFindRoot(&cubicfn, -double.max, real.max);


/* Tests from the paper:
 * "On Enclosing Simple Roots of Nonlinear Equations", G. Alefeld, F.A. Potra,
 *   Yixun Shi, Mathematics of Computation 61, pp733-744 (1993).
 */
    // Parameters common to many alefeld tests.
    int n;
    real ale_a, ale_b;

    int powercalls = 0;

    real power(real x)
    {
        ++powercalls;
        ++numCalls;
        return pow(x, n) + double.min_normal;
    }

    int[] powerExponents = [3, 5, 7, 9, 19, 25];
    // Alefeld paper states that pow(x,n) is a very poor case, where bisection
    // outperforms his method, and gives total numcalls =
    // 921 for bisection (2.4 calls per bit), 1830 for Alefeld (4.76/bit),
    // 2624 for brent (6.8/bit)
    // ... but that is for double, not real80.
    // This poor performance seems mainly due to catastrophic cancellation,
    // which is avoided here by the use of ieeeMean().
    // I get: 231 (0.48/bit).
    // IE this is 10X faster in Alefeld's worst case
    numProblems = 0;
    foreach (exponent; powerExponents)
    {
        n = exponent;
        testFindRoot(&power, -1, 10);
    }

    int powerProblems = numProblems;

    // Tests from Alefeld paper

    int[9] alefeldSums;

    real alefeld0(real x)
    {
        ++alefeldSums[0];
        ++numCalls;
        real q = sin(x) - x/2;
        foreach (int i; 1 .. 20)
            q += (2*i-5.0)*(2*i-5.0)/((x-i*i)*(x-i*i)*(x-i*i));
        return q;
    }

    real alefeld1(real x)
    {
        ++numCalls;
        ++alefeldSums[1];
        return ale_a*x + exp(ale_b * x);
    }

    real alefeld2(real x)
    {
        ++numCalls;
        ++alefeldSums[2];
        return pow(x, n) - ale_a;
    }

    real alefeld3(real x)
    {
        ++numCalls;
        ++alefeldSums[3];
        return (1.0 +pow(1.0L-n, 2))*x - pow(1.0L-n*x, 2);
    }

    real alefeld4(real x)
    {
        ++numCalls;
        ++alefeldSums[4];
        return x*x - pow(1-x, n);
    }

    real alefeld5(real x)
    {
        ++numCalls;
        ++alefeldSums[5];
        return (1+pow(1.0L-n, 4))*x - pow(1.0L-n*x, 4);
    }

    real alefeld6(real x)
    {
        ++numCalls;
        ++alefeldSums[6];
        return exp(-n*x)*(x-1.01L) + pow(x, n);
    }

    real alefeld7(real x)
    {
        ++numCalls;
        ++alefeldSums[7];
        return (n*x-1)/((n-1)*x);
    }

    numProblems = 0;
    testFindRoot(&alefeld0, PI_2, PI);
    for (n = 1; n <= 10; ++n)
    {
        testFindRoot(&alefeld0, n*n+1e-9L, (n+1)*(n+1)-1e-9L);
    }

    ale_a = -40;
    ale_b = -1;
    testFindRoot(&alefeld1, -9, 31);

    ale_a = -100;
    ale_b = -2;
    testFindRoot(&alefeld1, -9, 31);

    ale_a = -200;
    ale_b = -3;
    testFindRoot(&alefeld1, -9, 31);

    int[] nvals_3 = [1, 2, 5, 10, 15, 20];
    int[] nvals_5 = [1, 2, 4, 5, 8, 15, 20];
    int[] nvals_6 = [1, 5, 10, 15, 20];
    int[] nvals_7 = [2, 5, 15, 20];

    for (int exponent = 4; exponent < 12; exponent += 2)
    {
        n = exponent;
        ale_a = 0.2;
        testFindRoot(&alefeld2, 0, 5);
        ale_a = 1;
        testFindRoot(&alefeld2, 0.95, 4.05);
        testFindRoot(&alefeld2, 0, 1.5);
    }
    foreach (exponent; nvals_3)
    {
        n = exponent;
        testFindRoot(&alefeld3, 0, 1);
    }
    foreach (exponent; nvals_3)
    {
        n = exponent;
        testFindRoot(&alefeld4, 0, 1);
    }
    foreach (exponent; nvals_5)
    {
        n = exponent;
        testFindRoot(&alefeld5, 0, 1);
    }
    foreach (exponent; nvals_6)
    {
        n = exponent;
        testFindRoot(&alefeld6, 0, 1);
    }
    foreach (exponent; nvals_7)
    {
        n = exponent;
        testFindRoot(&alefeld7, 0.01L, 1);
    }

    // Step function: negative below 0.3*real.max, positive from there on.
    real worstcase(real x)
    {
        ++numCalls;
        return x<0.3*real.max? -0.999e-3 : 1.0;
    }
    testFindRoot(&worstcase, -real.max, real.max);

    // just check that the double + float cases compile
    findRoot((double x) => 0.0, -double.max, double.max);
    findRoot((float x) => 0.0f, -float.max, float.max);

/*
   int grandtotal=0;
   foreach (calls; alefeldSums)
   {
       grandtotal+=calls;
   }
   grandtotal-=2*numProblems;
   printf("\nALEFELD TOTAL = %d avg = %f (alefeld avg=19.3 for double)\n",
   grandtotal, (1.0*grandtotal)/numProblems);
   powercalls -= 2*powerProblems;
   printf("POWER TOTAL = %d avg = %f ", powercalls,
        (1.0*powercalls)/powerProblems);
*/
    // https://issues.dlang.org/show_bug.cgi?id=14231
    auto rootAbovePlusZero = findRoot((float x) => x, 0f, 1f);
    auto rootBelowMinusZero = findRoot((float x) => x, -1f, -0f);
}
181254a7Smrg
// Regression control: every call shape below has to keep compiling.
@system unittest
{
    // @system due to the case in the second check

    // float argument, callback widening its result to real
    static assert(__traits(compiles,
        findRoot((float x) => cast(real) x, float.init, float.init)));

    // explicit `T = real` with an untyped lambda parameter
    static assert(__traits(compiles,
        findRoot!real((x) => cast(double) x, real.init, real.init)));

    // real argument, callback narrowing its result to double
    static assert(__traits(compiles,
        findRoot((real x) => cast(double) x, real.init, real.init)));
}
181254a7Smrg
/++
Find a real minimum of a real function `f(x)` via bracketing.
Given a function `f` and a range `(ax .. bx)`,
returns the value of `x` in the range which is closest to a minimum of `f(x)`.
`f` is never evaluated at the endpoints of `ax` and `bx`.
If `f(x)` has more than one minimum in the range, one will be chosen arbitrarily.
If `f(x)` returns NaN or -Infinity, `(x, f(x), NaN)` will be returned;
otherwise, this algorithm is guaranteed to succeed.

Params:
    f = Function to be analyzed
    ax = Left bound of initial range of f known to contain the minimum.
    bx = Right bound of initial range of f known to contain the minimum.
    relTolerance = Relative tolerance.
    absTolerance = Absolute tolerance.

Note that the step tolerance is computed as
`absTolerance * fabs(x) + relTolerance`, i.e. `absTolerance` is the factor
scaled by the magnitude of the current estimate and `relTolerance` is the
additive term.

Preconditions:
    `ax` and `bx` shall be finite reals. $(BR)
    `relTolerance` shall be normal positive real. $(BR)
    `absTolerance` shall be normal positive real no less than `T.epsilon*2`.

Returns:
    A tuple consisting of `x`, `y = f(x)` and `error = 3 * (absTolerance * fabs(x) + relTolerance)`.

    The method used is a combination of golden section search and
successive parabolic interpolation. Convergence is never much slower
than that for a Fibonacci search.

References:
    "Algorithms for Minimization without Derivatives", Richard Brent, Prentice-Hall, Inc. (1973)

See_Also: $(LREF findRoot), $(REF isNormal, std,math)
+/
Tuple!(T, "x", Unqual!(ReturnType!DF), "y", T, "error")
findLocalMin(T, DF)(
        scope DF f,
        const T ax,
        const T bx,
        const T relTolerance = sqrt(T.epsilon),
        const T absTolerance = sqrt(T.epsilon),
        )
if (isFloatingPoint!T
    && __traits(compiles, {T _ = DF.init(T.init);}))
in
{
    assert(isFinite(ax), "ax is not finite");
    assert(isFinite(bx), "bx is not finite");
    assert(isNormal(relTolerance), "relTolerance is not normal floating point number");
    assert(isNormal(absTolerance), "absTolerance is not normal floating point number");
    // The message used to blame `absTolerance` although `relTolerance` is
    // the value being checked here.
    assert(relTolerance >= 0, "relTolerance is not positive");
    assert(absTolerance >= T.epsilon*2, "absTolerance is not greater then `2*T.epsilon`");
}
out (result)
{
    assert(isFinite(result.x));
}
do
{
    alias R = Unqual!(CommonType!(ReturnType!DF, T));
    // c is the squared inverse of the golden ratio
    // (3 - sqrt(5))/2
    // Value obtained from Wolfram Alpha.
    enum T c = 0x0.61c8864680b583ea0c633f9fa31237p+0L;
    enum T cm1 = 0x0.9e3779b97f4a7c15f39cc0605cedc8p+0L;
    R tolerance;
    // Order the bounds so that a <= b regardless of the argument order.
    T a = ax > bx ? bx : ax;
    T b = ax > bx ? ax : bx;
    // sequence of declarations suitable for SIMD instructions
    T  v = a * cm1 + b * c;
    assert(isFinite(v));
    R fv = f(v);
    // NaN or -infinity ends the search immediately; error is reported as NaN.
    if (isNaN(fv) || fv == -T.infinity)
    {
        return typeof(return)(v, fv, T.init);
    }
    // x is the best point so far, w the second best, v the previous w.
    T  w = v;
    R fw = fv;
    T  x = v;
    R fx = fv;
    // d is the step taken last, e the step taken before that.
    for (R d = 0, e = 0;;)
    {
        T m = (a + b) / 2;
        // This fix is not part of the original algorithm
        if (!isFinite(m)) // fix infinity loop. Issue can be reproduced in R.
        {
            // a + b overflowed: halve each operand before adding.
            m = a / 2 + b / 2;
            if (!isFinite(m)) // fast-math compiler switch is enabled
            {
                //SIMD instructions can be used by compiler, do not reduce declarations
                int a_exp = void;
                int b_exp = void;
                immutable an = frexp(a, a_exp);
                immutable bn = frexp(b, b_exp);
                immutable am = ldexp(an, a_exp-1);
                immutable bm = ldexp(bn, b_exp-1);
                m = am + bm;
                if (!isFinite(m)) // wrong input: constraints are disabled in release mode
                {
                    return typeof(return).init;
                }
            }
        }
        tolerance = absTolerance * fabs(x) + relTolerance;
        immutable t2 = tolerance * 2;
        // check stopping criterion
        if (!(fabs(x - m) > t2 - (b - a) / 2))
        {
            break;
        }
        R p = 0;
        R q = 0;
        R r = 0;
        // fit parabola
        if (fabs(e) > tolerance)
        {
            immutable  xw =  x -  w;
            immutable fxw = fx - fw;
            immutable  xv =  x -  v;
            immutable fxv = fx - fv;
            immutable xwfxv = xw * fxv;
            immutable xvfxw = xv * fxw;
            p = xv * xvfxw - xw * xwfxv;
            q = (xvfxw - xwfxv) * 2;
            // Normalise the sign so that q is non-negative.
            if (q > 0)
                p = -p;
            else
                q = -q;
            r = e;
            e = d;
        }
        T u;
        // a parabolic-interpolation step
        if (fabs(p) < fabs(q * r / 2) && p > q * (a - x) && p < q * (b - x))
        {
            d = p / q;
            u = x + d;
            // f must not be evaluated too close to a or b
            if (u - a < t2 || b - u < t2)
                d = x < m ? tolerance : -tolerance;
        }
        // a golden-section step
        else
        {
            e = (x < m ? b : a) - x;
            d = c * e;
        }
        // f must not be evaluated too close to x
        u = x + (fabs(d) >= tolerance ? d : d > 0 ? tolerance : -tolerance);
        immutable fu = f(u);
        if (isNaN(fu) || fu == -T.infinity)
        {
            return typeof(return)(u, fu, T.init);
        }
        //  update  a, b, v, w, and x
        if (fu <= fx)
        {
            // u is the new best point; the old x becomes a bound.
            (u < x ? b : a) = x;
            v = w; fv = fw;
            w = x; fw = fx;
            x = u; fx = fu;
        }
        else
        {
            // x stays the best point; u tightens the bracket on its side.
            (u < x ? a : b) = u;
            if (fu <= fw || w == x)
            {
                v = w; fv = fw;
                w = u; fw = fu;
            }
            else if (fu <= fv || v == x || v == w)
            { // do not remove this braces
                v = u; fv = fu;
            }
        }
    }
    return typeof(return)(x, fx, tolerance * 3);
}
181254a7Smrg
///
@safe unittest
{
    import std.math.operations : isClose;

    // A parabola whose only minimum sits at x = 4, searched on a wide range.
    auto minimum = findLocalMin((double x) => (x-4)^^2, -1e7, 1e7);
    assert(isClose(minimum.x, 4.0));
    assert(isClose(minimum.y, 0.0, 0.0, 1e-10));
}
181254a7Smrg
@safe unittest
{
    import std.meta : AliasSeq;
    static foreach (T; AliasSeq!(double, float, real))
    {
        // Parabola with default tolerances.
        {
            auto res = findLocalMin!T((T x) => (x-4)^^2, T.min_normal, 1e7);
            assert(isClose(res.x, T(4)));
            assert(isClose(res.y, T(0), 0.0, T.epsilon));
        }
        // |x - 1| on a huge interval with the tightest accepted tolerances.
        {
            auto res = findLocalMin!T((T x) => fabs(x-1), -T.max/4, T.max/4, T.min_normal, 2*T.epsilon);
            assert(res.x.isClose(T(1)));
            assert(res.y.isClose(T(0), 0.0, T.epsilon));
            assert(res.error <= 10 * T.epsilon);
        }
        // A function that always yields NaN: x stays valid, y and error are NaN.
        {
            auto res = findLocalMin!T((T x) => T.init, 0, 1, T.min_normal, 2*T.epsilon);
            assert(!res.x.isNaN);
            assert(res.y.isNaN);
            assert(res.error.isNaN);
        }
        // log(x) on (0, 1): the result is pushed towards the left bound.
        {
            auto res = findLocalMin!T((T x) => log(x), 0, 1, T.min_normal, 2*T.epsilon);
            assert(res.error < 3.00001 * ((2*T.epsilon)*fabs(res.x)+ T.min_normal));
            assert(res.x >= 0 && res.x <= res.error);
        }
        // log(x) on (0, T.max).
        {
            auto res = findLocalMin!T((T x) => log(x), 0, T.max, T.min_normal, 2*T.epsilon);
            assert(res.y < -18);
            assert(res.error < 5e-08);
            assert(res.x >= 0 && res.x <= res.error);
        }
        // -|x| on (-1, 1): the result lies next to one of the two bounds.
        {
            auto res = findLocalMin!T((T x) => -fabs(x), -1, 1, T.min_normal, 2*T.epsilon);
            assert(isClose(res.x.fabs, T(1)));
            assert(isClose(res.y.fabs, T(1)));
            assert(isClose(res.error, T(0), 0.0, 100*T.epsilon));
        }
    }
}
181254a7Smrg
/**
Computes the $(LINK2 https://en.wikipedia.org/wiki/Euclidean_distance,
Euclidean distance) between the input ranges `a` and `b`, which must
have the same length. The three-parameter version stops accumulating
as soon as the distance reached so far is greater than or equal to
`limit`, which saves work when only small distances are of interest.
 */
CommonType!(ElementType!(Range1), ElementType!(Range2))
euclideanDistance(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    alias Accumulator = Unqual!(typeof(return));
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);

    // With known lengths the size check is done once, up front.
    static if (bothHaveLength)
        assert(a.length == b.length);

    Accumulator sumOfSquares = 0;
    while (!a.empty)
    {
        immutable delta = a.front - b.front;
        sumOfSquares += delta * delta;
        a.popFront();
        b.popFront();
    }

    // Otherwise `b` has to run out together with `a`.
    static if (!bothHaveLength)
        assert(b.empty);

    return sqrt(sumOfSquares);
}
181254a7Smrg
/// Ditto
CommonType!(ElementType!(Range1), ElementType!(Range2))
euclideanDistance(Range1, Range2, F)(Range1 a, Range2 b, F limit)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    alias Accumulator = Unqual!(typeof(return));
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);

    // Compare squared quantities so that no square root is needed per step.
    limit *= limit;

    static if (bothHaveLength)
        assert(a.length == b.length);

    Accumulator sumOfSquares = 0;
    while (true)
    {
        if (a.empty)
        {
            // Only a complete traversal checks that `b` ended as well.
            static if (!bothHaveLength)
                assert(b.empty);
            break;
        }
        immutable delta = a.front - b.front;
        sumOfSquares += delta * delta;
        // Early exit: the limit has been reached, the rest is not inspected.
        if (sumOfSquares >= limit)
            break;
        a.popFront();
        b.popFront();
    }
    return sqrt(sumOfSquares);
}
181254a7Smrg
@safe unittest
{
    import std.meta : AliasSeq;
    // Exercise mutable, const and immutable element types alike.
    static foreach (T; AliasSeq!(double, const double, immutable double))
    {{
        T[] lhs = [ 1.0, 2.0, ];
        T[] rhs = [ 4.0, 6.0, ];
        // Differences are (-3, -4), hence a full distance of 5.
        assert(euclideanDistance(lhs, rhs) == 5);
        // Limits that are only reached by the last element give the full result.
        assert(euclideanDistance(lhs, rhs, 6) == 5);
        assert(euclideanDistance(lhs, rhs, 5) == 5);
        assert(euclideanDistance(lhs, rhs, 4) == 5);
        // A limit of 2 is already reached after the first element.
        assert(euclideanDistance(lhs, rhs, 2) == 3);
    }}
}
181254a7Smrg
/**
Computes the $(LINK2 https://en.wikipedia.org/wiki/Dot_product,
dot product) of input ranges `a` and `b`. The two ranges must have the
same length. If both ranges define length, the lengths are compared once
before the traversal; otherwise `b` is checked to be empty once `a` has
been exhausted.
 */
CommonType!(ElementType!(Range1), ElementType!(Range2))
dotProduct(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2) &&
    !(isArray!(Range1) && isArray!(Range2)))
{
    alias Accumulator = Unqual!(typeof(return));
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);

    static if (bothHaveLength)
        assert(a.length == b.length);

    Accumulator total = 0;
    while (!a.empty)
    {
        total += a.front * b.front;
        a.popFront();
        b.popFront();
    }

    static if (!bothHaveLength)
        assert(b.empty);

    return total;
}
181254a7Smrg
/// Ditto
CommonType!(F1, F2)
dotProduct(F1, F2)(in F1[] avector, in F2[] bvector)
{
    immutable count = avector.length;
    assert(count == bvector.length);

    auto pa = avector.ptr;
    auto pb = bvector.ptr;

    // Two independent accumulators: one for even offsets, one for odd ones.
    Unqual!(typeof(return)) evenSum = 0, oddSum = 0;

    const endAll = pa + count;
    const endBlocksOf4 = pa + (count & ~3);
    const endBlocksOf16 = pa + (count & ~15);

    // Blocks of 16 elements, fully unrolled at compile time.
    for (; pa != endBlocksOf16; pa += 16, pb += 16)
    {
        static foreach (pair; 0 .. 8)
        {
            evenSum += pa[2 * pair] * pb[2 * pair];
            oddSum += pa[2 * pair + 1] * pb[2 * pair + 1];
        }
    }

    // Remaining blocks of 4 elements.
    for (; pa != endBlocksOf4; pa += 4, pb += 4)
    {
        static foreach (pair; 0 .. 2)
        {
            evenSum += pa[2 * pair] * pb[2 * pair];
            oddSum += pa[2 * pair + 1] * pb[2 * pair + 1];
        }
    }

    evenSum += oddSum;

    /* Do trailing portion in naive loop. */
    for (; pa != endAll; ++pa, ++pb)
        evenSum += *pa * *pb;

    return evenSum;
}
181254a7Smrg
/// ditto
F dotProduct(F, uint N)(const ref scope F[N] a, const ref scope F[N] b)
if (N <= 16)
{
    // Even-indexed products go to one accumulator, odd-indexed ones to the
    // other; the loop is expanded at compile time.
    F evenSum = 0;
    F oddSum = 0;
    static foreach (idx; 0 .. N)
    {
        static if (idx % 2 == 0)
            evenSum += a[idx] * b[idx];
        else
            oddSum += a[idx] * b[idx];
    }
    return evenSum + oddSum;
}
*b1e83836Smrg
@system unittest
{
    // @system due to dotProduct and assertCTFEable
    import std.exception : assertCTFEable;
    import std.meta : AliasSeq;
    static foreach (T; AliasSeq!(double, const double, immutable double))
    {{
        // Dynamic arrays.
        T[] dynA = [ 1.0, 2.0, ];
        T[] dynB = [ 4.0, 6.0, ];
        assert(dotProduct(dynA, dynB) == 16);
        assert(dotProduct([1, 3, -5], [4, -2, -1]) == 3);
        // Test with fixed-length arrays.
        T[2] fixedA2 = [ 1.0, 2.0, ];
        T[2] fixedB2 = [ 4.0, 6.0, ];
        assert(dotProduct(fixedA2, fixedB2) == 16);
        T[3] fixedA3 = [1,  3, -5];
        T[3] fixedB3 = [4, -2, -1];
        assert(dotProduct(fixedA3, fixedB3) == 3);
    }}

    // Make sure the unrolled loop codepath gets tested.
    static const longA =
        [1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22];
    static const longB =
        [2.0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23];
    assertCTFEable!({ assert(dotProduct(longA, longB) == 4048); });
}
181254a7Smrg
/**
Computes the $(LINK2 https://en.wikipedia.org/wiki/Cosine_similarity,
cosine similarity) of input ranges `a` and `b`. The two ranges must have
the same length. If both ranges define length, the lengths are compared
once before the traversal; otherwise `b` is checked to be empty once `a`
has been exhausted. If the sum of squares of either range is zero,
returns 0.
 */
CommonType!(ElementType!(Range1), ElementType!(Range2))
cosineSimilarity(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    alias Accumulator = Unqual!(typeof(return));
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);

    static if (bothHaveLength)
        assert(a.length == b.length);

    // Squared norms of both inputs and their inner product, in one pass.
    Accumulator sqNormA = 0, sqNormB = 0, inner = 0;
    while (!a.empty)
    {
        immutable ea = a.front, eb = b.front;
        sqNormA += ea * ea;
        sqNormB += eb * eb;
        inner += ea * eb;
        a.popFront();
        b.popFront();
    }

    static if (!bothHaveLength)
        assert(b.empty);

    // Avoid dividing by zero when one side has no magnitude.
    if (sqNormA == 0 || sqNormB == 0)
        return 0;
    return inner / sqrt(sqNormA * sqNormB);
}
181254a7Smrg
@safe unittest
{
    import std.meta : AliasSeq;
    // Exercise mutable, const and immutable element types alike.
    static foreach (T; AliasSeq!(double, const double, immutable double))
    {{
        T[] lhs = [ 1.0, 2.0, ];
        T[] rhs = [ 4.0, 3.0, ];
        // inner product 10, squared norms 5 and 25
        immutable expected = 10.0 / sqrt(5.0 * 25);
        assert(isClose(cosineSimilarity(lhs, rhs), expected, 0.01));
    }}
}
181254a7Smrg
/**
Normalizes values in `range` by multiplying each element with a
number chosen such that values sum up to `sum`. If elements in
`range` sum to zero, assigns $(D sum / range.length) to
all. Normalization makes sense only if all elements in `range` are
positive. `normalize` assumes that is the case without checking it.

Forward ranges that do not define `length` are supported as well; their
element count is determined while the elements are being summed.

Params:
    range = Forward range whose elements are rescaled in place.
    sum = Value the elements should add up to afterwards.

Returns: `true` if normalization completed normally, `false` if
all elements in `range` were zero or if `range` is empty.
 */
bool normalize(R)(R range, ElementType!(R) sum = 1)
if (isForwardRange!(R))
{
    ElementType!(R) s = 0;
    // Step 1: Compute sum and length of the range
    static if (hasLength!(R))
    {
        const length = range.length;
        foreach (e; range)
        {
            s += e;
        }
    }
    else
    {
        // No `length` primitive: count the elements during the summation.
        size_t length = 0;
        foreach (e; range)
        {
            s += e;
            ++length;
        }
    }
    // Step 2: perform normalization
    if (s == 0)
    {
        if (length)
        {
            // Use the count gathered above; `range.length` does not exist
            // for ranges that fail `hasLength`.
            immutable f = sum / length;
            foreach (ref e; range) e = f;
        }
        return false;
    }
    // The path most traveled
    assert(s >= 0);
    immutable f = sum / s;
    foreach (ref e; range)
        e *= f;
    return true;
}
181254a7Smrg
181254a7Smrg///
181254a7Smrg@safe unittest
181254a7Smrg{
181254a7Smrg    double[] a = [];
181254a7Smrg    assert(!normalize(a));
181254a7Smrg    a = [ 1.0, 3.0 ];
181254a7Smrg    assert(normalize(a));
181254a7Smrg    assert(a == [ 0.25, 0.75 ]);
*b1e83836Smrg    assert(normalize!(typeof(a))(a, 50)); // a = [12.5, 37.5]
181254a7Smrg    a = [ 0.0, 0.0 ];
181254a7Smrg    assert(!normalize(a));
181254a7Smrg    assert(a == [ 0.5, 0.5 ]);
181254a7Smrg}
181254a7Smrg
/**
Computes the sum of the binary logarithms of the elements of the input
range `r`.

Instead of adding up individual `log2` results, the mantissas of the
elements are multiplied together while their binary exponents are
accumulated separately in an integer; a single `log2` is taken at the
end. The error of this method is much smaller than with a naive sum of
log2.

Returns: the sum of `log2` over all elements; `nan` as soon as an
element compares less than zero.
 */
ElementType!Range sumOfLog2s(Range)(Range r)
if (isInputRange!Range && isFloatingPoint!(ElementType!Range))
{
    alias Result = typeof(return);

    long exponentSum = 0;
    Unqual!Result mantissaProduct = 1;
    for (; !r.empty; r.popFront())
    {
        auto value = r.front;
        if (value < 0)
            return Result.nan;

        // Split the element into mantissa and binary exponent.
        int valueExponent = void;
        mantissaProduct *= frexp(value, valueExponent);
        exponentSum += valueExponent;

        // Renormalize so the running product does not drift towards zero.
        if (mantissaProduct < 0.5)
        {
            mantissaProduct *= 2;
            --exponentSum;
        }
    }
    return exponentSum + log2(mantissaProduct);
}

///
@safe unittest
{
    import std.math.traits : isNaN;

    assert(sumOfLog2s(new double[0]) == 0);
    assert(sumOfLog2s([0.0L]) == -real.infinity);
    assert(sumOfLog2s([-0.0L]) == -real.infinity);
    assert(sumOfLog2s([2.0L]) == 1);
    assert(sumOfLog2s([-2.0L]).isNaN());
    assert(sumOfLog2s([real.nan]).isNaN());
    assert(sumOfLog2s([-real.nan]).isNaN());
    assert(sumOfLog2s([real.infinity]) == real.infinity);
    assert(sumOfLog2s([-real.infinity]).isNaN());
    assert(sumOfLog2s([ 0.25, 0.25, 0.25, 0.125 ]) == -9);
}
181254a7Smrg
/**
Computes the $(LINK2 https://en.wikipedia.org/wiki/Entropy_(information_theory),
_entropy) of input range `r` in bits, i.e. the sum of
$(D -p * log2(p)) over all nonzero elements `p` of `r`. This function
assumes (without checking) that the values in `r` are all in
$(D [0, 1]). For the entropy to be meaningful, often `r` should be
normalized too (i.e., its values should sum to 1). The two-parameter
version stops evaluating as soon as the intermediate result is greater
than or equal to `max`.
 */
ElementType!Range entropy(Range)(Range r)
if (isInputRange!Range)
{
    Unqual!(typeof(return)) bits = 0.0;
    while (!r.empty)
    {
        const p = r.front;
        // Zero elements contribute nothing (and log2(0) is not finite).
        if (p)
            bits -= p * log2(p);
        r.popFront;
    }
    return bits;
}

/// Ditto
ElementType!Range entropy(Range, F)(Range r, F max)
if (isInputRange!Range &&
    !is(CommonType!(ElementType!Range, F) == void))
{
    Unqual!(typeof(return)) bits = 0.0;
    while (!r.empty)
    {
        const p = r.front;
        if (p)
        {
            bits -= p * log2(p);
            // Early exit: the caller is not interested in values past `max`.
            if (bits >= max) break;
        }
        r.popFront;
    }
    return bits;
}

@safe unittest
{
    import std.meta : AliasSeq;
    static foreach (T; AliasSeq!(double, const double, immutable double))
    {{
        T[] p = [ 0.0, 0, 0, 1 ];
        assert(entropy(p) == 0);
        p = [ 0.25, 0.25, 0.25, 0.25 ];
        assert(entropy(p) == 2);
        assert(entropy(p, 1) == 1);
    }}
}
181254a7Smrg
/**
Computes the $(LINK2 https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence,
Kullback-Leibler divergence) between input ranges `a` and `b`, which
is the sum $(D ai * log(ai / bi)). The base of logarithm is 2. The
ranges are assumed to contain elements in $(D [0, 1]). Usually the
ranges are normalized probability distributions, but this is not
required or checked by `kullbackLeiblerDivergence`.

Terms with $(D ai == 0) are skipped, whatever the value of `bi`. If
an element `bi` is zero while the corresponding `ai` is nonzero, the
function returns infinity right away. If the inputs are normalized,
the result is non-negative.

Both ranges must have the same number of elements; this is checked
with `assert`.
 */
CommonType!(ElementType!Range1, ElementType!Range2)
kullbackLeiblerDivergence(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
    static if (haveLen) assert(a.length == b.length);

    Unqual!(typeof(return)) divergence = 0;
    while (!a.empty)
    {
        immutable pa = a.front;
        if (pa != 0)
        {
            immutable pb = b.front;
            // Probability mass in `a` where `b` has none.
            if (pb == 0) return divergence.infinity;
            assert(pa > 0 && pb > 0);
            divergence += pa * log2(pa / pb);
        }
        a.popFront();
        b.popFront();
    }
    static if (!haveLen) assert(b.empty);
    return divergence;
}

///
@safe unittest
{
    import std.math.operations : isClose;

    double[] p = [ 0.0, 0, 0, 1 ];
    assert(kullbackLeiblerDivergence(p, p) == 0);
    double[] p1 = [ 0.25, 0.25, 0.25, 0.25 ];
    assert(kullbackLeiblerDivergence(p1, p1) == 0);
    assert(kullbackLeiblerDivergence(p, p1) == 2);
    assert(kullbackLeiblerDivergence(p1, p) == double.infinity);
    double[] p2 = [ 0.2, 0.2, 0.2, 0.4 ];
    assert(isClose(kullbackLeiblerDivergence(p1, p2), 0.0719281, 1e-5));
    assert(isClose(kullbackLeiblerDivergence(p2, p1), 0.0780719, 1e-5));
}
181254a7Smrg
/**
Computes the $(LINK2 https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence,
Jensen-Shannon divergence) between `a` and `b`, which is the sum
$(D (ai * log(2 * ai / (ai + bi)) + bi * log(2 * bi / (ai + bi))) / 2).
The base of logarithm is 2. The ranges are assumed to contain elements
in $(D [0, 1]). Usually the ranges are normalized probability
distributions, but this is not required or checked by
`jensenShannonDivergence`. If the inputs are normalized, the result is
bounded within $(D [0, 1]). The three-parameter version stops
evaluations as soon as the intermediate result is greater than or
equal to `limit`.

Both ranges must have the same number of elements; this is checked
with `assert`.
 */
CommonType!(ElementType!Range1, ElementType!Range2)
jensenShannonDivergence(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!Range1 && isInputRange!Range2 &&
    is(CommonType!(ElementType!Range1, ElementType!Range2)))
{
    enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
    static if (haveLen) assert(a.length == b.length);

    // Accumulates twice the divergence; halved once on return.
    Unqual!(typeof(return)) doubled = 0;
    while (!a.empty)
    {
        immutable pa = a.front;
        immutable pb = b.front;
        immutable mid = (pa + pb) / 2;
        // Zero elements contribute nothing to the sum.
        if (pa != 0)
            doubled += pa * log2(pa / mid);
        if (pb != 0)
            doubled += pb * log2(pb / mid);
        a.popFront();
        b.popFront();
    }
    static if (!haveLen) assert(b.empty);
    return doubled / 2;
}

/// Ditto
CommonType!(ElementType!Range1, ElementType!Range2)
jensenShannonDivergence(Range1, Range2, F)(Range1 a, Range2 b, F limit)
if (isInputRange!Range1 && isInputRange!Range2 &&
    is(typeof(CommonType!(ElementType!Range1, ElementType!Range2).init
    >= F.init) : bool))
{
    enum bool haveLen = hasLength!(Range1) && hasLength!(Range2);
    static if (haveLen) assert(a.length == b.length);

    // The accumulator holds twice the divergence, so the threshold is
    // doubled as well to keep the comparison consistent.
    Unqual!(typeof(return)) doubled = 0;
    limit *= 2;
    while (!a.empty)
    {
        immutable pa = a.front;
        immutable pb = b.front;
        immutable mid = (pa + pb) / 2;
        if (pa != 0)
            doubled += pa * log2(pa / mid);
        if (pb != 0)
            doubled += pb * log2(pb / mid);
        if (doubled >= limit) break;
        a.popFront();
        b.popFront();
    }
    static if (!haveLen) assert(b.empty);
    return doubled / 2;
}

///
@safe unittest
{
    import std.math.operations : isClose;

    double[] p = [ 0.0, 0, 0, 1 ];
    assert(jensenShannonDivergence(p, p) == 0);
    double[] p1 = [ 0.25, 0.25, 0.25, 0.25 ];
    assert(jensenShannonDivergence(p1, p1) == 0);
    assert(isClose(jensenShannonDivergence(p1, p), 0.548795, 1e-5));
    double[] p2 = [ 0.2, 0.2, 0.2, 0.4 ];
    assert(isClose(jensenShannonDivergence(p1, p2), 0.0186218, 1e-5));
    assert(isClose(jensenShannonDivergence(p2, p1), 0.0186218, 1e-5));
    assert(isClose(jensenShannonDivergence(p2, p1, 0.005), 0.00602366, 1e-5));
}
181254a7Smrg
/**
The so-called "all-lengths gap-weighted string kernel" computes a
similarity measure between `s` and `t` based on all of their
common subsequences of all lengths. Gapped subsequences are also
included.

To understand what $(D gapWeightedSimilarity(s, t, lambda)) computes,
consider first the case $(D lambda = 1) and the strings $(D s =
["Hello", "brave", "new", "world"]) and $(D t = ["Hello", "new",
"world"]). In that case, `gapWeightedSimilarity` counts the
following matches:

$(OL $(LI three matches of length 1, namely `"Hello"`, `"new"`,
and `"world"`;) $(LI three matches of length 2, namely ($(D
"Hello", "new")), ($(D "Hello", "world")), and ($(D "new", "world"));)
$(LI one match of length 3, namely ($(D "Hello", "new", "world")).))

The call $(D gapWeightedSimilarity(s, t, 1)) simply counts all of
these matches and adds them up, returning 7.

----
string[] s = ["Hello", "brave", "new", "world"];
string[] t = ["Hello", "new", "world"];
assert(gapWeightedSimilarity(s, t, 1) == 7);
----

Note how the gaps in matching are simply ignored, for example ($(D
"Hello", "new")) is deemed as good a match as ($(D "new",
"world")). This may be too permissive for some applications. To
eliminate gapped matches entirely, use $(D lambda = 0):

----
string[] s = ["Hello", "brave", "new", "world"];
string[] t = ["Hello", "new", "world"];
assert(gapWeightedSimilarity(s, t, 0) == 4);
----

The call above eliminated the gapped matches ($(D "Hello", "new")),
($(D "Hello", "world")), and ($(D "Hello", "new", "world")) from the
tally. That leaves only 4 matches.

The most interesting case is when gapped matches still participate in
the result, but not as strongly as ungapped matches. The result will
be a smooth, fine-grained similarity measure between the input
strings. This is where values of `lambda` between 0 and 1 enter
into play: gapped matches are $(I exponentially penalized with the
number of gaps) with base `lambda`. This means that an ungapped
match adds 1 to the return value; a match with one gap in either
string adds `lambda` to the return value; ...; a match with a total
of `n` gaps in both strings adds $(D pow(lambda, n)) to the return
value. In the example above, we have 4 matches without gaps, 2 matches
with one gap, and 1 match with three gaps. The latter match is ($(D
"Hello", "world")), which has two gaps in the first string and one gap
in the second string, totaling to three gaps. Summing these up we get
$(D 4 + 2 * lambda + pow(lambda, 3)).

----
string[] s = ["Hello", "brave", "new", "world"];
string[] t = ["Hello", "new", "world"];
assert(gapWeightedSimilarity(s, t, 0.5) == 4 + 0.5 * 2 + 0.125);
----

`gapWeightedSimilarity` is useful wherever a smooth similarity
measure between sequences allowing for approximate matches is
needed. The examples above are given with words, but any sequences
with elements comparable for equality are allowed, e.g. characters or
numbers. `gapWeightedSimilarity` uses a dynamic programming
implementation that needs $(D 2 * F.sizeof * min(s.length, t.length))
extra bytes of memory (16 bytes per element for `double`) and
$(BIGOH s.length * t.length) time to complete.

Params:
    comp = predicate deciding whether an element of `s` (passed as `a`)
           matches an element of `t` (passed as `b`); it is always
           invoked in this order, whichever range is longer
    s = first random-access range
    t = second random-access range
    lambda = penalty applied per gap

Returns: the gap-weighted similarity, `0` if either range is empty.
 */
F gapWeightedSimilarity(alias comp = "a == b", R1, R2, F)(R1 s, R2 t, F lambda)
if (isRandomAccessRange!(R1) && hasLength!(R1) &&
    isRandomAccessRange!(R2) && hasLength!(R2))
{
    // The scratch buffers are sized after the second range, so the shorter
    // range goes there. `comp` and its argument order are preserved.
    if (s.length < t.length)
        return gapWeightedSimilarityImpl!(comp, true)(t, s, lambda);
    return gapWeightedSimilarityImpl!(comp, false)(s, t, lambda);
}

// Dynamic-programming core of gapWeightedSimilarity. Expects
// `s.length >= t.length`; `swapped` tells whether the caller exchanged the
// ranges, in which case `comp` receives its arguments as (t element, s element).
private F gapWeightedSimilarityImpl(alias comp, bool swapped, R1, R2, F)
        (R1 s, R2 t, F lambda)
{
    import core.exception : onOutOfMemoryError;
    import core.stdc.stdlib : malloc, free;
    import std.algorithm.mutation : swap;
    import std.functional : binaryFun;

    assert(s.length >= t.length);
    if (!t.length) return 0;

    // One allocation holding two rows of `t.length` cells each.
    auto buffer = cast(F*) malloc(F.sizeof * 2 * t.length);
    if (!buffer)
        onOutOfMemoryError();
    scope(exit) free(buffer);

    auto dpvi = buffer;              // row computed for the previous `i`
    auto dpvi1 = buffer + t.length;  // row being computed for the current `i`
    dpvi[0 .. t.length] = 0;
    dpvi1[0] = 0;                    // cell 0 of either row is never written again
    immutable lambda2 = lambda * lambda;

    F result = 0;
    foreach (i; 0 .. s.length)
    {
        const si = s[i];
        for (size_t j = 0;;)
        {
            static if (swapped)
                immutable matched = cast(bool) binaryFun!(comp)(t[j], si);
            else
                immutable matched = cast(bool) binaryFun!(comp)(si, t[j]);

            F dpsij = 0;
            if (matched)
            {
                dpsij = 1 + dpvi[j];
                result += dpsij;
            }
            immutable j1 = j + 1;
            if (j1 == t.length) break;
            dpvi1[j1] = dpsij + lambda * (dpvi1[j] + dpvi[j1]) -
                        lambda2 * dpvi[j];
            j = j1;
        }
        // The row just filled becomes the "previous" row of the next pass.
        swap(dpvi, dpvi1);
    }
    return result;
}

@system unittest
{
    string[] s = ["Hello", "brave", "new", "world"];
    string[] t = ["Hello", "new", "world"];
    assert(gapWeightedSimilarity(s, t, 1) == 7);
    assert(gapWeightedSimilarity(s, t, 0) == 4);
    assert(gapWeightedSimilarity(s, t, 0.5) == 4 + 2 * 0.5 + 0.125);
}

// A custom predicate is honored even when the first range is the shorter one
@system unittest
{
    assert(gapWeightedSimilarity!"a == b + 1"([2, 3], [1, 2, 5], 1.0) == 3);
    assert(gapWeightedSimilarity!"a == b + 1"([2, 3, 9], [1, 2], 1.0) == 3);
}
181254a7Smrg
/**
The similarity per `gapWeightedSimilarity` has an issue in that it
grows with the lengths of the two strings, even though the strings are
not actually very similar. For example, the range $(D ["Hello",
"world"]) is increasingly similar with the range $(D ["Hello",
"world", "world", "world",...]) as more instances of `"world"` are
appended. To prevent that, `gapWeightedSimilarityNormalized`
computes a normalized version of the similarity that is computed as
$(D gapWeightedSimilarity(s, t, lambda) /
sqrt(gapWeightedSimilarity(s, s, lambda) * gapWeightedSimilarity(t, t,
lambda))). The function `gapWeightedSimilarityNormalized` (a
so-called normalized kernel) is bounded in $(D [0, 1]), reaches `0`
only for ranges that don't match in any position, and `1` only for
identical ranges.

The optional parameters `sSelfSim` and `tSelfSim` are meant for
avoiding duplicate computation. Many applications may have already
computed $(D gapWeightedSimilarity(s, s, lambda)) and/or $(D
gapWeightedSimilarity(t, t, lambda)). In that case, they can be passed
as `sSelfSim` and `tSelfSim`, respectively. A value of `F.init` (NaN
for floating-point `F`) means "not supplied" and makes the function
compute the self-similarity itself.

Returns: the normalized similarity, or `0` if either self-similarity
is zero.
 */
Select!(isFloatingPoint!(F), F, double)
gapWeightedSimilarityNormalized(alias comp = "a == b", R1, R2, F)
        (R1 s, R2 t, F lambda, F sSelfSim = F.init, F tSelfSim = F.init)
if (isRandomAccessRange!(R1) && hasLength!(R1) &&
    isRandomAccessRange!(R2) && hasLength!(R2))
{
    alias Result = typeof(return);

    // Tells whether a self-similarity argument still holds its default.
    static bool isUnset(F value)
    {
        static if (isFloatingPoint!(F))
        {
            return isNaN(value);
        }
        else
        {
            return value == value.init;
        }
    }

    if (isUnset(sSelfSim))
    {
        sSelfSim = gapWeightedSimilarity!(comp)(s, s, lambda);
    }
    if (sSelfSim == 0)
    {
        return 0;
    }

    if (isUnset(tSelfSim))
    {
        tSelfSim = gapWeightedSimilarity!(comp)(t, t, lambda);
    }
    if (tSelfSim == 0)
    {
        return 0;
    }

    immutable crossSim = gapWeightedSimilarity!(comp)(s, t, lambda);
    immutable norm = sqrt(cast(Result) sSelfSim * tSelfSim);
    return crossSim / norm;
}

///
@system unittest
{
    import std.math.operations : isClose;
    import std.math.algebraic : sqrt;

    string[] s = ["Hello", "brave", "new", "world"];
    string[] t = ["Hello", "new", "world"];
    assert(gapWeightedSimilarity(s, s, 1) == 15);
    assert(gapWeightedSimilarity(t, t, 1) == 7);
    assert(gapWeightedSimilarity(s, t, 1) == 7);
    assert(isClose(gapWeightedSimilarityNormalized(s, t, 1),
                    7.0 / sqrt(15.0 * 7), 0.01));
}
181254a7Smrg
/**
Similar to `gapWeightedSimilarity`, just works in an incremental
manner by first revealing the matches of length 1, then gapped matches
of length 2, and so on. The memory requirement is $(BIGOH s.length *
t.length). The time complexity is $(BIGOH s.length * t.length) time
for computing each step. See `gapWeightedSimilarityIncremental` for a
usage example.

The implementation is based on the pseudocode in Fig. 4 of the paper
$(HTTP jmlr.csail.mit.edu/papers/volume6/rousu05a/rousu05a.pdf,
"Efficient Computation of Gapped Substring Kernels on Large Alphabets")
by Rousu et al., with additional algorithmic and systems-level
optimizations.

Note: the object owns a `malloc`ed table that is released by `empty`
once no matches are left; the struct has no destructor.
 */
struct GapWeightedSimilarityIncremental(Range, F = double)
if (isRandomAccessRange!(Range) && hasLength!(Range))
{
    import core.stdc.stdlib : malloc, realloc, alloca, free;

private:
    Range s, t;             // inputs, trimmed to the span that contains matches
    F currentValue = 0;     // similarity at the current match length
    F* kl;                  // s.length * t.length table, row-major; null when unused/released
    size_t gram = void;     // number of popFront steps performed so far
    F lambda = void, lambda2 = void; // gap penalty and its square

public:
/**
Constructs an object given two ranges `s` and `t` and a penalty
`lambda`. Constructor completes in $(BIGOH s.length * t.length)
time and computes all matches of length 1.
 */
    this(Range s, Range t, F lambda)
    {
        import core.exception : onOutOfMemoryError;

        assert(lambda > 0);
        this.gram = 0;
        this.lambda = lambda;
        this.lambda2 = lambda * lambda; // for efficiency only

        size_t iMin = size_t.max, jMin = size_t.max,
            iMax = 0, jMax = 0;
        /* initialize */
        // k0 collects the (i, j) positions of all length-1 matches.
        Tuple!(size_t, size_t) * k0;
        size_t k0len;
        scope(exit) free(k0);
        currentValue = 0;
        // Index loop (rather than `foreach (i, si; s)`) so that any
        // random-access range works, not only built-in arrays.
        foreach (i; 0 .. s.length)
        {
            const si = s[i];
            foreach (j; 0 .. t.length)
            {
                if (si != t[j]) continue;
                // Grow by one entry. Keep the old pointer until realloc has
                // succeeded so that the scope guard can still release it.
                auto grown = cast(typeof(k0))
                    realloc(k0, (k0len + 1) * typeof(*k0).sizeof);
                if (!grown)
                    onOutOfMemoryError();
                k0 = grown;
                ++k0len;
                with (k0[k0len - 1])
                {
                    field[0] = i;
                    field[1] = j;
                }
                // Maintain the minimum and maximum i and j
                if (iMin > i) iMin = i;
                if (iMax < i) iMax = i;
                if (jMin > j) jMin = j;
                if (jMax < j) jMax = j;
            }
        }

        // No match at all: leave the object in its empty state.
        if (iMin > iMax) return;
        assert(k0len);

        currentValue = k0len;
        // Chop strings down to the useful sizes
        s = s[iMin .. iMax + 1];
        t = t[jMin .. jMax + 1];
        this.s = s;
        this.t = t;

        kl = cast(F*) malloc(s.length * t.length * F.sizeof);
        if (!kl)
            onOutOfMemoryError();

        kl[0 .. s.length * t.length] = 0;
        // Seed the table with the length-1 matches, shifted to the
        // coordinates of the trimmed ranges.
        foreach (pos; 0 .. k0len)
        {
            with (k0[pos])
            {
                kl[(field[0] - iMin) * t.length + field[1] -jMin] = lambda2;
            }
        }
    }

    /**
    Returns: `this`.
     */
    ref GapWeightedSimilarityIncremental opSlice()
    {
        return this;
    }

    /**
    Advances to the next match length and computes its similarity.
    Completes in $(BIGOH s.length * t.length) time. Does nothing if the
    range is already empty.
     */
    void popFront()
    {
        // This is a large source of optimization: if similarity at
        // the gram-1 level was 0, then we can safely assume
        // similarity at the gram level is 0 as well.
        if (empty) return;

        // Now attempt to match gapped substrings of length `gram'
        ++gram;
        currentValue = 0;

        // Scratch row holding the S values of the previous `i`.
        auto Si = cast(F*) alloca(t.length * F.sizeof);
        Si[0 .. t.length] = 0;
        foreach (i; 0 .. s.length)
        {
            const si = s[i];
            F Sij_1 = 0;
            F Si_1j_1 = 0;
            auto kli = kl + i * t.length;
            for (size_t j = 0;;)
            {
                const klij = kli[j];
                const Si_1j = Si[j];
                const tmp = klij + lambda * (Si_1j + Sij_1) - lambda2 * Si_1j_1;
                // now update kl and currentValue
                if (si == t[j])
                    currentValue += kli[j] = lambda2 * Si_1j_1;
                else
                    kli[j] = 0;
                // commit to Si
                Si[j] = tmp;
                if (++j == t.length) break;
                // get ready for the popFront step; virtually increment j,
                // so essentially stuffj_1 <-- stuffj
                Si_1j_1 = Si_1j;
                Sij_1 = tmp;
            }
        }
        // Table entries carry a factor of lambda^2 per matched element;
        // divide it out to obtain the similarity value.
        currentValue /= pow(lambda, 2 * (gram + 1));
    }

    /**
    Returns: The gapped similarity at the current match length (initially
    1, grows with each call to `popFront`).
    */
    @property F front() { return currentValue; }

    /**
    Returns: `true` if there are no more matches. In that case the
    internal table is released as a side effect.
     */
    @property bool empty()
    {
        if (currentValue) return false;
        if (kl)
        {
            free(kl);
            kl = null;
        }
        return true;
    }
}
181254a7Smrg
181254a7Smrg/**
181254a7SmrgDitto
181254a7Smrg */
181254a7SmrgGapWeightedSimilarityIncremental!(R, F) gapWeightedSimilarityIncremental(R, F)
181254a7Smrg(R r1, R r2, F penalty)
181254a7Smrg{
181254a7Smrg    return typeof(return)(r1, r2, penalty);
181254a7Smrg}
181254a7Smrg
181254a7Smrg///
181254a7Smrg@system unittest
181254a7Smrg{
181254a7Smrg    string[] s = ["Hello", "brave", "new", "world"];
181254a7Smrg    string[] t = ["Hello", "new", "world"];
181254a7Smrg    auto simIter = gapWeightedSimilarityIncremental(s, t, 1.0);
181254a7Smrg    assert(simIter.front == 3); // three 1-length matches
181254a7Smrg    simIter.popFront();
181254a7Smrg    assert(simIter.front == 3); // three 2-length matches
181254a7Smrg    simIter.popFront();
181254a7Smrg    assert(simIter.front == 1); // one 3-length match
181254a7Smrg    simIter.popFront();
181254a7Smrg    assert(simIter.empty);     // no more match
181254a7Smrg}
181254a7Smrg
@system unittest
{
    import std.conv : text;

    // Same scenario as the documented example, with diagnostics on failure.
    string[] left = ["Hello", "brave", "new", "world"];
    string[] right = ["Hello", "new", "world"];
    auto it = gapWeightedSimilarityIncremental(left, right, 1.0);
    assert(it.front == 3); // three 1-length matches
    it.popFront();
    assert(it.front == 3, text(it.front)); // three 2-length matches
    it.popFront();
    assert(it.front == 1); // one 3-length match
    it.popFront();
    assert(it.empty);     // no more match

    // Nothing in common: the range starts out empty.
    left = ["Hello"];
    right = ["bye"];
    it = gapWeightedSimilarityIncremental(left, right, 0.5);
    assert(it.empty);

    // Identical single-element inputs.
    left = ["Hello"];
    right = ["Hello"];
    it = gapWeightedSimilarityIncremental(left, right, 0.5);
    assert(it.front == 1); // one match
    it.popFront();
    assert(it.empty);

    // First input longer than the second.
    left = ["Hello", "world"];
    right = ["Hello"];
    it = gapWeightedSimilarityIncremental(left, right, 0.5);
    assert(it.front == 1); // one match
    it.popFront();
    assert(it.empty);

    // A gap in the second input is penalized by the factor 0.5.
    left = ["Hello", "world"];
    right = ["Hello", "yah", "world"];
    it = gapWeightedSimilarityIncremental(left, right, 0.5);
    assert(it.front == 2); // two 1-gram matches
    it.popFront();
    assert(it.front == 0.5, text(it.front)); // one 2-gram match, 1 gap
}
181254a7Smrg
@system unittest
{
    alias Sim = GapWeightedSimilarityIncremental!(string[]);

    // First scenario: compare each produced value against the witness list.
    Sim sim = Sim(
        ["nyuk", "I", "have", "no", "chocolate", "giba"],
        ["wyda", "I", "have", "I", "have", "have", "I", "have", "hehe"],
        0.5);
    double[] expected = [ 7.0, 4.03125, 0, 0 ];
    foreach (value; sim)
    {
        assert(value == expected.front);
        expected.popFront();
    }

    // Second scenario: here the witness list must be consumed completely.
    expected = [ 3.0, 1.3125, 0.25 ];
    sim = Sim(
        ["I", "have", "no", "chocolate"],
        ["I", "have", "some", "chocolate"],
        0.5);
    foreach (value; sim)
    {
        assert(value == expected.front);
        expected.popFront();
    }
    assert(expected.empty);
}
181254a7Smrg
/**
Calculates the greatest common divisor of `a` and `b`, using
$(HTTPS en.wikipedia.org/wiki/Euclidean_algorithm, Euclid's) or
$(HTTPS en.wikipedia.org/wiki/Binary_GCD_algorithm, Stein's) algorithm,
whichever the argument type permits.

Params:
    a = Integer value of any numerical type that supports the modulo operator `%`.
        When the type also supports the bit shifts `<<` and `>>`, Stein's
        algorithm is used; otherwise Euclid's algorithm serves as the fallback.
    b = Integer value of any equivalent numerical type.

Returns:
    The greatest common divisor of the given arguments.
 */
typeof(Unqual!(T).init % Unqual!(U).init) gcd(T, U)(T a, U b)
if (isIntegral!T && isIntegral!U)
{
    // All work is done in the unsigned flavour of the common argument type.
    alias UCT = Unsigned!(CommonType!(Unqual!T, Unqual!U));

    UCT ax, bx;

    // Take the magnitude of `a`.  `std.math.abs` doesn't support unsigned
    // integers, and `T.min` is undefined, hence the manual negation.
    if (isUnsigned!T || a >= 0)
        ax = a;
    else static if (is(T : immutable short) || is(T : immutable byte))
        ax = cast(UCT) -int(a); // negate as int, then narrow explicitly
    else
        ax = -UCT(a);

    // Same for `b`.
    if (isUnsigned!U || b >= 0)
        bx = b;
    else static if (is(U : immutable short) || is(U : immutable byte))
        bx = cast(UCT) -int(b);
    else
        bx = -UCT(b);

    // With a zero operand the result is the other operand; these cases are
    // answered here and never passed on to gcdImpl.
    if (ax == 0)
        return bx;
    if (bx == 0)
        return ax;

    return gcdImpl(ax, bx);
}
*b1e83836Smrg
// Binary (Stein's) GCD for built-in integers.  The public `gcd`/`lcm`
// overloads filter out zero operands before calling this; `bsf` is applied
// to both operands here.
private typeof(T.init % T.init) gcdImpl(T)(T a, T b)
if (isIntegral!T)
{
    pragma(inline, true);
    import core.bitop : bsf;
    import std.algorithm.mutation : swap;

    // Number of factors of two shared by both operands.
    immutable uint commonTwos = bsf(a | b);

    // Strip the trailing zero bits of `a`; the loop keeps `a` that way.
    a >>= bsf(a);
    for (;;)
    {
        b >>= bsf(b);
        // Keep the smaller value in `a` so the subtraction cannot underflow.
        if (a > b)
            swap(a, b);
        b -= a;
        if (!b)
            break;
    }

    // Put the shared power of two back.
    return a << commonTwos;
}
181254a7Smrg
///
@safe unittest
{
    // The shared prime factors are 5 and 7.
    assert(gcd(2 * 5 * 7 * 7, 5 * 7 * 11) == 5 * 7);

    // `const` arguments are accepted as well.
    const int a = 5 * 13 * 23 * 23;
    const int b = 13 * 59;
    assert(gcd(a, b) == 13);
}
181254a7Smrg
@safe unittest
{
    import std.meta : AliasSeq;

    // Every built-in integer type shows up on both sides, with a mix of
    // qualifiers, so all signedness/width pairings get instantiated.
    alias LeftTypes = AliasSeq!(byte, ubyte, short, ushort, int, uint, long, ulong,
                                const byte, const short, const int, const long,
                                immutable ubyte, immutable ushort, immutable uint, immutable ulong);
    alias RightTypes = AliasSeq!(byte, ubyte, short, ushort, int, uint, long, ulong,
                                 const ubyte, const ushort, const uint, const ulong,
                                 immutable byte, immutable short, immutable int, immutable long);

    static foreach (T; LeftTypes)
    {
        static foreach (U; RightTypes)
        {
            // Values guarded by the range of the participating types.
            static if (T.max > byte.max && U.max > byte.max)
                assert(gcd(T(200), U(200)) == 200);
            static if (T.max > ubyte.max)
            {
                assert(gcd(T(2000), U(20))  == 20);
                assert(gcd(T(2011), U(17))  == 1);
            }
            static if (T.max > ubyte.max && U.max > ubyte.max)
                assert(gcd(T(1071), U(462)) == 21);

            // Values that fit every tested type.
            assert(gcd(T(0),   U(13))  == 13);
            assert(gcd(T(29),  U(0))   == 29);
            assert(gcd(T(0),   U(0))   == 0);
            assert(gcd(T(1),   U(2))   == 1);
            assert(gcd(T(9),   U(6))   == 3);
            assert(gcd(T(3),   U(4))   == 1);
            assert(gcd(T(32),  U(24))  == 8);
            assert(gcd(T(5),   U(6))   == 1);
            assert(gcd(T(54),  U(36))  == 18);

            // Values that need at least int, or at least long.
            static if (T.max > short.max && U.max > short.max)
                assert(gcd(T(46391), U(62527)) == 2017);
            static if (T.max > ushort.max && U.max > ushort.max)
                assert(gcd(T(63245986), U(39088169)) == 1);
            static if (T.max > uint.max && U.max > uint.max)
            {
                assert(gcd(T(77160074263), U(47687519812)) == 1);
                assert(gcd(T(77160074264), U(47687519812)) == 4);
            }

            // Negative operands, wherever the type is signed.
            static if (T.min < 0)
            {
                assert(gcd(T(-21), U(28)) == 7);
                assert(gcd(T(-3),  U(4))  == 1);
            }
            static if (U.min < 0)
            {
                assert(gcd(T(1),  U(-2))  == 1);
                assert(gcd(T(33), U(-44)) == 11);
            }
            static if (T.min < 0 && U.min < 0)
            {
                assert(gcd(T(-5),  U(-6))  == 1);
                assert(gcd(T(-50), U(-60)) == 10);
            }
        }
    }
}
*b1e83836Smrg
// https://issues.dlang.org/show_bug.cgi?id=21834
@safe unittest
{
    // Mixed signedness.
    assert(gcd(-120, 10U) == 10);
    assert(gcd(120U, -10) == 10);

    // The magnitude of int.min, written as a long.
    enum long absIntMin = 1L + int.max;
    assert(gcd(int.min, 0L) == absIntMin);
    assert(gcd(0L, int.min) == absIntMin);
    assert(gcd(int.min, 0L + int.min) == absIntMin);
    assert(gcd(int.min, absIntMin) == absIntMin);

    // The magnitude of short.min, written as a uint.
    enum uint absShortMin = 1U + short.max;
    assert(gcd(short.min, absShortMin) == absShortMin);
}
*b1e83836Smrg
// Overload for number-like types that are not built-in integers, such as
// BigInt or user-defined types.
/// ditto
auto gcd(T)(T a, T b)
if (!isIntegral!T &&
        is(typeof(T.init % T.init)) &&
        is(typeof(T.init == 0 || T.init > 0)))
{
    static if (is(T == Unqual!T))
    {
        // Replace negative arguments by their negation.
        if (!(a >= 0))
            a = -a;
        if (!(b >= 0))
            b = -b;

        // With a zero operand the result is the other operand.
        if (a == 0)
            return b;
        if (b == 0)
            return a;

        return gcdImpl(a, b);
    }
    else
    {
        // Qualified arguments cannot be reassigned above, so run the
        // unqualified instantiation instead.
        return gcd!(Unqual!T)(a, b);
    }
}
*b1e83836Smrg
// GCD for number-like types that are not built-in integers.  Uses the binary
// algorithm when the type offers the needed operators and falls back to the
// Euclidean algorithm otherwise.  The public `gcd`/`lcm` overloads handle
// zero operands before calling this.
private auto gcdImpl(T)(T a, T b)
if (!isIntegral!T)
{
    pragma(inline, true);
    import std.algorithm.mutation : swap;

    // Compile-time probe: does T support everything the binary algorithm uses?
    enum canUseBinaryGcd = is(typeof(() {
        T lhs, rhs;
        lhs <<= 1;
        lhs >>= 1;
        lhs -= rhs;
        bool isEven = (lhs & 1) == 0;
        swap(lhs, rhs);
    }));

    static if (canUseBinaryGcd)
    {
        // Divide out, and count, the factors of two common to both operands.
        uint commonTwos = 0;
        for (; (a & 1) == 0 && (b & 1) == 0; ++commonTwos)
        {
            a >>= 1;
            b >>= 1;
        }

        // At most one operand is still even; make sure it is not `a`.
        if ((a & 1) == 0) swap(a, b);

        for (;;)
        {
            assert((a & 1) != 0);
            while ((b & 1) == 0)
                b >>= 1;
            // Keep the smaller value in `a` before subtracting.
            if (a > b)
                swap(a, b);
            b -= a;
            if (!b)
                break;
        }

        // Put the common power of two back.
        return a << commonTwos;
    }
    else
    {
        // The only thing we have is %; fallback to Euclidean algorithm.
        while (b != 0)
        {
            auto remainder = a % b;
            a = b;
            b = remainder;
        }
        return a;
    }
}
181254a7Smrg
// https://issues.dlang.org/show_bug.cgi?id=7102
@system pure unittest
{
    import std.bigint : BigInt;

    // Operands beyond the range of the built-in integer types.
    auto first = BigInt("71_000_000_000_000_000_000");
    auto second = BigInt("31_000_000_000_000_000_000");
    assert(gcd(first, second) == BigInt("1_000_000_000_000_000_000"));

    // A zero operand yields the other operand.
    auto zero = BigInt(0);
    auto value = BigInt(1234567);
    assert(gcd(zero, value) == BigInt(1234567));
    assert(gcd(value, zero) == BigInt(1234567));
}
181254a7Smrg
@safe pure nothrow unittest
{
    // A number-like type offering only `%`, unary `-` and comparisons, so
    // that gcd has to take the Euclidean code path.
    struct CrippledInt
    {
        int impl;

        int opEquals(CrippledInt other) { return impl == other.impl; }
        int opEquals(int other) { return impl == other; }
        int opCmp(int other) { return (impl < other) ? -1 : (impl > other) ? 1 : 0; }

        CrippledInt opUnary(string op : "-")()
        {
            return CrippledInt(-impl);
        }

        CrippledInt opBinary(string op : "%")(CrippledInt divisor)
        {
            return CrippledInt(impl % divisor.impl);
        }
    }

    assert(gcd(CrippledInt(2310), CrippledInt(1309)) == CrippledInt(77));

    // Negative operands.
    assert(gcd(CrippledInt(-120), CrippledInt(10U)) == CrippledInt(10));
    assert(gcd(CrippledInt(120U), CrippledInt(-10)) == CrippledInt(10));
}
*b1e83836Smrg
// https://issues.dlang.org/show_bug.cgi?id=19514
@system pure unittest
{
    import std.bigint : BigInt;

    // Even first operand, odd second operand.
    auto two = BigInt(2);
    auto one = BigInt(1);
    assert(gcd(two, one) == BigInt(1));
}
*b1e83836Smrg
// https://issues.dlang.org/show_bug.cgi?id=20924
@safe unittest
{
    import std.bigint : BigInt;

    // gcd must accept `const` BigInt operands.
    const lhs = BigInt("123143238472389492934020");
    const rhs = BigInt("902380489324729338420924");
    assert(__traits(compiles, gcd(lhs, rhs)));
}
*b1e83836Smrg
// https://issues.dlang.org/show_bug.cgi?id=21834
@safe unittest
{
    import std.bigint : BigInt;

    // Mixed signs.
    assert(gcd(BigInt(-120), BigInt(10U)) == BigInt(10));
    assert(gcd(BigInt(120U), BigInt(-10)) == BigInt(10));

    // Operands built from int.min; the expected result is its magnitude.
    auto absIntMin = BigInt(1L + int.max);
    assert(gcd(BigInt(int.min), BigInt(0L)) == absIntMin);
    assert(gcd(BigInt(0L), BigInt(int.min)) == absIntMin);
    assert(gcd(BigInt(int.min), BigInt(0L + int.min)) == absIntMin);
    assert(gcd(BigInt(int.min), BigInt(1L + int.max)) == absIntMin);

    // Same for short.min.
    auto absShortMin = BigInt(1U + short.max);
    assert(gcd(BigInt(short.min), BigInt(1U + short.max)) == absShortMin);
}
*b1e83836Smrg
*b1e83836Smrg
/**
Calculates the least common multiple of `a` and `b`.
The accepted arguments are the same as for $(MYREF gcd).

Returns:
    The least common multiple of the given arguments.
 */
typeof(Unqual!(T).init % Unqual!(U).init) lcm(T, U)(T a, U b)
if (isIntegral!T && isIntegral!U)
{
    // All work is done in the unsigned flavour of the common argument type.
    alias UCT = Unsigned!(CommonType!(Unqual!T, Unqual!U));

    UCT ax, bx;

    // Take the magnitude of `a`.  `std.math.abs` doesn't support unsigned
    // integers, and `T.min` is undefined, hence the manual negation.
    if (isUnsigned!T || a >= 0)
        ax = a;
    else static if (is(T : immutable short) || is(T : immutable byte))
        ax = cast(UCT) -int(a); // negate as int, then narrow explicitly
    else
        ax = -UCT(a);

    // Same for `b`.
    if (isUnsigned!U || b >= 0)
        bx = b;
    else static if (is(U : immutable short) || is(U : immutable byte))
        bx = cast(UCT) -int(b);
    else
        bx = -UCT(b);

    // A zero operand makes the result zero.
    if (ax == 0 || bx == 0)
        return 0;

    // Divide before multiplying.
    return (ax / gcdImpl(ax, bx)) * bx;
}
*b1e83836Smrg
///
@safe unittest
{
    // One operand divides the other.
    assert(lcm(1, 2) == 2);

    // Operands without a common factor: the result is their product.
    assert(lcm(3, 4) == 12);
    assert(lcm(5, 6) == 30);
}
*b1e83836Smrg
@safe unittest
{
    import std.meta : AliasSeq;

    // Every built-in integer type shows up on both sides, with a mix of
    // qualifiers, so all signedness/width pairings get instantiated.
    alias LeftTypes = AliasSeq!(byte, ubyte, short, ushort, int, uint, long, ulong,
                                const byte, const short, const int, const long,
                                immutable ubyte, immutable ushort, immutable uint, immutable ulong);
    alias RightTypes = AliasSeq!(byte, ubyte, short, ushort, int, uint, long, ulong,
                                 const ubyte, const ushort, const uint, const ulong,
                                 immutable byte, immutable short, immutable int, immutable long);

    static foreach (T; LeftTypes)
    {
        static foreach (U; RightTypes)
        {
            assert(lcm(T(21), U(6))  == 42);

            // A zero operand gives zero.
            assert(lcm(T(41), U(0))  == 0);
            assert(lcm(T(0),  U(7))  == 0);
            assert(lcm(T(0),  U(0))  == 0);

            assert(lcm(T(1U), U(2))  == 2);
            assert(lcm(T(3),  U(4U)) == 12);
            assert(lcm(T(5U), U(6U)) == 30);

            // Negative first operand, only where T is signed.
            static if (T.min < 0)
                assert(lcm(T(-42), U(21U)) == 42);
        }
    }
}
*b1e83836Smrg
/// ditto
auto lcm(T)(T a, T b)
if (!isIntegral!T &&
        is(typeof(T.init % T.init)) &&
        is(typeof(T.init == 0 || T.init > 0)))
{
    static if (!is(T == Unqual!T))
    {
        // `const`/`immutable` arguments cannot be reassigned below, which
        // used to make e.g. `lcm` of two `const BigInt`s fail to compile.
        // Run the unqualified instantiation instead, as the generic `gcd`
        // overload does.
        return lcm!(Unqual!T)(a, b);
    }
    else
    {
        // Replace negative arguments by their negation.
        a = a >= 0 ? a : -a;
        b = b >= 0 ? b : -b;

        // A zero operand makes the result zero.
        if (a == 0)
            return a;
        if (b == 0)
            return b;

        // Divide before multiplying.
        return (a / gcdImpl(a, b)) * b;
    }
}
*b1e83836Smrg
@safe unittest
{
    import std.bigint : BigInt;

    auto zero = BigInt(0);

    assert(lcm(BigInt(21),  BigInt(6))   == BigInt(42));

    // A zero operand gives zero.
    assert(lcm(BigInt(41),  BigInt(0))   == zero);
    assert(lcm(BigInt(0),   BigInt(7))   == zero);
    assert(lcm(BigInt(0),   BigInt(0))   == zero);

    // BigInts constructed from signed and unsigned literals.
    assert(lcm(BigInt(1U),  BigInt(2))   == BigInt(2));
    assert(lcm(BigInt(3),   BigInt(4U))  == BigInt(12));
    assert(lcm(BigInt(5U),  BigInt(6U))  == BigInt(30));

    // Negative first operand.
    assert(lcm(BigInt(-42), BigInt(21U)) == BigInt(42));
}
181254a7Smrg
// Element type of the FFT lookup tables.  Kept as an alias so the
// speed/size vs. accuracy tradeoff can be changed in one place.  float
// appears accurate enough for all practical purposes: the
// "isClose(inverseFft(fft(arr)), arr)" test passes even for size 2 ^^ 22.
private alias lookup_t = float;
181254a7Smrg
181254a7Smrg/**A class for performing fast Fourier transforms of power of two sizes.
181254a7Smrg * This class encapsulates a large amount of state that is reusable when
181254a7Smrg * performing multiple FFTs of sizes smaller than or equal to that specified
181254a7Smrg * in the constructor.  This results in substantial speedups when performing
181254a7Smrg * multiple FFTs with a known maximum size.  However,
181254a7Smrg * a free function API is provided for convenience if you need to perform a
181254a7Smrg * one-off FFT.
181254a7Smrg *
181254a7Smrg * References:
 * $(HTTP en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm, Cooley-Tukey FFT algorithm)
181254a7Smrg */
181254a7Smrgfinal class Fft
181254a7Smrg{
181254a7Smrg    import core.bitop : bsf;
181254a7Smrg    import std.algorithm.iteration : map;
181254a7Smrg    import std.array : uninitializedArray;
181254a7Smrg
181254a7Smrgprivate:
181254a7Smrg    immutable lookup_t[][] negSinLookup;
181254a7Smrg
181254a7Smrg    void enforceSize(R)(R range) const
181254a7Smrg    {
181254a7Smrg        import std.conv : text;
181254a7Smrg        assert(range.length <= size, text(
181254a7Smrg            "FFT size mismatch.  Expected ", size, ", got ", range.length));
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    void fftImpl(Ret, R)(Stride!R range, Ret buf) const
181254a7Smrg    in
181254a7Smrg    {
181254a7Smrg        assert(range.length >= 4);
181254a7Smrg        assert(isPowerOf2(range.length));
181254a7Smrg    }
*b1e83836Smrg    do
181254a7Smrg    {
181254a7Smrg        auto recurseRange = range;
181254a7Smrg        recurseRange.doubleSteps();
181254a7Smrg
181254a7Smrg        if (buf.length > 4)
181254a7Smrg        {
181254a7Smrg            fftImpl(recurseRange, buf[0..$ / 2]);
181254a7Smrg            recurseRange.popHalf();
181254a7Smrg            fftImpl(recurseRange, buf[$ / 2..$]);
181254a7Smrg        }
181254a7Smrg        else
181254a7Smrg        {
181254a7Smrg            // Do this here instead of in another recursion to save on
181254a7Smrg            // recursion overhead.
181254a7Smrg            slowFourier2(recurseRange, buf[0..$ / 2]);
181254a7Smrg            recurseRange.popHalf();
181254a7Smrg            slowFourier2(recurseRange, buf[$ / 2..$]);
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        butterfly(buf);
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    // This algorithm works by performing the even and odd parts of our FFT
181254a7Smrg    // using the "two for the price of one" method mentioned at
181254a7Smrg    // http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM#Head521
181254a7Smrg    // by making the odd terms into the imaginary components of our new FFT,
181254a7Smrg    // and then using symmetry to recombine them.
181254a7Smrg    void fftImplPureReal(Ret, R)(R range, Ret buf) const
181254a7Smrg    in
181254a7Smrg    {
181254a7Smrg        assert(range.length >= 4);
181254a7Smrg        assert(isPowerOf2(range.length));
181254a7Smrg    }
*b1e83836Smrg    do
181254a7Smrg    {
181254a7Smrg        alias E = ElementType!R;
181254a7Smrg
181254a7Smrg        // Converts odd indices of range to the imaginary components of
181254a7Smrg        // a range half the size.  The even indices become the real components.
181254a7Smrg        static if (isArray!R && isFloatingPoint!E)
181254a7Smrg        {
181254a7Smrg            // Then the memory layout of complex numbers provides a dirt
181254a7Smrg            // cheap way to convert.  This is a common case, so take advantage.
181254a7Smrg            auto oddsImag = cast(Complex!E[]) range;
181254a7Smrg        }
181254a7Smrg        else
181254a7Smrg        {
181254a7Smrg            // General case:  Use a higher order range.  We can assume
181254a7Smrg            // source.length is even because it has to be a power of 2.
181254a7Smrg            static struct OddToImaginary
181254a7Smrg            {
181254a7Smrg                R source;
181254a7Smrg                alias C = Complex!(CommonType!(E, typeof(buf[0].re)));
181254a7Smrg
181254a7Smrg                @property
181254a7Smrg                {
181254a7Smrg                    C front()
181254a7Smrg                    {
181254a7Smrg                        return C(source[0], source[1]);
181254a7Smrg                    }
181254a7Smrg
181254a7Smrg                    C back()
181254a7Smrg                    {
181254a7Smrg                        immutable n = source.length;
181254a7Smrg                        return C(source[n - 2], source[n - 1]);
181254a7Smrg                    }
181254a7Smrg
181254a7Smrg                    typeof(this) save()
181254a7Smrg                    {
181254a7Smrg                        return typeof(this)(source.save);
181254a7Smrg                    }
181254a7Smrg
181254a7Smrg                    bool empty()
181254a7Smrg                    {
181254a7Smrg                        return source.empty;
181254a7Smrg                    }
181254a7Smrg
181254a7Smrg                    size_t length()
181254a7Smrg                    {
181254a7Smrg                        return source.length / 2;
181254a7Smrg                    }
181254a7Smrg                }
181254a7Smrg
181254a7Smrg                void popFront()
181254a7Smrg                {
181254a7Smrg                    source.popFront();
181254a7Smrg                    source.popFront();
181254a7Smrg                }
181254a7Smrg
181254a7Smrg                void popBack()
181254a7Smrg                {
181254a7Smrg                    source.popBack();
181254a7Smrg                    source.popBack();
181254a7Smrg                }
181254a7Smrg
181254a7Smrg                C opIndex(size_t index)
181254a7Smrg                {
181254a7Smrg                    return C(source[index * 2], source[index * 2 + 1]);
181254a7Smrg                }
181254a7Smrg
181254a7Smrg                typeof(this) opSlice(size_t lower, size_t upper)
181254a7Smrg                {
181254a7Smrg                    return typeof(this)(source[lower * 2 .. upper * 2]);
181254a7Smrg                }
181254a7Smrg            }
181254a7Smrg
181254a7Smrg            auto oddsImag = OddToImaginary(range);
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        fft(oddsImag, buf[0..$ / 2]);
181254a7Smrg        auto evenFft = buf[0..$ / 2];
181254a7Smrg        auto oddFft = buf[$ / 2..$];
181254a7Smrg        immutable halfN = evenFft.length;
181254a7Smrg        oddFft[0].re = buf[0].im;
181254a7Smrg        oddFft[0].im = 0;
181254a7Smrg        evenFft[0].im = 0;
181254a7Smrg        // evenFft[0].re is already right b/c it's aliased with buf[0].re.
181254a7Smrg
181254a7Smrg        foreach (k; 1 .. halfN / 2 + 1)
181254a7Smrg        {
181254a7Smrg            immutable bufk = buf[k];
181254a7Smrg            immutable bufnk = buf[buf.length / 2 - k];
181254a7Smrg            evenFft[k].re = 0.5 * (bufk.re + bufnk.re);
181254a7Smrg            evenFft[halfN - k].re = evenFft[k].re;
181254a7Smrg            evenFft[k].im = 0.5 * (bufk.im - bufnk.im);
181254a7Smrg            evenFft[halfN - k].im = -evenFft[k].im;
181254a7Smrg
181254a7Smrg            oddFft[k].re = 0.5 * (bufk.im + bufnk.im);
181254a7Smrg            oddFft[halfN - k].re = oddFft[k].re;
181254a7Smrg            oddFft[k].im = 0.5 * (bufnk.re - bufk.re);
181254a7Smrg            oddFft[halfN - k].im = -oddFft[k].im;
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        butterfly(buf);
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    void butterfly(R)(R buf) const
181254a7Smrg    in
181254a7Smrg    {
181254a7Smrg        assert(isPowerOf2(buf.length));
181254a7Smrg    }
*b1e83836Smrg    do
181254a7Smrg    {
181254a7Smrg        immutable n = buf.length;
181254a7Smrg        immutable localLookup = negSinLookup[bsf(n)];
181254a7Smrg        assert(localLookup.length == n);
181254a7Smrg
181254a7Smrg        immutable cosMask = n - 1;
181254a7Smrg        immutable cosAdd = n / 4 * 3;
181254a7Smrg
181254a7Smrg        lookup_t negSinFromLookup(size_t index) pure nothrow
181254a7Smrg        {
181254a7Smrg            return localLookup[index];
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        lookup_t cosFromLookup(size_t index) pure nothrow
181254a7Smrg        {
181254a7Smrg            // cos is just -sin shifted by PI * 3 / 2.
181254a7Smrg            return localLookup[(index + cosAdd) & cosMask];
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        immutable halfLen = n / 2;
181254a7Smrg
181254a7Smrg        // This loop is unrolled and the two iterations are interleaved
181254a7Smrg        // relative to the textbook FFT to increase ILP.  This gives roughly 5%
181254a7Smrg        // speedups on DMD.
181254a7Smrg        for (size_t k = 0; k < halfLen; k += 2)
181254a7Smrg        {
181254a7Smrg            immutable cosTwiddle1 = cosFromLookup(k);
181254a7Smrg            immutable sinTwiddle1 = negSinFromLookup(k);
181254a7Smrg            immutable cosTwiddle2 = cosFromLookup(k + 1);
181254a7Smrg            immutable sinTwiddle2 = negSinFromLookup(k + 1);
181254a7Smrg
181254a7Smrg            immutable realLower1 = buf[k].re;
181254a7Smrg            immutable imagLower1 = buf[k].im;
181254a7Smrg            immutable realLower2 = buf[k + 1].re;
181254a7Smrg            immutable imagLower2 = buf[k + 1].im;
181254a7Smrg
181254a7Smrg            immutable upperIndex1 = k + halfLen;
181254a7Smrg            immutable upperIndex2 = upperIndex1 + 1;
181254a7Smrg            immutable realUpper1 = buf[upperIndex1].re;
181254a7Smrg            immutable imagUpper1 = buf[upperIndex1].im;
181254a7Smrg            immutable realUpper2 = buf[upperIndex2].re;
181254a7Smrg            immutable imagUpper2 = buf[upperIndex2].im;
181254a7Smrg
181254a7Smrg            immutable realAdd1 = cosTwiddle1 * realUpper1
181254a7Smrg                               - sinTwiddle1 * imagUpper1;
181254a7Smrg            immutable imagAdd1 = sinTwiddle1 * realUpper1
181254a7Smrg                               + cosTwiddle1 * imagUpper1;
181254a7Smrg            immutable realAdd2 = cosTwiddle2 * realUpper2
181254a7Smrg                               - sinTwiddle2 * imagUpper2;
181254a7Smrg            immutable imagAdd2 = sinTwiddle2 * realUpper2
181254a7Smrg                               + cosTwiddle2 * imagUpper2;
181254a7Smrg
181254a7Smrg            buf[k].re += realAdd1;
181254a7Smrg            buf[k].im += imagAdd1;
181254a7Smrg            buf[k + 1].re += realAdd2;
181254a7Smrg            buf[k + 1].im += imagAdd2;
181254a7Smrg
181254a7Smrg            buf[upperIndex1].re = realLower1 - realAdd1;
181254a7Smrg            buf[upperIndex1].im = imagLower1 - imagAdd1;
181254a7Smrg            buf[upperIndex2].re = realLower2 - realAdd2;
181254a7Smrg            buf[upperIndex2].im = imagLower2 - imagAdd2;
181254a7Smrg        }
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    // This constructor is used within this module for allocating the
181254a7Smrg    // buffer space elsewhere besides the GC heap.  It's definitely **NOT**
181254a7Smrg    // part of the public API and definitely **IS** subject to change.
181254a7Smrg    //
181254a7Smrg    // Also, this is unsafe because the memSpace buffer will be cast
181254a7Smrg    // to immutable.
*b1e83836Smrg    //
*b1e83836Smrg    // Public b/c of https://issues.dlang.org/show_bug.cgi?id=4636.
*b1e83836Smrg    public this(lookup_t[] memSpace)
181254a7Smrg    {
181254a7Smrg        immutable size = memSpace.length / 2;
181254a7Smrg
181254a7Smrg        /* Create a lookup table of all negative sine values at a resolution of
181254a7Smrg         * size and all smaller power of two resolutions.  This may seem
181254a7Smrg         * inefficient, but having all the lookups be next to each other in
181254a7Smrg         * memory at every level of iteration is a huge win performance-wise.
181254a7Smrg         */
181254a7Smrg        if (size == 0)
181254a7Smrg        {
181254a7Smrg            return;
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        assert(isPowerOf2(size),
181254a7Smrg            "Can only do FFTs on ranges with a size that is a power of two.");
181254a7Smrg
181254a7Smrg        auto table = new lookup_t[][bsf(size) + 1];
181254a7Smrg
181254a7Smrg        table[$ - 1] = memSpace[$ - size..$];
181254a7Smrg        memSpace = memSpace[0 .. size];
181254a7Smrg
181254a7Smrg        auto lastRow = table[$ - 1];
181254a7Smrg        lastRow[0] = 0;  // -sin(0) == 0.
181254a7Smrg        foreach (ptrdiff_t i; 1 .. size)
181254a7Smrg        {
181254a7Smrg            // The hard coded cases are for improved accuracy and to prevent
181254a7Smrg            // annoying non-zeroness when stuff should be zero.
181254a7Smrg
181254a7Smrg            if (i == size / 4)
181254a7Smrg                lastRow[i] = -1;  // -sin(pi / 2) == -1.
181254a7Smrg            else if (i == size / 2)
181254a7Smrg                lastRow[i] = 0;   // -sin(pi) == 0.
181254a7Smrg            else if (i == size * 3 / 4)
181254a7Smrg                lastRow[i] = 1;  // -sin(pi * 3 / 2) == 1
181254a7Smrg            else
181254a7Smrg                lastRow[i] = -sin(i * 2.0L * PI / size);
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        // Fill in all the other rows with strided versions.
181254a7Smrg        foreach (i; 1 .. table.length - 1)
181254a7Smrg        {
181254a7Smrg            immutable strideLength = size / (2 ^^ i);
181254a7Smrg            auto strided = Stride!(lookup_t[])(lastRow, strideLength);
181254a7Smrg            table[i] = memSpace[$ - strided.length..$];
181254a7Smrg            memSpace = memSpace[0..$ - strided.length];
181254a7Smrg
181254a7Smrg            size_t copyIndex;
181254a7Smrg            foreach (elem; strided)
181254a7Smrg            {
181254a7Smrg                table[i][copyIndex++] = elem;
181254a7Smrg            }
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        negSinLookup = cast(immutable) table;
181254a7Smrg    }
181254a7Smrg
181254a7Smrgpublic:
*b1e83836Smrg    /**Create an `Fft` object for computing fast Fourier transforms of
*b1e83836Smrg     * power of two sizes of `size` or smaller.  `size` must be a
181254a7Smrg     * power of two.
181254a7Smrg     */
181254a7Smrg    this(size_t size)
181254a7Smrg    {
181254a7Smrg        // Allocate all twiddle factor buffers in one contiguous block so that,
181254a7Smrg        // when one is done being used, the next one is next in cache.
181254a7Smrg        auto memSpace = uninitializedArray!(lookup_t[])(2 * size);
181254a7Smrg        this(memSpace);
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    @property size_t size() const
181254a7Smrg    {
181254a7Smrg        return (negSinLookup is null) ? 0 : negSinLookup[$ - 1].length;
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    /**Compute the Fourier transform of range using the $(BIGOH N log N)
*b1e83836Smrg     * Cooley-Tukey Algorithm.  `range` must be a random-access range with
*b1e83836Smrg     * slicing and a length equal to `size` as provided at the construction of
181254a7Smrg     * this object.  The contents of range can be either  numeric types,
181254a7Smrg     * which will be interpreted as pure real values, or complex types with
*b1e83836Smrg     * properties or members `.re` and `.im` that can be read.
181254a7Smrg     *
181254a7Smrg     * Note:  Pure real FFTs are automatically detected and the relevant
181254a7Smrg     *        optimizations are performed.
181254a7Smrg     *
181254a7Smrg     * Returns:  An array of complex numbers representing the transformed data in
181254a7Smrg     *           the frequency domain.
181254a7Smrg     *
181254a7Smrg     * Conventions: The exponent is negative and the factor is one,
181254a7Smrg     *              i.e., output[j] := sum[ exp(-2 PI i j k / N) input[k] ].
181254a7Smrg     */
181254a7Smrg    Complex!F[] fft(F = double, R)(R range) const
181254a7Smrg        if (isFloatingPoint!F && isRandomAccessRange!R)
181254a7Smrg    {
181254a7Smrg        enforceSize(range);
181254a7Smrg        Complex!F[] ret;
181254a7Smrg        if (range.length == 0)
181254a7Smrg        {
181254a7Smrg            return ret;
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        // Don't waste time initializing the memory for ret.
181254a7Smrg        ret = uninitializedArray!(Complex!F[])(range.length);
181254a7Smrg
181254a7Smrg        fft(range,  ret);
181254a7Smrg        return ret;
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    /**Same as the overload, but allows for the results to be stored in a user-
181254a7Smrg     * provided buffer.  The buffer must be of the same length as range, must be
181254a7Smrg     * a random-access range, must have slicing, and must contain elements that are
181254a7Smrg     * complex-like.  This means that they must have a .re and a .im member or
181254a7Smrg     * property that can be both read and written and are floating point numbers.
181254a7Smrg     */
181254a7Smrg    void fft(Ret, R)(R range, Ret buf) const
181254a7Smrg        if (isRandomAccessRange!Ret && isComplexLike!(ElementType!Ret) && hasSlicing!Ret)
181254a7Smrg    {
181254a7Smrg        assert(buf.length == range.length);
181254a7Smrg        enforceSize(range);
181254a7Smrg
181254a7Smrg        if (range.length == 0)
181254a7Smrg        {
181254a7Smrg            return;
181254a7Smrg        }
181254a7Smrg        else if (range.length == 1)
181254a7Smrg        {
181254a7Smrg            buf[0] = range[0];
181254a7Smrg            return;
181254a7Smrg        }
181254a7Smrg        else if (range.length == 2)
181254a7Smrg        {
181254a7Smrg            slowFourier2(range, buf);
181254a7Smrg            return;
181254a7Smrg        }
181254a7Smrg        else
181254a7Smrg        {
181254a7Smrg            alias E = ElementType!R;
181254a7Smrg            static if (is(E : real))
181254a7Smrg            {
181254a7Smrg                return fftImplPureReal(range, buf);
181254a7Smrg            }
181254a7Smrg            else
181254a7Smrg            {
181254a7Smrg                static if (is(R : Stride!R))
181254a7Smrg                    return fftImpl(range, buf);
181254a7Smrg                else
181254a7Smrg                    return fftImpl(Stride!R(range, 1), buf);
181254a7Smrg            }
181254a7Smrg        }
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    /**
181254a7Smrg     * Computes the inverse Fourier transform of a range.  The range must be a
181254a7Smrg     * random access range with slicing, have a length equal to the size
181254a7Smrg     * provided at construction of this object, and contain elements that are
181254a7Smrg     * either of type std.complex.Complex or have essentially
181254a7Smrg     * the same compile-time interface.
181254a7Smrg     *
181254a7Smrg     * Returns:  The time-domain signal.
181254a7Smrg     *
181254a7Smrg     * Conventions: The exponent is positive and the factor is 1/N, i.e.,
181254a7Smrg     *              output[j] := (1 / N) sum[ exp(+2 PI i j k / N) input[k] ].
181254a7Smrg     */
181254a7Smrg    Complex!F[] inverseFft(F = double, R)(R range) const
181254a7Smrg        if (isRandomAccessRange!R && isComplexLike!(ElementType!R) && isFloatingPoint!F)
181254a7Smrg    {
181254a7Smrg        enforceSize(range);
181254a7Smrg        Complex!F[] ret;
181254a7Smrg        if (range.length == 0)
181254a7Smrg        {
181254a7Smrg            return ret;
181254a7Smrg        }
181254a7Smrg
181254a7Smrg        // Don't waste time initializing the memory for ret.
181254a7Smrg        ret = uninitializedArray!(Complex!F[])(range.length);
181254a7Smrg
181254a7Smrg        inverseFft(range, ret);
181254a7Smrg        return ret;
181254a7Smrg    }
181254a7Smrg
181254a7Smrg    /**
181254a7Smrg     * Inverse FFT that allows a user-supplied buffer to be provided.  The buffer
181254a7Smrg     * must be a random access range with slicing, and its elements
181254a7Smrg     * must be some complex-like type.
181254a7Smrg     */
181254a7Smrg    void inverseFft(Ret, R)(R range, Ret buf) const
181254a7Smrg        if (isRandomAccessRange!Ret && isComplexLike!(ElementType!Ret) && hasSlicing!Ret)
181254a7Smrg    {
181254a7Smrg        enforceSize(range);
181254a7Smrg
181254a7Smrg        auto swapped = map!swapRealImag(range);
181254a7Smrg        fft(swapped,  buf);
181254a7Smrg
181254a7Smrg        immutable lenNeg1 = 1.0 / buf.length;
181254a7Smrg        foreach (ref elem; buf)
181254a7Smrg        {
181254a7Smrg            immutable temp = elem.re * lenNeg1;
181254a7Smrg            elem.re = elem.im * lenNeg1;
181254a7Smrg            elem.im = temp;
181254a7Smrg        }
181254a7Smrg    }
181254a7Smrg}
181254a7Smrg
// This mixin creates an Fft object in the scope it's mixed into such that all
// memory owned by the object is deterministically destroyed at the end of that
// scope.  The enclosing scope must provide a variable named `range` with a
// `length`; the mixin introduces `lookupBuf` (malloc'ed storage for
// 2 * range.length lookup entries) and `fftObj` (the scoped Fft built on it).
private enum string MakeLocalFft = q{
    import core.stdc.stdlib;
    import core.exception : onOutOfMemoryError;

    auto lookupBuf = (cast(lookup_t*) malloc(range.length * 2 * lookup_t.sizeof))
                     [0 .. 2 * range.length];

    // malloc is allowed to return null for a zero-byte request, so a null
    // pointer only signals exhaustion when memory was actually asked for.
    if (lookupBuf.ptr is null && lookupBuf.length != 0)
        onOutOfMemoryError();

    // free(null) is a no-op, so this is safe for the empty case as well.
    scope(exit) free(cast(void*) lookupBuf.ptr);
    auto fftObj = scoped!Fft(lookupBuf);
};
181254a7Smrg
/**
 * Convenience functions that create an `Fft` object, run the FFT or inverse
 * FFT and return the result.  Useful for one-off FFTs.
 *
 * Note:  In addition to convenience, these functions are slightly more
 *        efficient than manually creating an Fft object for a single use,
 *        as the Fft object is deterministically destroyed before these
 *        functions return.
 */
Complex!F[] fft(F = double, R)(R range)
{
    // Declares `fftObj`, a temporary Fft sized from `range`.
    mixin(MakeLocalFft);

    auto transformed = fftObj.fft!(F, R)(range);
    return transformed;
}
181254a7Smrg
/// ditto
void fft(Ret, R)(R range, Ret buf)
{
    // Declares `fftObj`, a temporary Fft sized from `range`.
    mixin(MakeLocalFft);

    // The transform is written into the caller's buffer.
    fftObj.fft!(Ret, R)(range, buf);
}
181254a7Smrg
/// ditto
Complex!F[] inverseFft(F = double, R)(R range)
{
    // Declares `fftObj`, a temporary Fft sized from `range`.
    mixin(MakeLocalFft);

    auto restored = fftObj.inverseFft!(F, R)(range);
    return restored;
}
181254a7Smrg
/// ditto
void inverseFft(Ret, R)(R range, Ret buf)
{
    // Declares `fftObj`, a temporary Fft sized from `range`.
    mixin(MakeLocalFft);

    // The inverse transform is written into the caller's buffer.
    fftObj.inverseFft!(Ret, R)(range, buf);
}
181254a7Smrg
// Exercises the convenience fft/inverseFft functions on real, reversed,
// float and complex inputs, plus the special-cased sizes 0, 1 and 2.
@system unittest
{
    import std.algorithm;
    import std.conv;
    import std.range;
    // Test values from R and Octave.
    auto arr = [1,2,3,4,5,6,7,8];
    auto fft1 = fft(arr);
    assert(isClose(map!"a.re"(fft1),
        [36.0, -4, -4, -4, -4, -4, -4, -4], 1e-4));
    assert(isClose(map!"a.im"(fft1),
        [0, 9.6568, 4, 1.6568, 0, -1.6568, -4, -9.6568], 1e-4));

    auto fft1Retro = fft(retro(arr));
    assert(isClose(map!"a.re"(fft1Retro),
        [36.0, 4, 4, 4, 4, 4, 4, 4], 1e-4));
    assert(isClose(map!"a.im"(fft1Retro),
        [0, -9.6568, -4, -1.6568, 0, 1.6568, 4, 9.6568], 1e-4));

    auto fft1Float = fft(to!(float[])(arr));
    assert(isClose(map!"a.re"(fft1), map!"a.re"(fft1Float)));
    assert(isClose(map!"a.im"(fft1), map!"a.im"(fft1Float)));

    alias C = Complex!float;
    auto arr2 = [C(1,2), C(3,4), C(5,6), C(7,8), C(9,10),
        C(11,12), C(13,14), C(15,16)];
    auto fft2 = fft(arr2);
    assert(isClose(map!"a.re"(fft2),
        [64.0, -27.3137, -16, -11.3137, -8, -4.6862, 0, 11.3137], 1e-4));
    assert(isClose(map!"a.im"(fft2),
        [72, 11.3137, 0, -4.686, -8, -11.3137, -16, -27.3137], 1e-4));

    auto inv1 = inverseFft(fft1);
    assert(isClose(map!"a.re"(inv1), arr, 1e-6));
    // The input was purely real, so every imaginary part of the round trip
    // must be negligible in magnitude; checking only the signed maximum
    // would let large negative residues slip through.
    assert(reduce!max(map!(c => fabs(c.im))(inv1)) < 1e-10);

    auto inv2 = inverseFft(fft2);
    assert(isClose(map!"a.re"(inv2), map!"a.re"(arr2)));
    assert(isClose(map!"a.im"(inv2), map!"a.im"(arr2)));

    // FFTs of size 0, 1 and 2 are handled as special cases.  Test them here.
    ushort[] empty;
    assert(fft(empty) == null);
    assert(inverseFft(fft(empty)) == null);

    real[] oneElem = [4.5L];
    auto oneFft = fft(oneElem);
    assert(oneFft.length == 1);
    assert(oneFft[0].re == 4.5L);
    assert(oneFft[0].im == 0);

    auto oneInv = inverseFft(oneFft);
    assert(oneInv.length == 1);
    assert(isClose(oneInv[0].re, 4.5));
    assert(isClose(oneInv[0].im, 0, 0.0, 1e-10));

    long[2] twoElems = [8, 4];
    auto twoFft = fft(twoElems[]);
    assert(twoFft.length == 2);
    assert(isClose(twoFft[0].re, 12));
    assert(isClose(twoFft[0].im, 0, 0.0, 1e-10));
    assert(isClose(twoFft[1].re, 4));
    assert(isClose(twoFft[1].im, 0, 0.0, 1e-10));
    auto twoInv = inverseFft(twoFft);
    assert(isClose(twoInv[0].re, 8));
    assert(isClose(twoInv[0].im, 0, 0.0, 1e-10));
    assert(isClose(twoInv[1].re, 4));
    assert(isClose(twoInv[1].im, 0, 0.0, 1e-10));
}
181254a7Smrg
// Returns a copy of `input` with its real and imaginary parts exchanged.
// The inverse FFT uses this to reuse the forward transform.
C swapRealImag(C)(C input)
{
    auto newRe = input.im;
    auto newIm = input.re;
    return C(newRe, newIm);
}
181254a7Smrg
/** This function transforms `decimal` value into a value in the factorial number
system stored in `fac`.

The digits are written most significant first.  With `n` being the returned
digit count, the stored number satisfies:
$(D decimal == fac[0] * (n - 1)! + fac[1] * (n - 2)! + ... + fac[n - 1] * 0!)

Params:
    decimal = The decimal value to convert into the factorial number system.
    fac = The array to store the factorial number. The array is of size 21 as
        `ulong.max` requires 21 digits in the factorial number system.
Returns:
    The number of digits of the factorial number stored in `fac`
    (1 when `decimal` is zero).
*/
size_t decimalToFactorial(ulong decimal, ref ubyte[21] fac)
        @safe pure nothrow @nogc
{
    import std.algorithm.mutation : reverse;

    size_t digitCount = 0;
    ulong radix = 1;

    // Peel off digits least significant first: the k-th digit is the
    // remainder modulo k + 1.
    while (decimal != 0)
    {
        fac[digitCount] = cast(ubyte)(decimal % radix);
        decimal /= radix;
        ++digitCount;
        ++radix;
    }

    // Zero is represented by a single zero digit.
    if (digitCount == 0)
    {
        fac[0] = 0;
        digitCount = 1;
    }

    // Switch to most-significant-first order.
    reverse(fac[0 .. digitCount]);

    // The least significant digit (the 0! place, now last) is always zero.
    assert(fac[digitCount - 1] == 0);

    return digitCount;
}
*b1e83836Smrg
///
@safe pure @nogc unittest
{
    ubyte[21] fac;
    size_t idx = decimalToFactorial(2982, fac);

    // 2982 == 4 * 6! + 0 * 5! + 4 * 4! + 1 * 3! + 0 * 2! + 0 * 1! + 0 * 0!
    assert(idx == 7);
    assert(fac[0] == 4);
    assert(fac[1] == 0);
    assert(fac[2] == 4);
    assert(fac[3] == 1);
    assert(fac[4] == 0);
    assert(fac[5] == 0);
    assert(fac[6] == 0);
}
*b1e83836Smrg
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    ubyte[21] fac;

    // Zero is stored as a single zero digit.
    size_t idx = decimalToFactorial(0UL, fac);
    assert(idx == 1);
    assert(fac[0] == 0);

    // The largest input fills all 21 digits.
    fac[] = 0;
    idx = decimalToFactorial(ulong.max, fac);
    assert(idx == 21);
    assert(equal(fac[0 .. 21],
        [7, 11, 12, 4, 3, 15, 3, 5, 3, 5, 0, 8, 3, 5, 0, 0, 0, 2, 1, 1, 0]));

    // A mid-sized value uses only a prefix of the buffer.
    fac[] = 0;
    idx = decimalToFactorial(2982, fac);
    assert(idx == 7);
    assert(equal(fac[0 .. idx], [4, 0, 4, 1, 0, 0, 0]));
}
*b1e83836Smrg
181254a7Smrgprivate:
// Strided view over a random-access range, used by the FFT code.
// std.algorithm could not be used because its stride length isn't modifiable
// on the fly, and because this range has grown some performance hacks for
// powers of 2.
struct Stride(R)
{
    import core.bitop : bsf;
    Unqual!R range;
    size_t _nSteps;
    size_t _length;
    alias E = ElementType!(R);

    // Views every `nStepsIn`-th element of `range`, starting with the first.
    this(R range, size_t nStepsIn)
    {
        this.range = range;
        this._nSteps = nStepsIn;
        // Ceiling division: a trailing partial step still yields one element.
        this._length = (range.length + nStepsIn - 1) / nStepsIn;
    }

    // Number of elements remaining in the strided view.
    size_t length() const @property
    {
        return _length;
    }

    // Independent copy of this view, with the underlying range saved as well.
    typeof(this) save() @property
    {
        typeof(this) copy = this;
        copy.range = copy.range.save;
        return copy;
    }

    // The index-th strided element, i.e. underlying element index * step.
    E opIndex(size_t index)
    {
        return range[index * _nSteps];
    }

    E front() @property
    {
        return range[0];
    }

    // Advances by one full step, or empties the view when fewer than one
    // step's worth of underlying elements remain.
    void popFront()
    {
        if (range.length < _nSteps)
        {
            range = range[0 .. 0];
            _length = 0;
            return;
        }

        range = range[_nSteps .. range.length];
        --_length;
    }

    // Pops half the range's stride.  The cached length is left untouched.
    void popHalf()
    {
        range = range[_nSteps / 2 .. range.length];
    }

    bool empty() const @property
    {
        return _length == 0;
    }

    // Current step between consecutive strided elements.
    size_t nSteps() const @property
    {
        return _nSteps;
    }

    // Doubles the step and halves the cached length.
    void doubleSteps()
    {
        _nSteps <<= 1;
        _length >>= 1;
    }

    // Sets a new step and recomputes the cached length from the underlying
    // range.
    size_t nSteps(size_t newVal) @property
    {
        _nSteps = newVal;

        // Using >> bsf(step) is a few cycles faster than / step; the two only
        // agree when the step is a power of two.
        _length = (range.length + newVal - 1) >> bsf(newVal);
        return newVal;
    }
}
181254a7Smrg
// Two-point transform written out by hand.  This is actually a TON faster
// than using a generic slow DFT and seems to be the best base case.  (Size 1
// needs no helper: it is just buf[0] = range[0].)
void slowFourier2(Ret, R)(R range, Ret buf)
{
    assert(range.length == 2);
    assert(buf.length == 2);

    // First output is the sum of both inputs ...
    buf[0] = range[0] + range[1];

    // ... second output is their difference.
    buf[1] = range[0] - range[1];
}
181254a7Smrg
// Four-point transform written out by hand.  Doesn't work as well as the
// size 2 case.
void slowFourier4(Ret, R)(R range, Ret buf)
{
    alias C = ElementType!Ret;

    assert(range.length == 4);
    assert(buf.length == 4);

    // The imaginary unit, expressed in the output element type.
    auto j = C(0, 1);

    buf[0] = range[0] + range[1] + range[2] + range[3];
    buf[1] = range[0] - range[1] * j - range[2] + range[3] * j;
    buf[2] = range[0] - range[1] + range[2] - range[3];
    buf[3] = range[0] + range[1] * j - range[2] - range[3] * j;
}
181254a7Smrg
// Returns the largest power of two that does not exceed `num`, for positive
// `num`.  Zero has no such power of two and is returned unchanged.
N roundDownToPowerOf2(N)(N num)
if (isScalarType!N && !isFloatingPoint!N)
{
    import core.bitop : bsr;

    // bsr's result is undefined for a zero argument, so never call it with one.
    if (num == 0)
        return num;

    // Keep only the highest set bit.
    return num & (cast(N) 1 << bsr(num));
}
181254a7Smrg
@safe unittest
{
    // Each pair is [input, expected]: a non-power rounds down, while an exact
    // power of two maps to itself.
    int[2][] cases = [[7, 4], [4, 4]];
    foreach (pair; cases)
    {
        assert(roundDownToPowerOf2(pair[0]) == pair[1]);
    }
}
181254a7Smrg
// True when a value of type T exposes both a `.re` and a `.im` member or
// property.
enum bool isComplexLike(T) =
    is(typeof(T.init.re)) && is(typeof(T.init.im));
181254a7Smrg
@safe unittest
{
    // A Complex instantiation qualifies; a plain integer type does not.
    static assert( isComplexLike!(Complex!double));
    static assert(!isComplexLike!uint);
}