/* **COPYRIGHT******************************************************************
    INTEL CONFIDENTIAL
    Copyright (C) 2017 Intel Corporation
    Copyright (C), 1994-1998 Aware Inc. All Rights Reserved.
******************************************************************COPYRIGHT** */
/* **DISCLAIMER*****************************************************************
    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors. Title to the Material remains with Intel
    Corporation or its suppliers and licensors. The Material may contain
    trade secrets and proprietary and confidential information of Intel
    Corporation and its suppliers and licensors, and is protected by
    worldwide copyright and trade secret laws and treaty provisions. No part
    of the Material may be used, copied, reproduced, modified, published,
    uploaded, posted, transmitted, distributed, or disclosed in any way
    without Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise. Any license under
    such intellectual property rights must be express and approved by
    Intel in writing.
*****************************************************************DISCLAIMER** */
/*
 *------------------------------------------------------------------------
 *
 *   Aware DMT Technology. Proprietary and Confidential.
 *
 *   40 Middlesex Turnpike, Bedford, MA 01730-1413
 *   Phone (781) 276 - 4000
 *   Fax   (781) 276 - 4001
 *
 *   IEEE_FLT.C
 *
 *   Description:  Fixed point emulation of IEEE 754 Single Precision
 *   Floating point routines
 *
 *------------------------------------------------------------------------
 */

#include "common.h"
#include "ieee_flt.h"
#include "dsp_op.h"
#include "mul.h"

/* =============================================== */
/* constants used only by this file */
/* =============================================== */
#define IEEE_FLT_ROUND /* used to enable IEEE 754 "toward the nearest representable value" rounding rules */
/* #undef  IEEE_FLT_ROUND */
/* NOTE(review): no routine visible in this file tests IEEE_FLT_ROUND with #ifdef/#ifndef -- confirm whether it is still consumed anywhere. */

#define NUM_ROUND_BITS_ADD 6 /* set #of bits for rounding precision in addf32(). Range: 1-6 for 32-bit computation */
#define NUM_ROUND_BITS_MPY 5 /* set ODD (range: 1,3,5) # of bits for rounding precision in mpyf32(). */
/* mpyf32() pre-shifts each fraction by (32 - EXP32SHIFT_LOW + NUM_ROUND_BITS_MPY)/2; presumably the "ODD" rule keeps that division exact -- confirm against EXP32SHIFT_LOW. */

#ifdef FLOAT_IN_SW

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : int2f32()
 *
 *  Description:    Converts a value of type int16 to type Float32
 *
 *  Prototype:      Float32 int2f32(int16 s_xin)
 *
 *  Input Arguments:
 *      int16 s_xin   int16 to be converted
 *
 *  Output Arguments:
 *
 *  Return:
 *      Float32 representation of s_xin
 *
 *  Notes:
 *
 *------------------------------------------------------------------------
 *^^^
 */

Float32 int2f32(int16 s_xin)
{
   uint16  us_Mag;         /* |s_xin|, later the fraction bits left-aligned in 16 bits */
   uint8   uc_Shift;       /* left shift that pushes the leading 1 just past bit 15 */
   uint8   uc_BiasedExp;   /* exponent field of the result */
   Float32 f_Result;

   /* zero has no leading 1 to normalize; its encoding is all zero bits */
   if (s_xin == 0)
   {
      return(0x0l);
   }

   /* work on the magnitude; the sign is re-applied at the very end */
   us_Mag = (s_xin < 0) ? -s_xin : s_xin;

   /*
   * Per the original note, norm_l() returns the shift that places the MS 1
   * of a 32-bit signed word in bit 30.  Subtracting 14 turns that into the
   * shift that moves the MS 1 of the 16-bit magnitude to bit 16, i.e. out
   * of the uint16 so that only the fraction bits remain.
   */
   uc_Shift = (unsigned char) norm_l((int32) us_Mag) - 14;
   us_Mag <<= uc_Shift;

   /* the leading 1 originally sat at bit (16 - uc_Shift); add the bias of 127 */
   uc_BiasedExp = 127 + (16 - uc_Shift);

   /* exponent into bits 30..23, 16-bit fraction widened to the 23-bit field */
   f_Result  = (Float32)uc_BiasedExp << 23;
   f_Result += (Float32)us_Mag << 7;

   /* negative inputs get the sign bit */
   if (s_xin < 0)
   {
      f_Result += NEG32;
   }

   return(f_Result);
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : int32toFloat32()
 *
 *  Description:    Converts a value of type int32 to type Float32
 *
 *  Prototype:      Float32 int32toFloat32(int32 l_xin)
 *
 *  Input Arguments:
 *      int32 l_xin   int32 to be converted
 *
 *  Output Arguments:
 *
 *  Return:
 *      Float32 representation of l_xin
 *
 *  Notes:
 *
 *------------------------------------------------------------------------
 *^^^
 */

Float32 int32toFloat32(int32 l_xin)
{
   /*
    * Converts a 32-bit signed integer to its Float32 bit pattern.
    *
    * Magnitudes wider than 24 bits are rounded to 24 bits (half rounds up
    * in magnitude).  If that rounding carries out of the 24-bit significand
    * (e.g. 0x7FFFFFFF -> 2^31) the exponent is incremented; previously the
    * carry was masked away and the result was half the correct value.
    */

   int16   s_shift23;      /* normalization shift count, later adjusted by -7 */
   uint32  ul_Exponent;    /* biased exponent */
   Float32 f_xout;
   int32   l_lsb;

   /* 0 and 0x80000000 are the two inputs with no bits below the sign bit */
   if ((l_xin & 0x7fffffff) == 0)
   {
      if (l_xin==0)
      {
         return(0x0L);
      }
      else
      {
         return(0xcf000000L);    /* -2^31, whose magnitude cannot be negated in int32 */
      }
   }

   /* record the sign bit and continue with the magnitude */
   if (l_xin < 0)
   {
      f_xout = 0x80000000UL;
      l_xin = -l_xin;
   }
   else
   {
      f_xout = 0;
   }

   /* This gives the shift amount to put the most significant 1 in bit 30 */
   for(s_shift23 = 0; l_xin < 0x40000000L; s_shift23++)
   {
      l_xin <<= 1;
   }

   /* IEEE floating point has a 23 bit Mantissa, so move MSB to bit 23 */
   /* (shift right by 7 in total; the last bit shifted out is added back as the rounding bit) */
   s_shift23 -= 7;
   l_xin >>= 6;
   l_lsb = l_xin & 0x1;
   l_xin >>= 1;
   l_xin += l_lsb;

   /* Calculate IEEE Exponent: s_shift23 is in -7..23, so this is 127..157 */
   ul_Exponent = (uint32)(127 + 23 - s_shift23);

   /* rounding 0xFFFFFF up gives 0x1000000: renormalize and bump the exponent */
   if (l_xin > 0xffffffL)
   {
      l_xin >>= 1;
      ul_Exponent++;
   }

   /* Mask off the implicit 1 in bit 23 */
   l_xin &= 0x7fffff;
   f_xout |= l_xin;

   /* Move exponent into position */
   f_xout += ul_Exponent<< 23;

   return(f_xout);
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : f32toint16
 *
 *  Description:    Converts a variable of type Float32 to type int16.
 *  Input values greater than 32767 will be saturated to the maximum int16
 *  value of 32767.  Input values less than -32768 will be saturated to
 *  the minimum int16 value of -32768. Rounding may be performed by either
 *  rounding towards the nearest integer or by truncating (rounding towards
 *  zero).
 *
 *  Prototype:      int16 f32toint16(Float32 f_in, int16 s_conv_mode)
 *
 *  Input Arguments:
 *      Float32 f_in        Float32 value to be converted
 *      int16    s_conv_mode 0, round to nearest integer
 *                          1, truncate (round towards 0)
 *
 *  Output Arguments:
 *
 *  Return:
 *      int16 representation of f_in
 *
 *  Notes:  Differs from Microsoft Visual C++ (VC) implementation in the
 *  following manners:
 *      1) VC will always truncate when converting from float to int16
 *      2) VC does not saturate when f_in is beyond int16 bounds.
 *      VC will return (f_in MOD 2^16), with truncation
 *
 *------------------------------------------------------------------------
 *^^^
 */

int16 f32toint16(Float32 f_in, int16 s_conv_mode)
{
   int16   s_BiasedExp;    /* 8-bit exponent field of f_in */
   int16   s_RightShift;   /* shift that scales the 24-bit significand to an integer */
   Float32 f_Mag;          /* magnitude of the result before the sign is applied */

   /* pull out the exponent field */
   s_BiasedExp  = (int16)(f_in >> EXP32SHIFT_LOW);
   s_BiasedExp &= MASK8LOW;

   /* |f_in| < 0.5 converts to 0 in either mode */
   if (s_BiasedExp < (EXPONENT_BIAS - 1))
   {
      return 0;
   }

   /* |f_in| >= 2^15 saturates according to the sign bit */
   if (s_BiasedExp >= (EXPONENT_BIAS + 15))
   {
      return (f_in & NEG32) ? (int16) MIN_16 : (int16) MAX_16;
   }

   /* significand with the hidden bit restored */
   f_Mag = (f_in & MASK23LOW) + HIDDEN_BIT;

   /* number of fraction bits that lie below the binary point */
   s_RightShift = EXP32SHIFT_LOW - (s_BiasedExp - EXPONENT_BIAS);

   if (s_conv_mode == 0)
   {
      /* project helper round(value, shift), not the C library round(); */
      /* presumably a rounding right shift -- confirm in dsp_op.h */
      f_Mag = round(f_Mag,s_RightShift);
   }
   else
   {
      /* truncate toward zero */
      f_Mag >>= s_RightShift;
   }

   /* rounding can still reach 32768, which is out of range for int16 */
   if (f_Mag > MAX_16)
   {
      return (f_in & NEG32) ? (int16)MIN_16 : (int16)MAX_16;
   }

   /* apply the sign */
   if (f_in & NEG32)
   {
      return - (int16)(f_Mag);
   }

   return (int16)(f_Mag);
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : f32toint32
 *
 *  Description:    Converts a variable of type Float32 to type int32.
 *  Input values greater than 0x7FFFFFFF will be saturated to the maximum int32
 *  value of 0x7FFFFFFF.  Input values less than -0x80000000 will be saturated to
 *  the minimum int32 value of -0x80000000. Rounding may be performed by either
 *  rounding towards the nearest integer or by truncating (rounding towards
 *  zero).
 *
 *  Prototype:      int32 f32toint32(Float32 f_in, int16 s_conv_mode)
 *
 *  Input Arguments:
 *      Float32 f_in        Float32 value to be converted
 *      int16    s_conv_mode 0, round to nearest integer
 *                          1, truncate (round towards 0)
 *
 *  Output Arguments:
 *
 *  Return:
 *      int32 representation of f_in
 *
 *  Notes:  Differs from Microsoft Visual C++ (VC) implementation in the
 *  following manners:
 *      1) VC will always truncate when converting from float to int32
 *      2) VC does not saturate when f_in is beyond int32 bounds.
 *      VC will return (f_in MOD 2^32), with truncation
 *
 *------------------------------------------------------------------------
 *^^^
 */

int32 f32toint32(Float32 f_in, int16 s_conv_mode)
{
   int16   s_BiasedExp;    /* 8-bit exponent field of f_in */
   int16   s_Scale;        /* > 0: right shift, < 0: left shift of the significand */
   Float32 f_Mag;          /* magnitude of the result before the sign is applied */

   /* pull out the exponent field */
   s_BiasedExp  = (int16)(f_in >> EXP32SHIFT_LOW);
   s_BiasedExp &= MASK8LOW;

   /* |f_in| < 0.5 converts to 0 in either mode */
   if (s_BiasedExp < (EXPONENT_BIAS - 1))
   {
      return 0;
   }

   /* |f_in| >= 2^31 saturates according to the sign bit */
   if (s_BiasedExp >= (EXPONENT_BIAS + 31))
   {
      return (f_in & NEG32) ? (int32) MIN_32 : (int32) MAX_32;
   }

   /* significand with the hidden bit restored */
   f_Mag = (f_in & MASK23LOW) + HIDDEN_BIT;

   /* position of the binary point relative to the 23 fraction bits */
   s_Scale = EXP32SHIFT_LOW - (s_BiasedExp - EXPONENT_BIAS);

   if (s_Scale < 0)
   {
      /* value is a multiple of 2^(-s_Scale): no fraction bits, nothing to round */
      f_Mag <<= -s_Scale;
   }
   else if (s_conv_mode == 0)
   {
      /* project helper round(value, shift), not the C library round(); */
      /* presumably a rounding right shift -- confirm in dsp_op.h */
      f_Mag = round(f_Mag,s_Scale);
   }
   else
   {
      /* truncate toward zero */
      f_Mag >>= s_Scale;
   }

   /* final range check on the magnitude */
   if (f_Mag > MAX_32)
   {
      return (f_in & NEG32) ? (int32)MIN_32 : (int32)MAX_32;
   }

   /* apply the sign */
   if (f_in & NEG32)
   {
      return - (int32)(f_Mag);
   }

   return (int32)(f_Mag);
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : addf32
 *
 *  Description:    Adds two Float32 values and returns the result
 *
 *  Prototype:      Float32 addf32(Float32 f_x, Float32 f_y)
 *
 *  Input Arguments:
 *      Float32 f_x, f_y    numbers to be added
 *
 *  Output Arguments:
 *
 *  Return:
 *      sum of f_x and f_y
 *
 *  Notes:  IEEE 754 specifies that rounding shall occur towards the
 *  nearest representable value, with even values preferred when there are
 *  two nearest representable values.  To allow faster execution, this
 *  implementation will round away from zero when there are two nearest
 *  representable values when IEEE_FLT_ROUND is undefined.
 *
 *------------------------------------------------------------------------
 *^^^
 */

/*
* Float32 addf32(Float32 x, Float32 y);
* return z = x+y.
*/
Float32 addf32(Float32 f_x, Float32 f_y)
{
   /*
    * Adds two Float32 bit patterns.
    *
    * Operands are unpacked into signed fixed-point fractions carrying
    * NUM_ROUND_BITS_ADD extra low-order bits, aligned to the larger
    * exponent, summed, then renormalized and rounded.
    *
    * Zero operands of either sign (+0 / -0) are short-circuited.  -0 is
    * produced by this library on negative underflow, and was previously
    * unpacked as if it were -1.0 * 2^-127.
    *
    * The hidden bit is added unconditionally, so denormal inputs are not
    * interpreted per IEEE 754; results that underflow are flushed to zero.
    */

   Float32 f_z;

   int16 s_ExpX, s_ExpY, s_ExpZ;
   int32 l_FracX, l_FracY, l_FracZ;
   int16 s_relexp, s_normfactor;
   int16 s_newprecision;

   /* x is +/-0 */
   if ((f_x & ~NEG32) == 0)
   {
      if ((f_y & ~NEG32) == 0)
      {
         /* both zero: the sum is -0 only when both operands are -0 */
         return (f_x & f_y);
      }
      return f_y;
   }
   else if ((f_y & ~NEG32) == 0)   /* y is +/-0 */
   {
      return f_x;
   }

   /*  unpack exponents */
   s_ExpX  = (int16) (f_x >> EXP32SHIFT_LOW);
   s_ExpX &= MASK8LOW;
   s_ExpY  = (int16) (f_y >> EXP32SHIFT_LOW);
   s_ExpY &= MASK8LOW;

   /*  unpack fractions as signed 32bit integers */
   /*  shift left by NUM_ROUND_BITS_ADD to allow for rounding */
   /*  (shift first, then negate: left-shifting a negative value is undefined in C) */
   l_FracX = f_x & MASK23LOW;
   l_FracX += HIDDEN_BIT;
   l_FracX <<= NUM_ROUND_BITS_ADD;
   if ( (int32)f_x < 0)
   {
      l_FracX = -l_FracX;
   }

   l_FracY = f_y & MASK23LOW;
   l_FracY += HIDDEN_BIT;
   l_FracY <<= NUM_ROUND_BITS_ADD;
   if ( (int32)f_y < 0)
   {
      l_FracY = -l_FracY;
   }

   /* shift fractions and add */
   s_relexp = s_ExpX - s_ExpY;

   if (s_relexp >= 0)
   {
      s_ExpZ = s_ExpX;

      /* y is too small to affect any retained bit of x */
      if (s_relexp  > ( EXP32SHIFT_LOW + NUM_ROUND_BITS_ADD))
      {
         l_FracZ = l_FracX;
      }
      else
      {
         /* NOTE: relies on >> of a negative int32 being an arithmetic shift */
         l_FracY >>= s_relexp;
         l_FracZ = l_FracX + l_FracY;
      }
   }
   else
   {
      s_ExpZ = s_ExpY;

      /* x is too small to affect any retained bit of y */
      if ( -s_relexp  > ( EXP32SHIFT_LOW + NUM_ROUND_BITS_ADD))
      {
         l_FracZ = l_FracY;
      }
      else
      {
         l_FracX >>= -s_relexp;
         l_FracZ = l_FracX + l_FracY;
      }
   }


   /*  get magnitude of l_FracZ and save sign bit */
   if (l_FracZ < 0)
   {
      l_FracZ = -l_FracZ;
      f_z = NEG32;
   }
   else
   {
      f_z = 0;
   }

   /*
    *  normalize result, round if necessary
    *
    *  In each rounding step below, the mask ((0x3 << k) - 1) selects the
    *  bits under the half bit (bit k) together with the bit just above it
    *  (the future LSB).  The half bit is added only when one of those is
    *  set, so an exact tie is rounded up only if the LSB is odd, i.e.
    *  ties go to the even value.
    */
   if (l_FracZ == 0)
   {
      return 0;
   }
   else if (l_FracZ >= (HIDDEN_BIT << (NUM_ROUND_BITS_ADD + 1)) )
   {
      /* sum carried out one bit: drop one extra bit and bump the exponent */
      if(l_FracZ & ((0x3 << NUM_ROUND_BITS_ADD) - 1))
      {
         l_FracZ += (0x1 << NUM_ROUND_BITS_ADD);
      }
      l_FracZ >>= (NUM_ROUND_BITS_ADD+1);
      s_ExpZ += 1;
   }
   else if (l_FracZ < (HIDDEN_BIT << NUM_ROUND_BITS_ADD) )
   {
      /* cancellation: s_normfactor is the left shift needed to renormalize */
      s_normfactor = norm_l(l_FracZ) - (32 - EXP32SHIFT_LOW - NUM_ROUND_BITS_ADD - 2);
      if(s_normfactor >= NUM_ROUND_BITS_ADD)
      {
         /* every guard bit is consumed by the renormalization; result is exact */
         l_FracZ <<= (s_normfactor - NUM_ROUND_BITS_ADD);
      }
      else    /* must still do some rounding */
      {
         s_newprecision=NUM_ROUND_BITS_ADD-s_normfactor;
         if(l_FracZ & ((0x3 << (s_newprecision-1)) - 1))
         {
            l_FracZ += (0x1 << (s_newprecision-1));
         }
         l_FracZ >>= s_newprecision;
      }
      s_ExpZ -= s_normfactor;
   }

   else
   {
      /* already normalized: just round away the guard bits */
      if(l_FracZ & ((0x3 << (NUM_ROUND_BITS_ADD - 1)) - 1))
      {
         l_FracZ += (0x1 << (NUM_ROUND_BITS_ADD - 1));
      }
      l_FracZ >>= NUM_ROUND_BITS_ADD;
   }

   /* take care of possible overflow caused by rounding */
   if (l_FracZ > MASK24LOW)
   {
      l_FracZ >>= 1;
      s_ExpZ++;
   }

   /*  check for underflow and overflow */
   if (s_ExpZ <= 0)        /*  underflow: flush to (signed) zero */
   {
      l_FracZ = 0;
      s_ExpZ = 0;
   }

   else if (s_ExpZ >= EXP_INF_NAN)     /*  overflow: exponent EXP_INF_NAN with a zero fraction */
   {
      s_ExpZ = EXP_INF_NAN;
      l_FracZ = 0;
   }

   /*  pack result and return */

   f_z += ((l_FracZ & MASK23LOW) + ((Float32)s_ExpZ << EXP32SHIFT_LOW));

   return f_z;

}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : subf32
 *
 *  Description:    Subtracts a Float32 value from another Float32 value
 *  and returns the result.
 *
 *  Prototype:      Float32 subf32(Float32 f_x, Float32 f_y)
 *
 *  Input Arguments:
 *      Float32 f_x, f_y    values to be subtracted
 *
 *  Output Arguments:
 *
 *  Return:
 *      f_x - f_y
 *
 *  Notes:  IEEE 754 specifies that rounding shall occur towards the
 *  nearest representable value, with even values preferred when there are
 *  two nearest representable values.  To allow faster execution, this
 *  implementation will round away from zero when there are two nearest
 *  representable values when IEEE_FLT_ROUND is undefined.
 *
 *------------------------------------------------------------------------
 *^^^
 */


Float32 subf32(Float32 f_x, Float32 f_y)
{
   /*
    * Returns f_x - f_y, computed as f_x + (-f_y).
    *
    * A zero subtrahend (+0 or -0) is handled here: flipping the sign of +0
    * yields -0 (0x80000000), which an exact "== 0" test does not recognise
    * as zero, and which made subf32(0, 0) return -0.
    */
   if ((f_y & ~NEG32) == 0)
   {
      return f_x;
   }

   /* negate y by toggling its sign bit, then add */
   return addf32(f_x, f_y ^ NEG32);
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : mpyf32
 *
 *  Description:    Multiplies two Float32 values and returns their product
 *
 *  Prototype:      Float32 mpyf32(Float32 f_x, Float32 f_y)
 *
 *  Input Arguments:
 *      Float32 f_x, f_y
 *
 *  Output Arguments:
 *
 *  Return:
 *      f_x * f_y
 *
 *  Notes:  IEEE 754 specifies that rounding shall occur towards the
 *  nearest representable value, with even values preferred when there are
 *  two nearest representable values.  To allow faster execution, this
 *  implementation will round away from zero when there are two nearest
 *  representable values when IEEE_FLT_ROUND is undefined.
 *
 *------------------------------------------------------------------------
 *^^^
 */

/*
* Float32 mpyf32(Float32 f_x, Float32 f_y);
* return z = x*y.
*/

Float32 mpyf32(Float32 f_x, Float32 f_y)
{
   /*
    * Multiplies two Float32 bit patterns.
    *
    * The 24-bit significands are pre-shifted to 31 bits and multiplied as
    * four 16x16 partial products; only the upper 32 bits of the product are
    * kept, with uc_fnzero_trun remembering whether any discarded bit was
    * non-zero (sticky information for rounding).
    *
    * Fixes relative to the earlier version:
    *   - a zero operand of either sign (+0 / -0) now returns 0; -0 used to
    *     fall through and yield a non-zero product (e.g. -0 * 4.0);
    *   - a rounding carry out of the 24-bit significand now bumps the
    *     exponent (e.g. 0x3F800001 * 0x3FFFFFFE is 2.0, not 1.0).
    *
    * The hidden bit is assumed for every non-zero operand, so denormal
    * inputs are not interpreted per IEEE 754.
    */

   Float32 f_z;

   Float32   f_FracX, f_FracY, f_FracZ;    /* fractional portions of x, y and z*/

   int16     s_ExpX, s_ExpY, s_ExpZ;      /* exponents of x, y and z*/
   Float32   f_sign;                  /* sign bit of z */
   uint16    us_m1, us_m2, us_m3, us_m4;
   Float32   f_tmp;

   uint8    uc_fnzero_trun;   /* flag to indicate nonzero truncated bits*/

   /* anything times +/-0 is returned as +0 */
   if (((f_x & ~NEG32) == 0) || ((f_y & ~NEG32) == 0))
   {
      return 0;
   }

   /*  get sign of product */
   f_sign  = f_x ^ f_y;
   f_sign &= NEG32;

   /*  unpack exponents; */
   s_ExpX  = (int16) (f_x >> EXP32SHIFT_LOW);
   s_ExpX &= MASK8LOW;
   s_ExpY  = (int16) (f_y >> EXP32SHIFT_LOW);
   s_ExpY &= MASK8LOW;

   /*  unpack fractions */
   f_FracX = f_x & MASK23LOW;
   f_FracY = f_y & MASK23LOW;

   /*  perform multiplication */
   /*  if either f_x or f_y is a power of two, then */
   /*  simply add exponents */

   s_ExpZ  = s_ExpX + s_ExpY;
   s_ExpZ -= EXPONENT_BIAS;

   if (f_FracX == 0)           /*  f_x is a power of two */
   {
      f_FracZ = f_FracY;
   }
   else
   {
      if (f_FracY == 0)       /*  f_y is a power of two */
      {
         f_FracZ = f_FracX;
      }
      else
      {
         f_FracX += HIDDEN_BIT;
         f_FracY += HIDDEN_BIT;


         /*  shift fractions; use max 31 bits */
         f_FracX <<= (32 - EXP32SHIFT_LOW + NUM_ROUND_BITS_MPY)/2;
         f_FracY <<= (32 - EXP32SHIFT_LOW + NUM_ROUND_BITS_MPY)/2;

         /*  get upper and lower 16bits of fractions */
         us_m1 = (uint16) (f_FracX & MASK16LOW);
         us_m2 = (uint16) (f_FracY & MASK16LOW);
         us_m3 = (uint16) (f_FracX >> 16);
         us_m4 = (uint16) (f_FracY >> 16);

         /*  multiply: low x low, keep only its upper half */
         MULU16(f_FracZ, (Float32)us_m1, us_m2);
         uc_fnzero_trun=0;
         if(f_FracZ & 0xffff)
         {
            uc_fnzero_trun=1;
         }
         f_FracZ >>= 16;

         /*  add the two cross products */
         MULU16(f_tmp, (Float32)us_m4, us_m1);
         f_FracZ += f_tmp;

         MULU16(f_tmp, (Float32)us_m3, us_m2);
         f_FracZ += f_tmp;

         if(f_FracZ & 0xffff)
         {
            uc_fnzero_trun=1;
         }

         /*  high x high plus the carry-in from the middle word */
         MULU16(f_tmp, (Float32) us_m3, us_m4);
         f_FracZ = f_tmp + (f_FracZ >> 16);

         /*
          *  normalize result, round if necessary
          *
          *  The mask ((0x3 << k) - 1) selects the bits under the half bit
          *  (bit k) plus the future LSB; together with the sticky flag this
          *  adds the half bit on everything except an exact tie with an
          *  even LSB, i.e. ties go to even.
          */
         if (f_FracZ >= (HIDDEN_BIT << (NUM_ROUND_BITS_MPY + 1)))
         {
            /* significand product was in [2,4) */
            if(uc_fnzero_trun ||
                  (f_FracZ & ((0x3 << NUM_ROUND_BITS_MPY) - 1)))
            {
               f_FracZ += (0x1 << NUM_ROUND_BITS_MPY);
            }
            f_FracZ >>= (NUM_ROUND_BITS_MPY + 1);
            s_ExpZ += 1;
         }
         else
         {
            /* significand product was in [1,2) */
            if(uc_fnzero_trun ||
                  (f_FracZ & ((0x3 << (NUM_ROUND_BITS_MPY - 1)) - 1)))
            {
               f_FracZ += (0x1 << (NUM_ROUND_BITS_MPY - 1));
            }
            f_FracZ >>= NUM_ROUND_BITS_MPY;
         }

         /* take care of possible overflow caused by rounding */
         if (f_FracZ >= (HIDDEN_BIT << 1))
         {
            f_FracZ >>= 1;
            s_ExpZ += 1;
         }
      }
   }

   /*  check for underflow and overflow */

   if (s_ExpZ <= 0)        /*  underflow: flush to (signed) zero */
   {
      f_FracZ = 0;
      s_ExpZ = 0;
   }

   else if (s_ExpZ >= EXP_INF_NAN)     /* overflow */
   {
      s_ExpZ = EXP_INF_NAN;
      f_FracZ = 0;
   }

   /*  pack result and return */

   f_z = (f_FracZ & MASK23LOW) + ((Float32)s_ExpZ << EXP32SHIFT_LOW);
   f_z += f_sign;

   return f_z;
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : divf32
 *
 *  Description:    Returns the quotient of f_x/f_y.  Current implementation
 *  uses long division.  There may be a more efficient algorithm
 *  (Newton-Raphson iteration).
 *
 *  Prototype:      Float32 divf32(Float32 f_x, Float32 f_y)
 *
 *  Input Arguments:
 *      f_x     dividend
 *      f_y     divisor
 *
 *  Output Arguments:
 *
 *  Return:
 *      quotient of f_x / f_y
 *
 *  Notes:
 *
 *------------------------------------------------------------------------
 *^^^
 */

/*
* Float32 divf32(Float32 f_x, Float32 f_y);
* return f_z = f_x/f_y.
*/

Float32 divf32(Float32 f_x, Float32 f_y)
{
   /*
    * Divides two Float32 bit patterns using bit-serial long division on the
    * 24-bit significands.
    *
    * A divisor whose exponent field is 0 is treated as zero: the result is
    * REAL_NAN for 0/0 and REAL_INF otherwise, with the sign of the quotient
    * added in.
    *
    * A zero dividend (+0 or -0) now returns a signed zero.  Previously it
    * was run through the normal path, so 0 / y with |y| < 1 produced a
    * non-zero result (e.g. 0 / 2^-100 gave 2^-27).
    */

   Float32 f_z;
   Float32 f_sign;

   Float32 f_FracX, f_FracY, f_FracZ;
   int16 s_ExpX, s_ExpY, s_ExpZ;

   /*  get sign of quotient */
   f_sign  = f_x ^ f_y;
   f_sign &= NEG32;

   /*  unpack exponents; */
   s_ExpX  = (int16) (f_x >> EXP32SHIFT_LOW);
   s_ExpX &= MASK8LOW;
   s_ExpY  = (int16) (f_y >> EXP32SHIFT_LOW);
   s_ExpY &= MASK8LOW;

   /*  check for divide by zero */
   if (s_ExpY == 0)
   {
      if ((f_x & ~NEG32) == 0)
      {
         f_z = REAL_NAN;   /*  0 / 0 = NAN */
      }
      else
      {
         f_z = REAL_INF;   /*  f_x / 0 = INF */
      }
      f_z += f_sign;
      return f_z;
   }

   /*  0 / f_y = 0 (carrying the sign of the quotient) */
   if ((f_x & ~NEG32) == 0)
   {
      return f_sign;
   }

   /*  unpack fractions */
   f_FracX = f_x & MASK23LOW;
   f_FracY = f_y & MASK23LOW;

   /*  perform division  */
   /*  if f_y is a power of two, then */
   /*  simply subtract exponents */

   s_ExpZ  = s_ExpX - s_ExpY;
   s_ExpZ += EXPONENT_BIAS;

   if (f_FracY == 0)       /*  f_y is a power of two */
   {
      f_FracZ = f_FracX;
   }

   else                    /*  perform long division */
   {
      f_FracX += HIDDEN_BIT;
      f_FracY += HIDDEN_BIT;
      f_FracZ = 0;

      /* significand quotient is below 1: the result needs one more left shift */
      if (f_FracX < f_FracY)
      {
         s_ExpZ--;
      }

      /* one quotient bit per pass, until f_FracZ exceeds (HIDDEN_BIT << 1), */
      /* which leaves at least one bit below the 24-bit result for rounding */
      do
      {
         f_FracZ <<= 1;

         if (f_FracX >= f_FracY)
         {
            f_FracX -= f_FracY;
            f_FracZ += 1;
         }

         f_FracX <<= 1;
      }
      while (f_FracZ <= (HIDDEN_BIT << 1) );

      /* round on the extra bit, then drop it */
      f_FracZ += 1;
      f_FracZ >>= 1;
   }

   /*  check for underflow and overflow */

   if (s_ExpZ <= 0)        /*  underflow: flush to (signed) zero */
   {
      f_FracZ = 0;
      s_ExpZ = 0;
   }

   else if (s_ExpZ >= EXP_INF_NAN)     /*  overflow */
   {
      s_ExpZ = EXP_INF_NAN;
      f_FracZ = 0;
   }

   /*  pack result and return */

   f_z = (f_FracZ & MASK23LOW) + ((Float32)s_ExpZ << EXP32SHIFT_LOW);

   f_z += f_sign;

   return f_z;
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : cmpgtf32
 *
 *  Description:  Compares two Float32 values and determines if the first
 *  argument is greater than the second
 *
 *  Prototype:  int cmpgtf32(Float32 f1, Float32 f2)
 *
 *  Input Arguments:
 *
 *  Output Arguments:
 *
 *  Return:
 *      1   f1 >  f2
 *      0   f1 <= f2
 *
 *  Notes:
 *
 *------------------------------------------------------------------------
 *^^^
 */

int cmpgtf32(Float32 f1, Float32 f2)
{
   /*
    * Returns 1 when f1 > f2, otherwise 0.
    *
    * The encoding is sign-magnitude, so for two non-negative values the raw
    * bit patterns order the same way as the values, and for two negative
    * values the order is reversed.
    *
    * +0 and -0 are treated as equal; previously cmpgtf32(+0, -0) returned 1.
    * NaN operands get no special treatment.
    */

   /* both operands are zero (any combination of signs) */
   if (((f1 | f2) & ~NEG32) == 0)
   {
      return 0;
   }

   if ((f1 & NEG32) == 0)       /*  f1 is positive */
   {
      if ((f2 & NEG32) == NEG32)
      {
         return 1;              /*  positive > negative */
      }

      /*  f1, f2 both positive: larger bit pattern is the larger value */
      return (f1 > f2) ? 1 : 0;
   }

   /*  f1 is negative */
   if ((f2 & NEG32) == 0)
   {
      return 0;                 /*  negative is never > positive */
   }

   /*  f1 and f2 both negative: larger bit pattern is the smaller value */
   return (f2 > f1) ? 1 : 0;

}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : absf32
 *
 *  Description:    Finds the absolute value of a floating point number
 *
 *  Prototype:  Float32 absf32(Float32 f_in)
 *
 *  Input Arguments:
 *      f_in
 *
 *  Output Arguments:
 *
 *  Return:
 *      | f_in |
 *
 *  Notes:
 *
 *------------------------------------------------------------------------
 *^^^
 */

Float32 absf32(Float32 f_in)
{
   /* a set sign bit is toggled off; every other bit passes through untouched */
   return ((f_in & NEG32) == NEG32) ? (f_in ^ NEG32) : f_in;
}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name :      sqrtf32
 *
 *  Description:    Computes the square root of a Float32 value. Recall that
 *      f_x = frac * (2^exp), 1 <= frac < 2
 *
 *  for exp even:
 *      sqrt(f_x)  = sqrt(frac') * (2^(exp/2)); frac' = frac
 *
 *  for exp odd:
 *      sqrt(f_x)  = sqrt(frac) * (2^((exp-1)/2)) * sqrt(2)
 *                 = sqrt(2 * frac) * (2^((exp-1)/2))
 *                 = sqrt(frac') * (2^((exp-1)/2)), frac' = 2 * frac
 *
 *  sqrt(frac') is determined by first estimating sqrt(frac') w/ a first
 *  order Taylor Series Expansion and then performing two Newton iterations
 *
 *  Prototype:  Float32 sqrtf32(Float32 f_x)
 *
 *  Input Arguments:
 *      f_x
 *
 *  Output Arguments:
 *      none
 *
 *  Return:
 *      the square root of f_x
 *
 *  Notes:  Current version sometimes differs from Microsoft Visual C
 *  results by 1 significant bit.  If greater precision is required then
 *  either the initial estimate of sqrt(frac') can be improved by using
 *  a higher ordered Taylor Series expansion, or another Newton iteration
 *  can be performed.
 *
 *------------------------------------------------------------------------
 *^^^
 */

Float32 sqrtf32(Float32 f_x)
{
   int16   s_Exp;                /* exponent field of f_x, later of the result */
   Float32 f_Arg;                /* frac' re-packed as a Float32 */
   Float32 f_Root;               /* running estimate of sqrt(frac') */
   Float32 f_Offset, f_Slope;    /* coefficients of the linear first guess */
   int16   s_Iter;

   /*  any input with the sign bit set yields NAN */
   if ((f_x & NEG32) != 0)
   {
      return REAL_NAN;
   }

   /*  extract exponent field */
   s_Exp  = (int16) (f_x >> EXP32SHIFT_LOW);
   s_Exp &= MASK8LOW;

   /*  zero and denormalized inputs (exponent field 0) yield 0 */
   if (s_Exp == 0)
   {
      return 0;
   }

   /*  keep the fraction bits and overlay ONE32 so frac is itself a Float32 */
   f_Arg = (f_x & MASK23LOW) | ONE32;

   /*  the stored exponent carries a bias of 127, so an odd stored exponent */
   /*  means the true exponent is even, and vice versa */
   if (s_Exp & 0x01)                   /*  true exponent is even: frac' = frac */
   {
      f_Offset = 0x3F1CC471;           /* 0.61237244 = sqrt(1.5) / 2 */
      f_Slope  = 0x3ED105EC;           /* 0.40824829 = 1 / (2*sqrt(1.5)) */
   }
   else                                /*  true exponent is odd: frac' = 2 * frac */
   {
      f_Arg += ((int32)1 << EXP32SHIFT_LOW);   /* +1 on the exponent field doubles the value */
      f_Offset = 0x3F5DB3D7;           /* 0.8660254 = sqrt(3) / 2 */
      f_Slope  = 0x3E93CD3A;           /* 0.2886751 = 1 / (2*sqrt(3)) */
   }

   /*  exponent field of the result */
   s_Exp = (s_Exp + EXPONENT_BIAS) >> 1;

   /*  first guess: f_Slope * frac' + f_Offset */
   f_Root = addf32(mpyf32(f_Arg,f_Slope), f_Offset);

   /*  two Newton iterations: root = (root + frac'/root) / 2 */
   for (s_Iter = 0; s_Iter < 2; s_Iter++)
   {
      f_Root = mpyf32(ONE_HALF, addf32(f_Root, divf32(f_Arg, f_Root)));
   }

   /*  combine the fraction bits of the root with the result exponent */
   return (f_Root & MASK23LOW) + ((Float32)s_Exp << EXP32SHIFT_LOW);

}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name :      expf32()
 *
 *  Description:    Given Float32 value f_x, returns the largest integer
 *  s_x such that
 *      2^(s_x) <= f_x
 *
 *  Prototype:  int16 expf32(Float32 f_x)
 *
 *  Input Arguments:
 *      f_x
 *
 *  Output Arguments:
 *
 *  Return:
 *     s_x      largest integer exponent of 2 such that 2^s_x <= f_x
 *
 *  Notes:
 *
 *------------------------------------------------------------------------
 *^^^
 */

int16 expf32(Float32 f_x)
{
   /*
    * Returns the unbiased exponent of f_x: the 8-bit exponent field minus
    * EXPONENT_BIAS.  The sign and fraction bits are ignored.
    *
    * The working variable is an ordinary automatic; it used to be declared
    * static, which made the function non-reentrant for no benefit.
    */
   int16 s_x;

   s_x  = (int16) (f_x >> EXP32SHIFT_LOW);
   s_x &= MASK8LOW;

   return s_x - EXPONENT_BIAS;

}

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name :      pow2f32()
 *
 *  Description:    Given an int16 value, s_x, returns 2^s_x
 *
 *  Prototype:  Float32 pow2f32(int16 s_x)
 *
 *  Input Arguments:
 *      s_x     power of 2 to be computed
 *
 *  Output Arguments:
 *
 *  Return:
 *      2^s_x
 *
 *  Notes:
 *
 *------------------------------------------------------------------------
 *^^^
 */

Float32 pow2f32(int16 s_x)
{
   Float32 f_BiasedExp;

   /*  exponents above 127 are reported as infinity */
   if (s_x > 127)
   {
      return REAL_INF;
   }

   /*  exponents below -126 are reported as zero */
   if (s_x < -126)
   {
      return 0;
   }

   /*  a power of two has a zero fraction: only the exponent field is set */
   f_BiasedExp = (Float32)s_x + EXPONENT_BIAS;

   return f_BiasedExp << EXP32SHIFT_LOW;

}

#endif // FLOAT_IN_SW
