/* **COPYRIGHT******************************************************************
    INTEL CONFIDENTIAL
    Copyright (C) 2017 Intel Corporation
    Copyright (C), 1994-2008 Aware Inc. All Rights Reserved.
******************************************************************COPYRIGHT** */
/* **DISCLAIMER*****************************************************************
    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors. Title to the Material remains with Intel
    Corporation or its suppliers and licensors. The Material may contain
    trade secrets and proprietary and confidential information of Intel
    Corporation and its suppliers and licensors, and is protected by
    worldwide copyright and trade secret laws and treaty provisions. No part
    of the Material may be used, copied, reproduced, modified, published,
    uploaded, posted, transmitted, distributed, or disclosed in any way
    without Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise. Any license under
    such intellectual property rights must be express and approved by
    Intel in writing.
*****************************************************************DISCLAIMER** */
/****************************************************************************
;   Aware DMT Technology. Proprietary and Confidential.
;
;   40 Middlesex Turnpike, Bedford, MA 01730-1413
;   Phone (781) 276 - 4000
;   Fax   (781) 276 - 4001
;
;   File Name: GenFramingParams_VDSL2.c
;
;   This file contains the functions for generating VDSL2 framing parameters.
;
;
*****************************************************************************/

// ***********************************************************************************************************
// History
//
// 23/08/2012 VENKATESH: XDSLRTFW-506 Lower DS Rates with INP > 2 cases & No Connect with INP = 16
//                    Grep for XDSLRTFW-506-TELEFONICA_MAXPROFILE-INP-CASES
//
// 28/11/2012 Ram: Merged fix related to JIRA XDSLRTFW-389 No Connect in INP & Fixed rate cases
//                 Grep for XDSLRTFW-389-TELEFONICA_FIXED-INP-CASES
// 30/10/2012 VENKATESH: XDSLRTFW-389 No Connect in INP & Fixed rate cases
//                    Grep for XDSLRTFW-389-TELEFONICA_FIXED-INP-CASES
//27/06/2014  Varun : Subtract additional RRC bits from max available Lp for Framing in US ReTx only mode.
//                    Grep for XDSLRTFW-1607
//11/07/2017 Abu Rahman : XDSLRTFW-3554 DS datarate exceeds configured max datarate
//           - Enabled Min-Max Rate check for IKNS and CNXT DSLAMs
//           - Forced LP to the round(Multiple of 8) and then re-compute the rate for IKNS and CNXT DSLAM
//           - Added debug variable to debug framing generation algorithm
//           Grep for XDSLRTFW-3554
//
// 22/11/2017 Abu Rahman
//            XDSLRTFW-3556: VRx518 shows different method_0 attainable datarate than VR9(R7)
//            Introduced a CMV based option to select Method_0 framing based DS ATTNDR or
//            channel capacity based DS ATTNDR calculation algorithm.
//                Set CNFG 86 3 = 0 : to select frame based Method_0 ATTNDR algorithm or
//                Set CNFG 86 3 = 1 : to select channel capacity based Method_0 ATTNDR algorithm (VR9-R7 like)
//            Note that this switching works only with Method_0 configuration
//            SEARCH PATTERN: XDSLRTFW-3556
// ************************************************************************************************************

#include <string.h>
#include "common.h"
#include "dsp_op.h"
#include "mul.h"
#include "gdata.h"
#include "GenFramingParams_VDSL2.h"
#include "Bitload_support.h"
#include "Bitload.h"
#include "ghs.h"
#include "Framing_VDSL2.h"
#include "CalcMaxIlvDilvSize.h"
#include "cmv.h"
#include "cmv_Data.h"
#include "vdsl_state.h"

// Retransmission (ReTx) parameters; only declared here, the definition is not in this part of the file.
// FormVDSL2FramingParams() reads uc_UsReTxStatus from it: when it equals US_RETX_IN_USE,
// US_RRC_BITS is subtracted from the computed channel capacity (XDSLRTFW-1607).
extern ReTX_Params_t gt_ReTXParams;

/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       FlagT FormVDSL2FramingParams(VDSL2FramingParamsInputs_t *pt_Input, VDSL2Config_t *pt_Output)
*
*   Abstract:
*       This function implements the optimal VDSL2 rate generation algorithm.  This algorithm
*       takes as input measured SNR values along with parameter restrictions (e.g. Min-INP,
*       Min/Max-Delay, Min/Max-Data Rate, & Min/Max-Msg-Overhead Rate) and outputs
*       the framing configuration (i.e. Lp, Bpn, Mp, Dp, Rp, Tp, Gp, Fp) that the
*       receiver will use.
*
*       Reference Matlab code:
*          VDSL2Framing.m
*
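*   Input Parameters:
*       pt_Input      -- pointer to the input structure "VDSL2FramingParamsInputs_t"
*                        NOTE: this structure is also modified in place. Out-of-range or
*                        special values of s_MinINP, s_MaxDelay and s_MinDelay are overwritten
*                        during input validation, and us_MaxRate / us_MinRate may be adjusted
*                        (ATTNDR calculation, fixed-rate leeway, channel-init policy 1).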
*
*  Output Parameter:
*       pt_Output     -- pointer to the output structure "VDSL2Config_t"
*
*  Return:
*       SUCCEED       -- indicate we can generate a set of valid framing parameters
*       FAIL          -- indicate we cannot generate valid set of framing parameters
*
*   Global Variables:
*       gsa_log2Tbl   -- used to perform division by Mp
*       gs_VDSL2FrameParamGenStatus -- Bit-field register to indicate the error
*                                      code/status from FormVDSL2FramingParams()
*
*--------------------------------------------------------------------------------------------------
^^^*/

FlagT FormVDSL2FramingParams(VDSL2FramingParamsInputs_t *pt_Input, VDSL2Config_t *pt_Output)
{

   // control flags to determine when to stop the search
   FlagT ft_FeasibleSolutionFound, ft_Done, ft_LimitedRate;

   FlagT ft_CiPolicyInpIncrPossible; //Indication, whether the CiPolicyOneMaximizeINP algorithm found a solution with maximum Rate and higher INP (than normal rate maximized search algorithm)
   uint16 ft_Attndr8k;
   uint32 ul_InpNumerVal, ul_InpDenomVal;
   uint32 ul_InpNumerOpt, ul_InpDenomOpt;
   // control flags to determine delay bound
   FlagT ft_NoMaxDelayBound;

   // variables related to Lp bound
   uint16 us_LpMinOHRate;
   uint32 ul_LpMax, ul_LpMaxCh, ul_LpMaxINP, ul_LpMin;

   // variables related to CWSize bound
   int16 s_CWMax, s_CWMin;
   int32 l_CWMaxHW;

   // variables related to Tp & Mp ==> Tp = k*Mp
   uint16 us_k, us_mink, us_maxk;

   // variables related to Dp
   uint16 us_maxDp, us_minDp;
   int32 l_DpMaxCh, l_DpMaxDelay, l_DpAlphaMinus, l_DpMaxRefinement;  // use 32-bit to account for highest optimal-Dp value for the given set of parameters

   // variables related to Gp
   uint16 us_maxGp;
   int16 s_GpMinFinal, s_GpMaxFinal; // wng? do we really need to use signed number???

   // variables related to Data Rate bound
   uint32 ul_MaxLineRate, ul_LineRate;
   int32 l_MinTotalRate, l_MaxTotalRate;

   // variables related to Overhead Rate bound
   int16  s_MinIBitsOHR, s_MinOHRate, s_MaxOHRate;

   // variables related to feasible region (alphaplus and alphaminus line)
   uint16 usa_alpha_numer[6], usa_alpha_denom[6];            //0-1: for alphaplus, 2-5: for alphaminus
   uint16 us_alphaplus_other_numer, us_alphaplus_other_denom;
   uint16 us_alphaplus_maxdelay_numer,us_alphaplus_maxdelay_denom;
   uint16 us_alphaminus_other_numer, us_alphaminus_other_denom;
   uint16 us_alphaminus_mindelay_numer,us_alphaminus_mindelay_denom;
   uint16 us_alphaplus_numerator, us_alphaplus_denominator, us_alphaminus_numerator, us_alphaminus_denominator;

   // variables to store the optimal parameter set
   uint16 us_BpOpt, us_KpOpt, us_RpOpt, us_MpOpt, us_DpOpt, us_TpOpt, us_GpOpt, us_FpOpt, us_CWSizeOpt;
   uint32 ul_LpOpt;
   int32 l_NetRateOpt;

   // variables to store currently searched parameter set
   uint16 us_Bp, us_Kp, us_Rp, us_Mp, us_Dp, us_Tp, us_Gp, us_Fp, us_CWSize;
   uint32 ul_Lp;
   int32 l_NetRate;
   // note: we use signed 32-bit for NewLp and deltaLp to catch any error case
   int32 l_OHRate, l_deltaNDR, l_deltaLp, l_newLp;
   uint16 us_PayloadBytes;

   // variables for storing derived parameters
   int16 s_Up, s_IBitsOHR, s_MinOHRateFinal, s_MaxOHRateFinal;
   uint16 us_Qmax, us_DRmax, us_MaxRatemax, us_SEQp; //XDSLRTFW-3226 (Start_End)

   // variables for storing frequently used terms:
   int16 s_k_x_CWSize, s_OHOct_Per_MDF, s_MDF_Per_Symbol, s_OHOct_Per_Sym_rounded;

   // variables to store latency path and bearer channel mapping.
   int16 s_path, s_bc;

   // temp register variables
   int16 s_r0, s_r1, s_temp, s_lp;
   int32 l_a0, l_a1;
   uint16 us_temp;
   uint32 ul_temp1, ul_temp2, ul_temp3, ul_temp4, ul_LpMax_DueToMinDelay;
   int16 s_CWTemp,s_KpTemp;
   int16 s_SMaxValue;

   // loop-counter (all in native 32-bit format to save program memory)
   int32  idx_R;
   FlagT  ft_BitloadOK;
   uint32 ul_LpMaxrate;

   uint16 s_CWMaxValue;
   uint32 ul_DMinINP;

   // Initialization of global variables
   gft_LimitedRatecase = 0;
   gft_AdaptiveRatecase = 0;
   gft_FixedRatecase = 0;
   gft_RestrictDpTo1 = 0;

   // Initialization of local variables
   ft_CiPolicyInpIncrPossible = 0;
   ft_NoMaxDelayBound = 0;
   ft_BitloadOK = 0;
   s_temp = 0;
   s_lp = 0;
   ul_LpMax_DueToMinDelay = 0;

   s_CWMaxValue = 255;


   //====================================================================================
   // Always clear the status register before we start a new framing parameter search
   // note: we don't want to clear it in "InitGlobalVars_PreHandshake" so that we can keep
   //       a memory of what happened in last link.
   //====================================================================================
   gs_VDSL2FrameParamGenStatus = 0;

   //====================================================================================
   // Determine the latency path & the bearer channel.
   // WARNING: Current algorithm only handles 1 latency path & 1 bearer channel.
   //          and we only allow the usage BC0 for now.
   //====================================================================================
   s_path = pt_Input->s_BCtoLP[BC0];
   if ( s_path != DISABLED_LP )
   {
      // note: input path can be either LP0 or LP1.
      s_bc = BC0;
   }
   else
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_BC0_DISABLE;
      return FAIL;
   }

   if (pt_Input->s_BCtoLP[BC1] != DISABLED_LP)
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_BC1_ENABLE;
      return FAIL;
   }

   //====================================================================================
   // Perform intial error checking of the input variables to make sure
   // they are in the valid range.  These checks are important to ensure we are not
   // overflowing registers after multiplications & divisions.
   //====================================================================================
   // Safety checks:
   // (1) make sure MinINP in valid range 0 <= MinINP <= 16
   if ( (pt_Input->s_MinINP<0) || (pt_Input->s_MinINP>16) )
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_INVALID_MININP;
      // Do not fail training
//      return FAIL;
      pt_Input->s_MinINP = 0; //Workaround for DSLAM configuration issue
   }

   // (2) make sure input MaxNetRate >= MinNetRate
   if (pt_Input->us_MinRate > pt_Input->us_MaxRate)
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_MINRATE_GT_MAXRATE;
      return FAIL;
   }
   // (3) MaxRateReq > 32kbps
   if (pt_Input->us_MaxRate < 4)
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_MAXRATE_BELOW_32KBPS;
      return FAIL;
   }

   // (4) make sure Min & Max Msg-Overhead rate within the range
   //      16 <= MSGmin < 248 kbps
   //      MSGmax = 256 kbps
   if (pt_Input->s_OHFrameType == 1)       // Only check if we set OHFrameType==1
   {

      if ((pt_Input->s_MinMsgOHR < 16) || (pt_Input->s_MinMsgOHR >= 248))
      {
         gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_INVALID_MINMSGOHR;
         return FAIL;
      }

      if (pt_Input->s_MaxMsgOHR > 256)
      {
         //wng? is this correct???
         gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_INVALID_MAXMSGOHR;
         return FAIL;
      }
   }

   // (5) make sure Dmax is always <= 4096.
   if (pt_Input->s_Dmax > 4096)
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_DMAX_GT_4096;
      return FAIL;
   }

   // (6) make sure 1/Smax is always <= 48 (standard case) or 64 (extended case).
   if (gsa_Optn2_AlgControl[2] & OPTN_DSExtSMaxDisable)
   {
      s_SMaxValue = 48;
   }
   else
   {
      s_SMaxValue = 64;
   }
   if (pt_Input->s_OneOverSmax > s_SMaxValue)
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_1OVERSMAX_TOO_BIG;
      return FAIL;
   }

   // (7) make sure MaxDelay in valid range & overwrite MinDelay for special MaxDelay value
   //       0 <= MaxDelay <= 63; OR
   //       MaxDelay = 255
   //     also, set the corresponding flag to handle the special meaning of MaxDelay:
   //       a) MaxDelay = 0   ==> no bound on the max delay
   //       b) MaxDelay = 1   ==> restrict Dp to 1 only.
   //       c) MaxDelay = 255 ==> max delay of 1ms

   gft_RestrictDpTo1 = FALSE;
   if (pt_Input->s_MaxDelay == 1 || ((pt_Input->s_MaxDelay == 0) && (gul_dbgMiscControl & FORCE_D1_FOR_MAXDELAY0)))
   {
      // The value "MaxDelay" = 1 is a special value indicating that the interleaver depth Dp shall be
      // set to Dp=1.  (note: this doesn't guarantee a max delay of 1 ms because S can be >= fs)
      gft_RestrictDpTo1 = 1;

      // By restricting Dp to 1, it implies S/fs <= MaxDelay
      // Since S <= 64, it also means MaxDelay = 64/fs in such case.
      //pt_Input->s_MaxDelay = 64/pt_Input->s_fs;
      pt_Input->s_MaxDelay = floor32((64<<FRAME_RATE_SHIFT_CNT), pt_Input->s_fs);
      pt_Input->s_MinDelay = 0;
   }
   else if (pt_Input->s_MaxDelay == 0)
   {
      // The value "MaxDelay" = 0 is a special value indicating that no bound on the maximum delay
      // is being imposed.
      ft_NoMaxDelayBound = 1;

      // Make sure we have no bound on MinDelay as well.
      pt_Input->s_MinDelay = 0;
   }
   else if (pt_Input->s_MaxDelay == 255)
   {
      // The value "MaxDelay" = 255 is a special value indicating a max delay of 1 ms.
      pt_Input->s_MaxDelay = 1;

      // make sure MinDelay is always <= MaxDelay for such special case.
      if (pt_Input->s_MinDelay > pt_Input->s_MaxDelay)
      {
         pt_Input->s_MinDelay  = pt_Input->s_MaxDelay;
      }

   }
   else if ( (pt_Input->s_MaxDelay<0) || (pt_Input->s_MaxDelay>63) )
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_INVALID_MAXDELAY;
      // XDSLRTFW-2002 IOP_DS_VDSL2_ALL_CAP_CO_INP_DEALY (START)
      // Do Not fail training
//      return FAIL;
      if(pt_Input->s_MaxDelay < 0)
         pt_Input->s_MaxDelay = 1; //Workaround for DSLAM configuration issue
      else if (pt_Input->s_MaxDelay > 63)
         pt_Input->s_MaxDelay = 63; //Workaround for DSLAM configuration issue
      // XDSLRTFW-2002 IOP_DS_VDSL2_ALL_CAP_CO_INP_DEALY (END)
   }

   // (8) make sure MinDelay <= MaxDelay
   if (pt_Input->s_MinDelay > pt_Input->s_MaxDelay)
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_MINDELAY_GT_MAXDELAY;
      return FAIL;
   }

   // check if we are in fixed rate case
   // XDSLRTFW-389 PERF_DS_ALL_ALL_FixedRate (Start)
   // Telefonica has 8kbps diference between Min & Max Rates
   // Consider this case to be a Fixed Rate Case
   // Which Helps in selecting the Right DpMax & DpMin Range with INP Cases
   // XDSLRTFW-389-TELEFONICA_FIXED-INP-CASES
   if ((pt_Input->us_MinRate == pt_Input->us_MaxRate) ||
       (pt_Input->us_MinRate == (pt_Input->us_MaxRate-1)))
   {
      gft_FixedRatecase = 1;
   }

   //====================================================================================
   // Initialization of profile dependent local variables used for max range before doing the search
   //====================================================================================
   us_MaxRatemax = 0x7FFF;
   us_Qmax = QMAX;
   us_DRmax = DR0;
   if (gt_ProfileAct.us_ProfileSelected & CNFG_V2_PROFILE_35B_MASK)
   {
      us_Qmax = QMAX_35B;
      us_DRmax = DR0_35B;
      us_MaxRatemax = 0xFFFF;
   }

   //====================================================================================
   // Reset all the outputs before doing the search
   //====================================================================================
   us_BpOpt = 0;
   us_RpOpt = 0;
   us_MpOpt = 0;
   us_DpOpt = 0;
   us_TpOpt = 0;
   us_GpOpt = 0;
   us_FpOpt = 0;
   ul_LpOpt = 0;
   us_CWSizeOpt = 0;
   l_NetRateOpt = 0;
   ft_Done = FALSE;
   ft_FeasibleSolutionFound = FALSE;
   // reset search input INP to 0 by setting input INP numerator to 0 and input INP denominator to 1
   ul_InpNumerOpt = 0;
   ul_InpDenomOpt = 1;
   ft_CiPolicyInpIncrPossible = FALSE;

   //====================================================================================
   // Loop through all Rp, Mp, Tp & a subset of Dp to find best combination.
   //====================================================================================
   for (idx_R = pt_Input->s_MaxR; idx_R >= pt_Input->s_MinR; idx_R -=pt_Input->s_RStep)
   {
      // wng - temp fix since using unsigned is not going to work in the for-loop with
      //       a decrementing counter that goes below 0.
      us_Rp = (uint16) idx_R;

      // R has to be non-zero to meet non-zero MinINP requirement
      if (us_Rp == 0 && pt_Input->s_MinINP > 0)
      {
         continue;
      }

      //----------------------------------------------------------------
      //Find channal capacity
      //----------------------------------------------------------------
#ifndef STANDALONE_FRAMING_TEST
      // XDSLRTFW-1877 : VDSL2 IFEC ATTNDR (Start)
      //In normal operation, the SNR is based on the framing calculation.
      //Therefore, during initialization the rate calculation has to be done twice:
      //1.   For the actual framing with the configured max net data rate (MaxRate).
      //2.   For ATTNDR calculation with  Max net data rate = unlimited = assume maximum possible
      // value AND consider all conditions of the selected ATTNDR_METHOD as listed in Table 1 in Attndr_Concept_v1.pdf
      // The second run of the framing generation (not doing the full bitloading) in which the max rate
      // constraints are removed.
      if(gft_CalcAttndr == TRUE)
      {
         //gl_MaxSumLpSupported = gla_MaxSumLp[(gus_RpOpt_Attndr>>1)];
         gl_MaxSumLpSupported = gla_MaxSumLp[(us_Rp>>1)]; // XDSLRTFW-1935 (Start_End)
         pt_Input->us_MaxRate = us_MaxRatemax;
         if(guc_attndr_method == ATTNDR_METHOD_2)
         {
            pt_Input->s_MinINP = 0;
         }
      }
      else
      {
         // XDSLRTFW-1877 : VDSL2 IFEC ATTNDR (End)
         // skip channel capacity calculation for STANDALONE_FRAMING_TEST
         ft_BitloadOK = CalcChannelCapacity((int16)us_Rp, &gl_MaxSumLpSupported);
         // Incase we force Lp to be a multiple of 8, we 'round' Lp
         // Hence, it is possible we increase the Lp from its nominal value
         // To account for this arbitrary increase in Lp, we reduce the channel
         // capacity beforehand so that we do not fail in BitloadFixedRate later
         if (gul_dbgMiscControl & FORCE_MULTIPLE_OF_8_L_ADAPTIVE_RATE_BITLOAD)
         {
            if (gl_MaxSumLpSupported > 8)
            {
               gl_MaxSumLpSupported -= 8;
            }
         }

         if (ft_BitloadOK == FAIL)
         {
            gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_CHANNEL_CAPACITY_CALC;
            return (FlagT)FAIL;
         }

         //XDSLRTFW-1607(START)
         if(gt_ReTXParams.uc_UsReTxStatus == US_RETX_IN_USE)
         {
            gl_MaxSumLpSupported-=US_RRC_BITS;
         }
         //XDSLRTFW-1607(END)
         // save channel capacity given a value of check byte
         gla_MaxSumLp[(us_Rp>>1)] = gl_MaxSumLpSupported;
      }  // Else XDSLRTFW-1877

      // skip framing for LDM mode and VDSL2 standalone Bitload test
      if ((gul_OperationModeStatus_VDSL2 & V2_LOOP_DIAG) ||
          (gs_DbgFramingDisableFlag == 1))
      {
         // if a different limit (specified by mw) is desired, choose the minimum of two
         if (gl_MaxLpLimit > 0)
         {
            gl_MaxSumLpSupported = MIN(gl_MaxSumLpSupported, gl_MaxLpLimit);
         }

         gt_rx_config_v2.s_Rp[LP0] = idx_R;
         gt_rx_config_v2.ul_Lp[LP0] = (uint32)gl_MaxSumLpSupported;

         return (FlagT)SUCCEED;
      }
#endif // STANDALONE_FRAMING_TEST

      ul_LpMaxCh = (uint32)gl_MaxSumLpSupported;

      // XDSLRTFW-389 PERF_DS_ALL_ALL_FixedRate (End)

      //====================================================================================
      // Determine if we are in Fixed Rate case (MinRate = MaxRate).
      // If so, we allow a lee-way of +8 kbps (according to VDSL2 Annex-K K2.7)
      // Standard also specifies that if MinRate < MaxRate, then MaxRate shall be set at
      // least 8 kbps above the MinRate.
      //====================================================================================
      if ((pt_Input->us_MaxRate - pt_Input->us_MinRate) < FIXRATE_LEEWAY/8)   //fixed rate
      {
         pt_Input->us_MaxRate = pt_Input->us_MinRate + FIXRATE_LEEWAY/8;

         // Channel Initialization Policy (Rate Optimization, INP optimization etc.)
         // T-REC-G.993.2-201112-I!!PDF-E-10 (chapter 12.3.7)
         // Policy ONE a) case:
         // If the minimum net data rate is set equal to the maximum net data rate then:
         // 1)   Maximize INP_act n  for bearer channel #n.
      }

      if (pt_Input->uc_ChannelInitPolicy == 1)
      {
         // To enhance the feature for CI policy 1:
         // - We do not target for the min rate but for the attainable data rate if possible
         //   (i.e. if we have a valid attndr estimation).
         // - We are not allowed to change min rate in fixed rate profile
         // - Algorithm:  min reserve data rate = MIN(attndr, max data rate) - 8kbps

         if ( us_Rp == 16 )        // INP increase Algorithm assumes an R = 16.
         {
#ifndef STANDALONE_FRAMING_TEST
            ComputeMaxDataRate();
            // convert bps to units of 8 kbps (same unit as us_MaxRate, which it is compared against below)
            ft_Attndr8k = (uint16) (((gt_LineStatusDS.ul_AttainableDataRate >> 3) / 1000));
#else
            ft_Attndr8k = (uint16)( (gl_MaxSumLpSupported >> 3) );   // gl_MaxSumLpSupported in Framing Test is in kpbs
#endif
            // If the ATTNDR is bigger than the configured MaxRate and CIpolicy=1 we try to transform excess margin into INP increase
            // For this, calculate a rough estimate of the maximal achievable INP based on Cw, Lp, and Dp (also considering framing parameter constraints)
            if (ft_Attndr8k > pt_Input->us_MaxRate)
            {
               ft_CiPolicyInpIncrPossible = CiPolicyOneMaximizeINP( pt_Input );
            }

            if ((pt_Input->us_MaxRate - pt_Input->us_MinRate) > FIXRATE_LEEWAY/8)  // rate adaptive
            {
               // Channel Initialization Policy (Rate Optimization, INP optimization etc.)
               // T-REC-G.993.2-201112-I!!PDF-E-10 (chapter 12.3.7)
               // Policy ONE b) case:
               // If the minimum net data rate is not set equal to the maximum net data rate then
               // 1) Maximize net data rate for all the bearer channels
               // 2) If  such  maximized  net  data  rate  is  equal  to  the  maximum  net  data  rate , maximize INP_act n  for the bearer channel #n
               // 3) Minimize excess margin wrt MAXSNRM through gains scalings or other control paramters (point 3 not relevant in framing algorithm)

               if(ft_CiPolicyInpIncrPossible)
               {
                  // Only increase MinRate when we found a valid solution for MaxRate in CiPolicyOneMaximizeINP
                  pt_Input->us_MinRate = (uint16) (pt_Input->us_MaxRate - FIXRATE_LEEWAY/8);
               }
            }
         }
      }


      // upfront decide if you can do framing being requested or not and
      // also decide if you are in capped rate or rate adptive scenario.
      if (((uint32)(pt_Input->us_MaxRate)<<1) < ul_LpMaxCh)
      {
         gft_LimitedRatecase=1;
      }
      else if (((uint32)(pt_Input->us_MaxRate)<<1) >= ul_LpMaxCh)
      {
         gft_AdaptiveRatecase = 1;
      }


      // For fixed rate cases, Lp is limited by max rate
      // NDRp <= Maxrate
      // (Lp * fs) <= [(8* Maxrate) + ORp] * Nfec/(Nfec - Rp)
      // Max value of Nfecp/(Nfecp - Rp] equals 2 and ORp max = (64 * fs)
      // Hence, (Lp * fs) <= [(8*Maxrate) + (64*fs)] * 2
      // ie. max Lp = (16 * MaxRate)/fs + 128

      //ul_temp1 = ((int32)(pt_Input->us_MaxRate << 4))/pt_Input->s_fs + 128;
      ul_temp1 = floor32((int32)(pt_Input->us_MaxRate << (4+FRAME_RATE_SHIFT_CNT)), pt_Input->s_fs);
      ul_temp1 += 128;

      // Handle fixed rate case here
      if (ul_LpMaxCh > ul_temp1)
      {
         ul_LpMaxCh = ul_temp1;
      }

      /***
         Unlike ADSL2, we now have to compute LpMin, MaxTotalRate & MinTotalRate after
         knowing the channel capacity.  It's because the PERp requirement
         is dependant on PERBp and in turns PERBp is dependant on line rate (DRp = Lp*fs).
         In essense, it means there's a limit on the PERp based on the line rate,
         and therefore we cannot find the Min/MaxOHRate until we know the max line rate.
      ***/

      //----------------------------------------------------------------
      //Find Max Line Rate, Max PERp, Min/Max OHRate
      //----------------------------------------------------------------
      // MaxLineRate = LpMaxCh*fs
      //MULU16(ul_MaxLineRate, ul_LpMaxCh, pt_Input->s_fs);
      MULS32xU16(ul_MaxLineRate, ul_LpMaxCh, pt_Input->s_fs);
      ul_MaxLineRate += (1<<(FRAME_RATE_SHIFT_CNT-1));
      ul_MaxLineRate >>= FRAME_RATE_SHIFT_CNT;

      if (pt_Input->s_OHFrameType == 1)
      {
         // For OH Frame Type 1, it carries both IB-based and MSG-based portion of Ovhd Frame:
         //-----------------------------------------------------------------------------------
         //   Octet 1      = CRCp
         //   Octet 2      = Syncbyte
         //   Octet 3-5    = IBits
         //   Octet 6      = NTR
         //   Octet 6-SEQp = MSGc = Msg Ovhd
         //
         // According to the formula on PERBp, we know that Qhat is limited to
         //    17000 bytes if line rate is >= 7880 kbps  => for legacy profiles
         //                     OR
         //    34000 bytes if line rate is >= 15760 kbps => for 35b profile.
         // This means Qhat >= PERBp.  Since PERp is defined as:
         //     PERp = (8*PERBp)/(Lp*fs)
         // Therefore,
         //     PERp <= (8*Qhat)/(Lp*fs)
         // The limit on Qhat implies we have a limitation on PERp for line rate above 7880 kbps.
         // In general, we want to have the highest possible PERp because this translate to lower OHRate
         // So to determine the IB-portion of OHRate, we would select the highest
         // possible PERp given the max line rate as follow:
         //
         //      if (MaxLineRate > DR0)
         //         MaxPERp = floor(8*Qmax/MaxLineRate);
         //      else
         //         MaxPERp = floor(8*Qmax/DR0);
         //      end
         //      MinIBitsOHR = ceil(6*8/MaxPERp);  % take the ceiling to be more conservative
         //
         if (ul_MaxLineRate > us_DRmax)
         {
            /***
               Ideally, we should always floor MaxPERp so that we are more conservative and can account for the worst case difference
               in PERBp and Qhat (that's when we have largest size of an OH-Subframe = Tp/Mp*Nfec = 64*255 = 16320 bytes).
               But in reality, we don't want to floor MaxPERp when MaxLineRate can be > Qmax*8 = 136000.
               In this case, MaxPERp is < 1ms and flooring it would make it become 0, which is not good when we compute MinIBitsOHR.
               So we cannot be conservative in finding initial Min/MaxOHRate.  But at end of the search, where
               we compute Gp, then we make sure we can meet the Min/Max MsgOHRate requirement.
            ***/

            // compute the numerator for MinIBitsOHR:
            // l_a0 = ul_MaxLineRate*6;
            MULS32x16(l_a0, ul_MaxLineRate, 6);
            // denominator for MinIBitsOHR = us_Qmax
            s_MinIBitsOHR = ceil32u((uint32)l_a0, us_Qmax);
         }
         else
         {
            /***
               For LineRate < 7880, Qhat is adjusted proportional to line rate, therefore PERp is capped at ~17.25ms
            ***/
            //XDSLRTFW-3226 (Start)
            l_a0 = us_Qmax<<3;
            s_r0 = l_a0/us_DRmax;  // MaxPERp = s_r0 = floor(8*Qmax/DR0)
            s_MinIBitsOHR = ceil16(48,s_r0);
            //XDSLRTFW-3226 (End)
         }

         // Compute Min/Max Overhead Rate
         s_MinOHRate = pt_Input->s_MinMsgOHR + s_MinIBitsOHR;
         s_MaxOHRate = pt_Input->s_MaxMsgOHR + s_MinIBitsOHR;


         // We should always limit MaxOHRate correctly sot that we can have better optimal result in Fixed-Rate and Rate-Limited cases
         // The the limit of 64*fs is imposed by the standard:
         //    ORp = Gp*Mp/Sp/Tp * 8 * fs
         // And we know that Gp*Mp/Sp/Tp has to be <= 8, therefore we need to make sure ORp <= 64*fs.
         //if (s_MaxOHRate > (pt_Input->s_fs << 6))   // if (MaxOHRate > 64*fs) ?
         //   s_MaxOHRate = (pt_Input->s_fs << 6);
         us_temp = pt_Input->s_fs >> (FRAME_RATE_SHIFT_CNT - 6);

         if (s_MaxOHRate > us_temp)
         {
            s_MaxOHRate = us_temp;
         }
      }
      else
      {
         // For OH Frame Type 2, we are only allow SEQp of 8 bytes:
         //--------------------------------------------------------
         //   Octet 1 = CRCp
         //   Octet 2 = Syncbyte
         //   Octet 3-8 = reserved
         //
         // Since ORp = SEQp*8/PERp, and we know 1 <= PERp <= 20 [ms]
         // Then we can find the limit on OHRate based on PERp limits:
         //   1 <= 8*8/ORp <= 20
         //   64/20 <= ORp <= 64/1
         //
         // NOTE1: since we now have a bigger range of PERp, we can
         //        ceil the MinOHRate and floor the MaxOHRate
         // NOTE2: standard didn't specify the MinPERp, let's make it 1 ms to easy computation.
         //
         s_MinOHRate = ceil16(64, MAX_PER);   //MinOHRate = ceil(64/MaxPERp);
         s_MaxOHRate = 64;                    //MaxOHRate = floor(64/MinPERp);

      } //end if (pt_Input->s_OHFrameType == 1)


      //----------------------------------------------------------------
      //Find the Max and Min Total Rate
      // -- this is important for Rate-Limited/Fixed-Rate case
      //----------------------------------------------------------------
      // convert the unit of MaxRate from 8 kbps to kbps
      l_a0 = (pt_Input->us_MaxRate) << 3;
      // compute MaxTotalRate = MaxRate + MaxOHRate [in kbps]
      l_MaxTotalRate = l_a0 + s_MaxOHRate;

      // convert the unit of MinRate from 8 kbps to kbps
      l_a0 = (pt_Input->us_MinRate) << 3;
      // compute MinTotalRate = MinRate + MinOHRate [in kbps]
      l_MinTotalRate = l_a0 + s_MinOHRate;


      //----------------------------------------------------------------
      //Find the Min Lp required based on MinRate & MinOHRate Requirement
      //  LpMin = max(8,ceil((MinRate+MinOHRate)/fs));
      //----------------------------------------------------------------
      //us_LpMinOHRate = (uint16) ceil32(l_MinTotalRate, pt_Input->s_fs);
      us_LpMinOHRate = (uint16) ceil32((l_MinTotalRate<<FRAME_RATE_SHIFT_CNT), pt_Input->s_fs);

      if (us_LpMinOHRate < 8)
      {
         us_LpMinOHRate = 8;
      }

      us_mink = 1;     // since k can only be integer and we want Tp >= 1
      //XDSLRTFW-1249 : DS Port drop with NVLT-C
      //Aviod Kp = 1 for IKANOS. As Some Capped Rate cases
      //Drops out of showtime
      if (gul_fe_G994VendorID == IKNS_VENDOR_ID)
      {
         us_mink = 2;
      }
      //XDSLRTFW-1249 : DS Port drop with NVLT-C
//LOOP_M_START:
      for (us_Mp = MIN_M; us_Mp <= guc_maxM; us_Mp <<= 1)
      {

         // wng? This rule doesn't show up in VDSL2.  But it's probably good to rule-of-thumb.
         if (us_Rp == 0 && us_Mp > 1)
         {
            break;
         }

         //---------------------------------------------
         //Relationship of Tp and Mp:
         // Tp = k*Mp, where k is an integer, Tp <= 64
         //----------------------------------------------
         us_maxk = guc_maxT/us_Mp; // maxk limit comes from Tp <= 64
//LOOP_T_START:
         for (us_k = us_mink; us_k <= us_maxk; us_k += 1)
         {
            //Find Tp for the given k and Mp
            //  Tp = k*Mp;
            MULU16(us_Tp, us_k, us_Mp);


            //---------------------------------------------
            //Relationship of Tp and Gp and meaning of Opi:
            // Opi ~= Gp/Tp
            // Ceil(Gp/Tp) <= 8
            // 1 <= Gp <= 32
            //---------------------------------------------
            //Now we can also find Min and Max value for Gp based on the above info:
            //minGp = 1;
            //maxGp = min(32, 8*Tp);
            us_maxGp = us_Tp<<3;
            if (us_maxGp > guc_maxG)
            {
               us_maxGp = guc_maxG;
            }

            // In case there is INP and latency limitation specified, one can determine what is CW to use and
            // get an idea of Lp required and Dp required. This is helpful to determine Dp for capped rate cases.
            // To satisfy the INP requirement: 4 * R * D / L > Min INP
            //                     L < 4 R D / Min INP
            //            Latency : S*D /4 < Max latency
            //                     L > 2 * CW * D / Max Latency
            // Equating these two : CW = 2  * Max Lat * R / INP

            if ((pt_Input->s_MinINP!=0) &&(ft_NoMaxDelayBound==0))
            {
               // calculate cw to use
               s_CWMaxValue = ((((pt_Input->s_MaxDelay) *(us_Rp) )<<1) )/(pt_Input->s_MinINP);
               if (s_CWMaxValue < 32)
               {
                  continue;
               }
               if (s_CWMaxValue > 255)
               {
                  s_CWMaxValue = 255;
               }
            }

            // Ideally since Lp is dependent on CW and is needed to determine Dp,
            // we can loop thru all CW, but that is very time consuming. So we either start with Max CW or
            //  CW as determined above


            // Since we are currently not seeing any issue in the rate-adaptive case, apply the following change only for capped rate.
            // The aim for capped rate is to finally pick the smallest L and the largest CW.

            // Telefonica has an 8 kbps difference between the Min & Max rates.
            // Consider this case to be a fixed-rate case,
            // which helps in selecting the right DpMax & DpMin range for INP cases.
            // Let's make this explicit here: for the fixed-rate and INP case, this condition should
            // pass all the time.
            //XDSLRTFW-389 PERF_DS_ALL_ALL_FixedRateCase

            //if ( ((!gft_LimitedRatecase) && (!gft_AdaptiveRatecase) && (pt_Input->s_MinINP !=0)) ||
            //     ((gft_FixedRatecase) && (pt_Input->s_MinINP !=0)) )
            if(0) //Not Required : XDSLRTFW-662 BugFix_DS_ALL_ALL_FixedRateCases
            {


               //   ul_LpMaxrate = ((pt_Input->us_MaxRate + 32 ) * us_CWloop *2 + ( us_CWloop - us_Rp - 1))/ (us_CWloop - us_Rp) ;
               ul_LpMaxrate = ((pt_Input->us_MaxRate + 32 ) * s_CWMaxValue *2 )/ (s_CWMaxValue - us_Rp) ;

               if (ul_LpMaxrate > ul_LpMaxCh)
               {
                  ul_LpMaxrate = ul_LpMaxCh;
               }

               // one can add checks to see if this is feasible region or not here.
               ul_DMinINP = (int32)(((pt_Input->s_MinINP) * ul_LpMaxrate)+(4*us_Rp - 1))/(4 * us_Rp);

               if (ul_DMinINP > (uint32)gul_DILV_MEM_SIZE>>4)
               {
                  ul_DMinINP = gul_DILV_MEM_SIZE>>4;
               }

               us_maxDp = (int16)(ul_DMinINP) + 34;
               us_minDp = us_maxDp - 34;

            }
            else
            {
               //---------------------------------------------------------------------------------------
               // Find the optimal Dp that gives us the highest TotalRate based on the following logic:
               //    If we were allowed to have any D, then it's always optimal to pick a Dp such that Lp is only
               //    limited by the channel capacity (i.e. LpMaxCh).
               //    Suppose Lp is already limited by LpMaxCh for the given D, then further increase
               //    in Dp is not going to help because it forces the CWSize to be lower in order
               //    to meet the MaxDelay requirement.  On the other hand, we also don't want to pick
               //    a Dp value such that Lp is being limted by LpMaxINP.  Hence the optimal-D is exactly the
               //    point when LpMaxCH = LpMaxINP = 4*Dp*Rp/MinINP.
               //---------------------------------------------------------------------------------------

               if (pt_Input->s_MinINP != 0)
               {
                  //XDSLRTFW-591 PERF_DS_ALL_ALL_LimitedRateCase (Start)
                  // l_a0 = MinINP*LpMaxCh
                  //if( ((pt_Input->us_MaxRate)<<1) < ul_LpMaxCh) {
                  //Calculate LpMax from DS Max rate
                  //Nfec = Mp*kp + Rp
                  //NDR = (Bp * Mp) * Lp/(Mp*kp+Rp)
                  //LpMax = (NDR * NFEC)/((Kp-1) * Mp)

                  //Both Adaptive and Limited Rates will have to follow this sequence
                  // If Calculated Lp is More than what SNR can Support, use the Channel Max Rate From SNR
                  ul_LpMaxrate = (((pt_Input->us_MaxRate << 1)* (us_Mp * us_k + us_Rp))/ (us_k * us_Mp));
                  if (ul_LpMaxrate > ul_LpMaxCh)
                  {
                     ul_LpMaxrate = ul_LpMaxCh;
                  }
                  MULS32xU16(l_a0, ul_LpMaxrate, (uint16)pt_Input->s_MinINP);

                  //XDSLRTFW-591 PERF_DS_ALL_ALL_LimitedRateCase (End)
                  // s_r0 = 4*Rp
                  s_r0= us_Rp << 2;
                  // DpMaxCh = floor(MinINP*LpMaxCh/(4*Rp));
                  // note1: since we will be search around this optimal-D, then flooring is ok.
                  // note2: use 32-bit to store DpMaxCh to handle worst case
                  l_DpMaxCh = l_a0/s_r0;

                  /*
                     We should also make sure that the optimal Dp is not so high such that
                     it force CWSize to be below CWMin.

                     We know that CWSize*Dp <= DILV_MEM_SIZE*2
                     Therefore:   CWSize <= DILV_MEM_SIZE*2/Dp
                           But:   32 <= CWSize  (required by standard)
                         Hence:   32 <= DILV_MEM_SIZE*2/Dp
                                  Dp <= DILV_MEM_SIZE*2/32
                     if (DpMaxCh > DILV_MEM_SIZE/16)
                        DpMaxCh = DILV_MEM_SIZE/16;
                     end
                  */

                  if (l_DpMaxCh > (int32)gul_DILV_MEM_SIZE>>4)
                  {
                     l_DpMaxCh = gul_DILV_MEM_SIZE>>4;
                  }

               }
               else
               {

                  /***
                     If there's no INP constraint, pick the Dp value such that we can use highest possible CWSize of 255.
                     This would allow framing parameter with some kind of burst error protection.
                  ***/

                  //(1) Consider Dp being constraint by Deinterleaver Memory size
                  //    DpMaxCh = floor(DILV_MEM_SIZE*2/255);
                  l_DpMaxCh = (gul_DILV_MEM_SIZE<<1)/MAX_CWSIZE;


                  //(2) If we have MaxDelay constraint, we also have to consider the fact that
                  //    increasing Dp would lower the "alphaplus" line and therefore lowering the
                  //    final CWSize for a given LpMaxCh (Lp limit by channel capacity).
                  //    The ideal case is to have highest CWSize and Lp, and hence we can define
                  //    the optimal alphaplus as: MAX_CWSIZE/LpMaxCh.
                  //    With this optimal alphaplus value, we can then find the optimal Dp for a given MaxDelay.
                  if (ft_NoMaxDelayBound == 0)     // Note: if there's no delay, then don't consider adjusting DpMaxCh
                  {

                     //s_r0 = pt_Input->s_MaxDelay*pt_Input->s_fs;
                     //MULS16(s_r0, pt_Input->s_MaxDelay, pt_Input->s_fs);
                     MULU16(ul_temp1, pt_Input->s_MaxDelay, pt_Input->s_fs);
                     ul_temp1 += (1<<(FRAME_RATE_SHIFT_CNT-1));
                     s_r0 = (int16)(ul_temp1>>FRAME_RATE_SHIFT_CNT);

                     //l_a0 = ul_LpMaxCh*s_r0;
                     MULS32x16(l_a0, ul_LpMaxCh, s_r0);

                     //s_r0 = 255*8;
                     s_r0 = MAX_CWSIZE<<3;

                     // DpMaxDelay =  floor(MaxDelay*fs*LpMaxCh/(8 *255)) + 1;
                     l_DpMaxDelay = l_a0/s_r0 + 1;

                     // Always pick the smaller Dp to allow maximum CWSize.
                     if (l_DpMaxCh>l_DpMaxDelay)
                     {
                        l_DpMaxCh = l_DpMaxDelay;
                     }
                  }
               } //end if (pt_Input->s_MinINP != 0)

               // safety check:
               if (l_DpMaxCh > (int32) pt_Input->s_Dmax)
               {
                  l_DpMaxCh = pt_Input->s_Dmax;
               }


               // Due to the requirement that Dp and CWSize have to be co-prime, we might not be able to use the
               // optimal-D for the given CWSize.  Therefore, we should search around the optimal-D value.
               // If D is a prime number, then we can work with any CWSize as long as CWSize is not a multiple
               // of D.  We can determine the range of D to search by finding the worst-case
               // distance between successive prime numbers that are <= MaxD (e.g. 4096).  This worst-case distance
               // is 34.  So we can search from DpMaxCh-34/2 to DpMaxCh+34/2.
               // Of course, we have to make sure D is between 1 and the Dmax limited by the profile!

               l_DpMaxRefinement = 0;  // this is use to expand the range of Dp to be searched to give more optimal result in special test cases

               // When MinDelay==MaxDelay, we would always be search on the line "alphaplus==alphminus" and there's a higher chance of
               // not finding optimal Dp value within a small range.  Therefore, we use "DpMaxRefinement" to increase the search range.
               // In addition, it's favourable to put more weight to search for Dp that
               // is higher than the optimal-Dp because higher Dp is only going to reduce final CWSize but not Lp.
               // On the other hand, if we choose Dp lower than the optimal-Dp, then it would reduce Lp.
               //
               if (ft_NoMaxDelayBound==0)     // if there's no delay bound, MaxDelay actually is set to 0 and it's equal to MinDelay.  So make sure we don't adjust DpMax in such case
               {
                  if (pt_Input->s_MaxDelay == pt_Input->s_MinDelay)
                  {
                     l_DpMaxRefinement = 10;
                  }
               }

               // if we have to restrict Dp to 1, then always set minDp=maxDp=1
               if (gft_RestrictDpTo1==0)
               {
                  //minDp = max(1, DpMaxCh-DpSearchRange);

                  us_minDp = l_DpMaxCh-gus_DSearchRange;
                  if (((int16)us_minDp) < 1)            // use signed comparison to handle the case when "l_DpMaxCh<gus_DSearchRange"
                  {
                     us_minDp = 1;
                  }

                  //maxDp = min(DpMaxProfile, DpMaxCh+DpSearchRange+DpMaxRefinement);
                  us_maxDp = l_DpMaxCh+gus_DSearchRange+l_DpMaxRefinement;
                  if (us_maxDp > pt_Input->s_Dmax)
                  {
                     us_maxDp = pt_Input->s_Dmax;
                  }
               }
               else
               {
                  us_minDp = 1;
                  us_maxDp = 1;
               }

            }
            /***
               At this point, we can determine all the inequalities used to compute
               alphaplus and alphaminus, except for the ones dependent on Dp.
               The idea is to adjust Dp whenever possible such that we will yield
               a non-zero feasible region.  If this is not possible, we can skip
               the D-Loop and continue to check the next Mp,Tp,Rp.
            ***/


            //---------------------------------------------------------------------------------------
            // Compute feasible region (1st estimation)
            // Note: to keep the precision, we stores the numerator and the denominator
            //       separately for computation
            //---------------------------------------------------------------------------------------
            /* Pseudo code:
               alpha(1) = 8;                           % bound by Sp <= 64
               alpha(2) = maxGp*fs/(MinOHRate*k);      % bound by MinOHRate & MaxGp
               alpha(3) = MaxDelay*fs/((minDp - 1)*8); % bound by MaxDelay -- use minDp because this is the highest alphaplus_maxdelay we can get.  If this still doesn't yield a feasible region, we had better adjust it
               alphaplus_maxdelay = alpha(3);

               % thresholds for bounds of form CWSize >= alpha(i)*Lp
               alpha(4) = 1/(8*OneOverSmax);           % bound by 1/Sp <= 1/Smax
               alpha(5) = Mp/512;                      % bound by Mp/Sp <= 64
               alpha(6) = 1/(64*k);                    % bound by Gp*Mp/(Tp*Sp) <= 8
               alpha(7) = fs/(MaxOHRate*k);            % bound by MaxOHRate & MinGp

               alpha(8) = MinDelay*fs/((maxDp - 1)*8);       % bound by MinDelay -- use maxDp because this is the lowest alphaminus_mindelay we can get.  If this doesn't yield a feasible region, we had better adjust Dp.
               alphaminus_mindelay = alpha(8);
            */

            //---------------------------------------------------------------------------------------
            // Thresholds for bounds in form CWSize <= alpha(i)*Lp:
            //---------------------------------------------------------------------------------------
            usa_alpha_numer[0] = 8;
            usa_alpha_denom[0] = 1;

            if (gul_dbgMiscControl & FORCE_Sp_LESS_THAN_1)
            {
               // Special interop mode - Need Sp <=1 for the ILV of the other side to work properly
               usa_alpha_numer[0] = 1;
               usa_alpha_denom[0] = 8;
            }


            //MULU16(usa_alpha_numer[1], us_maxGp, pt_Input->s_fs);   //usa_alpha_numer[1] = us_maxGp*pt_Input->s_fs;
            MULU16(ul_temp1, us_maxGp, pt_Input->s_fs);
            ul_temp1 += (1<<(FRAME_RATE_SHIFT_CNT-1));
            usa_alpha_numer[1] = (int16)(ul_temp1>>FRAME_RATE_SHIFT_CNT);

            MULU16(usa_alpha_denom[1], us_k, s_MinOHRate);         //usa_alpha_denom[1] = us_k*s_MinOHRate;

            //MULU16(us_alphaplus_maxdelay_numer, pt_Input->s_MaxDelay, pt_Input->s_fs);   //us_alphaplus_maxdelay_numer = pt_Input->s_MaxDelay*pt_Input->s_fs;
            MULU16(ul_temp1, pt_Input->s_MaxDelay, pt_Input->s_fs);
            ul_temp1 += (1<<(FRAME_RATE_SHIFT_CNT-1));
            us_alphaplus_maxdelay_numer = (int16)(ul_temp1>>FRAME_RATE_SHIFT_CNT);


            us_alphaplus_maxdelay_denom = (us_minDp - 1)<<3;

            //---------------------------------------------------------------------------------------
            // Thresholds for bounds in form CWSize >= alpha(i)*Lp:
            //---------------------------------------------------------------------------------------
            usa_alpha_numer[2] = 1;
            usa_alpha_denom[2] = pt_Input->s_OneOverSmax<<3;

            usa_alpha_numer[3] = us_Mp;
            usa_alpha_denom[3] = 512;

            usa_alpha_numer[4] = 1;
            usa_alpha_denom[4] = us_k<<6;

            //usa_alpha_numer[5] = pt_Input->s_fs;
            usa_alpha_numer[5] = pt_Input->s_fs + (int16)(1<<(FRAME_RATE_SHIFT_CNT-1));
            usa_alpha_numer[5] >>= FRAME_RATE_SHIFT_CNT;

            MULU16(usa_alpha_denom[5], us_k, s_MaxOHRate);      //usa_alpha_denom[5] = us_k*s_MaxOHRate;

            //MULU16(us_alphaminus_mindelay_numer, pt_Input->s_MinDelay, pt_Input->s_fs);      //us_alphaminus_mindelay_numer = pt_Input->s_MinDelay*pt_Input->s_fs;
            MULU16(ul_temp1, pt_Input->s_MinDelay, pt_Input->s_fs);
            ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
            us_alphaminus_mindelay_numer = ul_temp1>>FRAME_RATE_SHIFT_CNT;


            us_alphaminus_mindelay_denom = (us_maxDp - 1)<<3;

            /***
               Determine if the other inequalities are the limiting factors such that we don't have a feasible region.
               If so, we can skip searching Dp
             ***/

            // Find alphaplus_other = min(alpha[0], alpha[1]);
            FindAlphaPlusOther(usa_alpha_numer, usa_alpha_denom, &us_alphaplus_other_numer, &us_alphaplus_other_denom);

            // Find alphaminus_other = max(alpha[2], alpha[3], alpha[4], alpha[5])
            FindAlphaMinusOther(usa_alpha_numer, usa_alpha_denom, &us_alphaminus_other_numer, &us_alphaminus_other_denom);

            // Cross-multiply alphaplus_other & alphaminus_other:
            //ul_temp1 = us_alphaplus_other_numer*us_alphaminus_other_denom;
            //ul_temp2 = us_alphaminus_other_numer*us_alphaplus_other_denom;
            //
            MULU16(ul_temp1, us_alphaplus_other_numer, us_alphaminus_other_denom);
            MULU16(ul_temp2, us_alphaminus_other_numer, us_alphaplus_other_denom);

            // if (alphaplus_other < alphaminus_other), then other inequalities are the limiting factors
            // and we don't have a feasible region, so look for next Tp
            if (ul_temp1 < ul_temp2)
            {
               continue;
            }


            /***
               When there's a MinINP constraint, we need to further refine the search range of Dp based on:
               a) the limitation on CWSize based on DILV_MEM_SIZE,
               b) the limit on Lp for a given CWSize due to alphaminus limit

               We can find the optimal-Dp by solving the following equation:
                 LpMaxINP = LpMaxAlphaMinus

               Where:
                 LpMaxINP = 4*Dp*Rp/MinINP          (highest Lp for a given Dp, Rp, and MinINP limit)
                 LpMaxAlphaMinus = CWMax/alphaminus (highest Lp for a given CWMax bounded by alphaminus limit)
                 CWMax = DILV_MEM_SIZE*2/Dp         (highest CWSize for a given Dp bounded by Deinterleaver Memory limit)

               Therefore:
                 4*Dp*Rp/MinINP = DILV_MEM_SIZE*2/Dp/alphaminus

                 Dp = sqrt[(DILV_MEM_SIZE*MinINP)/(2*Rp*alphaminus)]

               And if such Dp (DpAlphaMinus) is smaller than the one derived based on the channel capacity
               constraint (DpMaxCh), then we should always use DpAlphaMinus.

                 Note : This determines solution for D, so that Lp is maximized. This is what we need to do for adaptive rate, but for limited /fixed
                 rate cases, this is not optimal, as we already have a fair idea of Lp to be used.
            ***/
            //XDSLRTFW-591 PERF_DS_ALL_ALL_LimitedRateCase (Start)
            // Also, make sure if Dp is restrict to 1, then DON'T adjust min/maxDp!
            //XDSLRTFW-506-TELEFONICA_MAXPROFILE-INP-CASES
            //Enable the Optimum D Selection for Limited Rate also: Issue seen with NVLT-G/ISAM-C/VDSF
            if ((gft_RestrictDpTo1==0) && (pt_Input->s_MinINP !=0) &&
                  (gft_AdaptiveRatecase || gft_LimitedRatecase))
            {
               //l_a0 = DILV_MEM_SIZE*pt_Input->s_MinINP;
               MULS32x16(l_a0, gul_DILV_MEM_SIZE, pt_Input->s_MinINP);

               //l_a1 = l_a0*us_alphaminus_other_denom;
               MULS32xU16(l_a1, l_a0, us_alphaminus_other_denom);

               //s_r1 = us_Rp*2*us_alphaminus_other_numer;
               MULU16(s_r1, (uint16)(us_Rp<<1), us_alphaminus_other_numer);

               //l_a0 = DILV_MEM_SIZE*MinINP/Rp/2/alphaminus_other
               l_a0 = l_a1/s_r1;

               //DpAlphaMinus = floor(sqrt(DILV_MEM_SIZE*MinINP/Rp/2/alphaminus_other)
               l_DpAlphaMinus = sqrt32(l_a0);

               //If minDp is still greater than the Dp imposed by DILV_MEM_SIZE & alphaminus,
               //then we should reduce Dp search range
               if (us_minDp >= l_DpAlphaMinus)
               {

                  //minDp = max(1, DpAlphaMinus-DpSearchRange);

                  us_minDp = l_DpAlphaMinus-gus_DSearchRange;
                  if (((int16)us_minDp) < 1)               // use signed comparison to handle the case when "l_DpMaxCh<gus_DSearchRange"
                  {
                     us_minDp = 1;
                  }

                  //maxDp = min(DpMaxProfile, DpAlphaMinus+DpSearchRange);
                  us_maxDp = l_DpAlphaMinus+gus_DSearchRange;
                  if (us_maxDp > pt_Input->s_Dmax)
                  {
                     us_maxDp = pt_Input->s_Dmax;
                  }

                  //Recompute the alphaplus/minus limit due to Max/MinDelay

                  //alphaplus_maxdelay = MaxDelay*fs/((minDp - 1)*8)
                  //MULU16(us_alphaplus_maxdelay_numer, pt_Input->s_MaxDelay, pt_Input->s_fs);   //us_alphaplus_maxdelay_numer = pt_Input->s_MaxDelay*pt_Input->s_fs;
                  MULU16(ul_temp1, pt_Input->s_MaxDelay, pt_Input->s_fs);
                  ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                  us_alphaplus_maxdelay_numer = ul_temp1>>FRAME_RATE_SHIFT_CNT;


                  us_alphaplus_maxdelay_denom = (us_minDp - 1)<<3;

                  //alphaminus_mindelay = MinDelay*fs/((maxDp - 1)*8);
                  //MULU16(us_alphaminus_mindelay_numer, pt_Input->s_MinDelay, pt_Input->s_fs);   //us_alphaminus_mindelay_numer = pt_Input->s_MinDelay*pt_Input->s_fs;
                  MULU16(ul_temp1, pt_Input->s_MinDelay, pt_Input->s_fs);
                  ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                  us_alphaminus_mindelay_numer = ul_temp1>>FRAME_RATE_SHIFT_CNT;


                  us_alphaminus_mindelay_denom = (us_maxDp - 1)<<3;


               }

            } //end if (pt_Input->s_MinINP !=0)

            // If we have no delay bound, then "alphaplus_maxdelay" & "alphaminus_mindelay" are guaranteed not to be limiting factors,
            // so there is no need to adjust minDp & maxDp here.
            // Also, if we have to restrict the usage of Dp to 1, then we shouldn't overwrite minDp and maxDp here either.
            if ((ft_NoMaxDelayBound==0) && (gft_RestrictDpTo1==0))
            {

               /***
                  Determine whether the range of Dp to be searched will yield a feasible region.
                  This code assumes that alphaplus_maxdelay >= alphaminus_mindelay, which is always true.
               ***/

               // Cross-multiply alphaplus_other & alphaminus_mindelay:
               //ul_temp1 = us_alphaplus_other_numer*us_alphaminus_mindelay_denom;
               //ul_temp2 = us_alphaminus_mindelay_numer*us_alphaplus_other_denom;
               //
               MULU16(ul_temp1, us_alphaplus_other_numer, us_alphaminus_mindelay_denom);
               MULU16(ul_temp2, us_alphaminus_mindelay_numer, us_alphaplus_other_denom);

               // Cross-multiply alphaminus_other & alphaplus_maxdelay:
               //ul_temp3 = us_alphaminus_other_numer*us_alphaplus_maxdelay_denom;
               //ul_temp4 = us_alphaplus_maxdelay_numer*us_alphaminus_other_denom;
               //
               MULU16(ul_temp3, us_alphaminus_other_numer, us_alphaplus_maxdelay_denom);
               MULU16(ul_temp4, us_alphaplus_maxdelay_numer, us_alphaminus_other_denom);


               if (ul_temp1 < ul_temp2)       // if (alphaplus_other < alphaminus_mindelay)
               {
                  //if alphaminus_mindelay is always above alphaplus_other even with maxDp,
                  //then we need to adjust minDp such that it makes alphaminus_mindelay
                  //to be always below alphaplus_other.  Further increase in Dp will open up the feasible region.
                  /* Pseudo code:

                     minDp = ceil((MinDelay*fs)/(alphaplus_other*8)) + 1;
                     % safety check
                     minDp = max(1, minDp);
                     maxDp = min(DpMaxProfile, minDp+DpSearchRange);
                  */

                  //l_a0 = us_alphaplus_other_denom*pt_Input->s_fs;
                  //MULU16(l_a0, us_alphaplus_other_denom, pt_Input->s_fs);   // denominator of alphaplus_other is <= 64*248=15872, so we need a 32-bit product
                  MULU16(l_a0, us_alphaplus_other_denom, pt_Input->s_fs);
                  l_a0 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                  l_a0 >>= FRAME_RATE_SHIFT_CNT;

                  //l_a1 = l_a0*pt_Input->s_MinDelay;
                  MULS32x16(l_a1, l_a0, pt_Input->s_MinDelay);

                  s_r0 = us_alphaplus_other_numer<<3;      // numerator of alphaplus_other is <= 256, so signed 16-bit is ok.

                  us_minDp = ceil32(l_a1,s_r0) + 1;

                  if (us_minDp < 1)                  // minDp is not going to be a negative number at this point
                  {
                     us_minDp = 1;
                  }

                  us_maxDp = us_minDp+gus_DSearchRange;
                  if (us_maxDp > pt_Input->s_Dmax)
                  {
                     us_maxDp = pt_Input->s_Dmax;
                  }

               }
               else if (ul_temp3 > ul_temp4)    // elseif (alphaminus_other > alphaplus_maxdelay)
               {
                  //If alphaplus_maxdelay is always below alphaminus_other even with minDp,
                  //then we need to adjust maxDp such that it makes alphaplus_maxdelay
                  //always above alphaminus_other.  A further decrease in Dp will open up the feasible region.
                  /* Pseudo code:
                     maxDp = floor((MaxDelay*fs)/(alphaminus_other*8)) + 1;
                     % safety check
                     maxDp = min(DpMaxProfile, maxDp);
                     minDp = max(1, maxDp-DpSearchRange);
                  */

                  //l_a0 = us_alphaminus_other_denom*pt_Input->s_fs;
                  //MULU16(l_a0, us_alphaminus_other_denom, pt_Input->s_fs);   // denominator of alphaminus_other is <= 64*256=16384, so we need a 32-bit product
                  MULU16(l_a0, us_alphaminus_other_denom, pt_Input->s_fs);
                  l_a0 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                  l_a0 >>= FRAME_RATE_SHIFT_CNT;

                  //l_a1 = l_a0*pt_Input->s_MaxDelay;
                  MULS32x16(l_a1, l_a0, pt_Input->s_MaxDelay);

                  s_r0 = us_alphaminus_other_numer<<3;   // numerator of alphaminus_other is <= 16, so signed 16-bit is ok.

                  us_maxDp = l_a1/s_r0 + 1;   // floor32(l_a1,s_r0) + 1;
                  if (us_maxDp > pt_Input->s_Dmax)
                  {
                     us_maxDp = pt_Input->s_Dmax;
                  }

                  us_minDp = us_maxDp-gus_DSearchRange;
                  if (((int16)us_minDp) < 1)               // use signed comparison to handle the case when "us_maxDp<gus_DSearchRange"
                  {
                     us_minDp = 1;
                  }

               } // else we are guarentee to have a feasible region and no need to adjust minDp and maxDp.

            } // end if ((ft_NoMaxDelayBound==0) && (gft_RestrictDpTo1==0))

//LOOP_D_START:
            // Always start search from the highest Dp to lowest, if we found that the TotalRate decrease
            // as we go for the lower Dp value, then we can stop the search.
            // (wng? not always true....due to limit on Deintlv memory, so we don't stop the search until we hit the end
            //       we can optimize the search strategy later on...)
            for (us_Dp = us_maxDp; us_Dp >= us_minDp; us_Dp -= 1)
            {
               //Mei: If we do this check, we will never choose R of 0 for the case of INP of 0 and Delay > 1
               // wng? This rule doesn't show up in VDSL2.  But it's probably a good rule of thumb.
               //if (us_Rp == 0 && us_Dp > 1)
               //   break;      // invalid case, no need to check remaining Dp and for next Tp...


               //---------------------------------------------------------------------------------------
               // Find the Max Lp
               // -- Lp is bounded by the channel capacity and the MinINP requirements
               //---------------------------------------------------------------------------------------

               // MaxLp is limited by the channel capacity
               ul_LpMax = ul_LpMaxCh;

               // Init the Min Lp required based on MinRate & MinOHRate Requirement
               ul_LpMin = us_LpMinOHRate;

               // Compute the maximum possible Lp as limited by minINP requirement
               // NOTE: if (minINP == 0), then we don't care about s_LpMaxINP
               if (pt_Input->s_MinINP !=0)
               {

                  /* Pseudo code:
                  LpMaxINP = floor(4*Dp*Rp/MinINP);
                  LpMax = min(LpMaxCh,LpMaxINP)
                     */

                  //ul_LpMaxINP =  4*us_Rp*us_Dp/pt_Input->s_MinINP;

                  // l_a0 = us_Rp*us_Dp;
                  MULU16(l_a0, us_Rp, us_Dp);
                  // l_a1 = l_a0*4;
                  l_a1 = l_a0<<2;
                  // ul_LpMaxINP = floor32(l_a1, s_MinINP)
                  ul_LpMaxINP = l_a1/pt_Input->s_MinINP;


                  // This INP fix only kicks in if there is no fixed rate setting
                  if ( (gft_EnableINPFix == TRUE) && (ul_LpMaxINP >= 16) && (pt_Input->us_MaxRate != (pt_Input->us_MinRate + FIXRATE_LEEWAY/8)) )// We need to have at least 2 bytes...
                  {
                     // Make Lp an integer multiple of 1 byte and, additionally, cut 1 byte
                     ul_LpMaxINP = (((ul_LpMaxINP >> 3)-1) << 3);
                  }

                  if (ul_LpMaxCh > ul_LpMaxINP)
                  {
                     ul_LpMax = ul_LpMaxINP;
                  }
               }

               // Safety check: LpMax has to be greater than LpMin
               // this check also handles the case of meeting the MinRate Requirement.
               if (ul_LpMax < ul_LpMin)
               {
                  continue;   // consider next Dp value
               }

               //---------------------------------------------------------------------------------------
               // Find the Min & Max CWSize
               // -- CWSize is bounded by the standard requirement and the
               //    size of the deinterleaver memory
               //---------------------------------------------------------------------------------------

               // Set CWMin according to the standard

               // Nfecp = Kp*Mp + Rp where Kp = Bpn + ceil(Gp/Tp) ie. Kp >= 2 for Bpn != 0
               s_CWMin = (us_Mp << 1) + us_Rp;

               // wng - FIXME - Need to check ZEP-ILV spec to get more accurate formula -- this is more conservative
               //
               // CW size is limited by DeIntlv Buffer size of 34048 bytes:
               //   CWSize*Dp/2 <= DILV_MEM_SIZE = 34048
               // Note: to handle worst case (highest DILV_MEM_SIZE of 34048 and smallest Dp of 1),
               //       we should save the "CWMaxHW" in 32-bit number
               l_CWMaxHW = (gul_DILV_MEM_SIZE<<1)/us_Dp;

               // CW size is also limited by the standard
               s_CWMax = MAX_CWSIZE;

               // Set CWMax:
               if ((int32)s_CWMax > l_CWMaxHW)
               {
                  s_CWMax = (int16) l_CWMaxHW;
               }

               // Safety check:
               if (s_CWMax < s_CWMin)
               {
                  continue;
               }


               // Now compute the alpha+/- bound due to Max/Min Delay for the given Dp
               /* Pseudo code:
                  if (ft_NoMaxDelayBound==0)
                     alphaplus_maxdelay = MaxDelay*fs/((Dp - 1)*8);          % bound by MaxDelay
                     alphaminus_mindelay = MinDelay*fs/((Dp - 1)*8);         % bound by MinDelay
                  else
                     alphaplus_maxdelay = alphaplus_other;
                     alphaminus_mindelay = 0;
                  end
               */
               if (ft_NoMaxDelayBound==0)
               {
                  // if there's a delay bound, always compute the alpha+/- bound based on Max/Min Delay for the given Dp

                  //us_alphaplus_maxdelay_numer = pt_Input->s_MaxDelay*pt_Input->s_fs;
                  //MULU16(us_alphaplus_maxdelay_numer, pt_Input->s_MaxDelay, pt_Input->s_fs);
                  MULU16(ul_temp1, pt_Input->s_MaxDelay, pt_Input->s_fs);
                  ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                  us_alphaplus_maxdelay_numer = ul_temp1>>FRAME_RATE_SHIFT_CNT;


                  us_alphaplus_maxdelay_denom = (us_Dp - 1)<<3;

                  //us_alphaminus_mindelay_numer = pt_Input->s_MinDelay*pt_Input->s_fs;
                  //MULU16(us_alphaminus_mindelay_numer, pt_Input->s_MinDelay, pt_Input->s_fs);
                  MULU16(ul_temp1, pt_Input->s_MinDelay, pt_Input->s_fs);
                  ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                  us_alphaminus_mindelay_numer = ul_temp1>>FRAME_RATE_SHIFT_CNT;

                  us_alphaminus_mindelay_denom = (us_Dp - 1)<<3;

               }
               else
               {
                  // if there's no delay bound, then make sure "alphaplus_maxdelay" & "alphaminus_mindelay" are not the limiting factor
                  us_alphaplus_maxdelay_numer = us_alphaplus_other_numer;
                  us_alphaplus_maxdelay_denom = us_alphaplus_other_denom;

                  us_alphaminus_mindelay_numer = 0;
                  us_alphaminus_mindelay_denom = 1;  // warning: denominator cannot be 0! any other +'ve number is ok.
               }

               //---------------------------------------------------------------------------------------
               // Compute the final feasible region:
               //   alphaplus = min(alphaplus_other,alphaplus_maxdelay);
               //   alphaminus = max(alphaminus_other, alphaminus_mindelay);
               //---------------------------------------------------------------------------------------

               // ensure that Sp <= 1 for Dp = 1
               if (gft_RestrictDpTo1 == 1)
               {
                  //   alphaplus_other = min(alphaplus_other,1/8);
                  if (us_alphaplus_other_denom < (us_alphaplus_other_numer << 3))
                  {
                     us_alphaplus_other_numer = 1;
                     us_alphaplus_other_denom = 8;
                  }
               }

               FindAlphaPlusFinal(&us_alphaplus_numerator, &us_alphaplus_denominator,
                                  &us_alphaplus_other_numer, &us_alphaplus_other_denom,
                                  &us_alphaplus_maxdelay_numer, &us_alphaplus_maxdelay_denom);

               FindAlphaMinusFinal(&us_alphaminus_numerator, &us_alphaminus_denominator,
                                   &us_alphaminus_other_numer, &us_alphaminus_other_denom,
                                   &us_alphaminus_mindelay_numer, &us_alphaminus_mindelay_denom);

               //Cross-multiply alphaplus & alphaminus:
               //ul_temp1 = us_alphaplus_numerator*us_alphaminus_denominator;
               //ul_temp2 = us_alphaminus_numerator*us_alphaplus_denominator;
               //
               MULU16(ul_temp1, us_alphaplus_numerator, us_alphaminus_denominator);
               MULU16(ul_temp2, us_alphaminus_numerator, us_alphaplus_denominator);

               // if (alphaplus < alphaminus), then we don't have a feasible region and should skip current Dp
               if (ul_temp1 < ul_temp2)
               {
                  continue;
               }

               //---------------------------------------------------------------------------------------
               // Further sanity check: verify that there's at least one valid
               // point inside the feasible region.
               //---------------------------------------------------------------------------------------

               // 1st Check: recall CWSize >= alphaminus*Lp,
               // so using LpMin, we find the min CWSize required and
               // we need to reject the current config if it requires min CW Size
               // greater than CWMax.
               /* Pseudo code:
                  CW1 = alphaminus*LpMin;   %CW1 can be a decimal number, so round it up in C-code before comparing w/ CWMax
                  if (CW1 > CWMax)
                     continue;
                  end
               */

               // ul_temp1 = us_alphaminus_numerator*ul_LpMin
               MULS32xU16(ul_temp1, ul_LpMin, us_alphaminus_numerator);

               s_CWTemp = ul_temp1/us_alphaminus_denominator;  // us_CWTemp=floor(alphaminus*LpMin)

               // ul_temp2 = us_CWTemp*us_alphaminus_denominator
               MULU16(ul_temp2, s_CWTemp, us_alphaminus_denominator);

               if (ul_temp2 < ul_temp1)
               {
                  s_CWTemp++;   // round up us_CWTemp before comparing with CWMax
               }

               if (s_CWTemp > s_CWMax)
               {
                  continue;   // consider next Dp value
               }

               // 2nd Check: recall CWSize <= alphaplus*Lp,
               // so using LpMax, we find the max CWSize allowed and
               // we need to reject the current config if CWMax is less
               // than the min CWSize allowed.
               /* Pseudo code:
                  CW1 = alphaplus*LpMax;    %CW1 can be a decimal number, so round it down in C-code before comparing w/ CWMin
                  K1 =  floor((CW1-Rp)/Mp);
                  CW1 = K1*Mp+Rp;
                  if (CW1 < CWMin)
                     continue;
                  end
               */

               // ul_temp = us_alphaplus_numerator*ul_LpMax
               MULS32xU16(ul_temp1, ul_LpMax, us_alphaplus_numerator);
               s_CWTemp = ul_temp1/us_alphaplus_denominator;

               // note: Kp has to be a signed number because CWTemp-Rp might be a negative number
               s_KpTemp = (int16) (s_CWTemp-us_Rp) >> gsa_log2Tbl[us_Mp]; // floor ((CW1-Rp)/Mp)

               // s_CWTemp = s_KpTemp*us_Mp + us_Rp;
               MULS16(s_CWTemp, s_KpTemp, us_Mp);
               s_CWTemp = s_CWTemp + (int16)us_Rp;
               if (s_CWTemp < s_CWMin)
               {
                  continue;   // consider next Dp value
               }


               //---------------------------------------------------------------------------------------
               // Compute optimal corner point of the feasible region.
               // The optimal corner can lie in the intersection of
               // either:
               //  1. Line CWSize = alphaplus*Lp & line Lp = LpMax
               //  2. Line CWSize = CWMax & line CWSize = alphaminus*Lp
               //  3. Line CWSize = CWMax & line Lp = LpMax
               //---------------------------------------------------------------------------------------
               if (s_CWTemp > s_CWMax)
               {
                  /***
                     This is Case when the line "CWSize=CWMax" & line "CWSize=alphaminus*Lp" intersect
                     therefore ul_Lp = CWSize/alphaminus

                     We need to limit the codeword size here and try finding the optimal point
                     given by Lp = CWMax/Alphaminus
                  ***/

                  // So we limit the final CWSize
                  // note: we can work with unsigned numbers from now since the CWSize is in valid range
                  us_CWSize = (uint16) s_CWMax;

                  // Recompute Kp based on the new CWSize: floor ((CWSize-Rp)/Mp)
                  us_Kp = (us_CWSize-us_Rp) >> gsa_log2Tbl[us_Mp];

                  // Recompute CWSize to consider quantization effect on Kp:
                  //   us_CWSize = us_Kp*us_Mp + us_Rp;
                  MULU16(us_CWSize, us_Kp, us_Mp);
                  us_CWSize = us_CWSize + us_Rp;

                  // We need to compute the new Lp based on us_CWSize: floor (CWMax/alphaminus)
                  //   ul_temp2 = ((us_CWSize)*us_alphaminus_denominator)/us_alphaminus_numerator;
                  //
                  // NOTE: we store the Lp value in a 32-bit variable because Lp based on alphaminus
                  //       might go above 65535, e.g. if CWMax=255, alphaminus=1/(8*OneOverSmax)=0.0026)
                  //
                  MULU16(ul_temp1, us_CWSize, us_alphaminus_denominator);
                  ul_temp2 = (ul_temp1/us_alphaminus_numerator);   //floor() is needed to keep point inside feasible region

                  /***
                     If (ul_Lp == ul_Lpmax) , the optimal point is the point where
                     the line "CWSize=CWMax" intersects the line "Lp=LpMax"

                     If (ul_Lp < ul_LpMax), then the optimal point is the point where
                     the line " CWSize = alphaminus*Lp" intersects the line CWSize = CWmax

                  ***/
                  ul_Lp = ul_LpMax;

                  if (ul_Lp > ul_temp2)
                  {
                     // us_lp = min(floor (CWMax/alphaminus), Lpmax)

                     // we know that "ul_LpMax", which is bounded by Channel Capacity or MinINP constraint,
                     // is always <= 65535. So this casting to 16-bit variable is always ok.
                     ul_Lp = ul_temp2;
                  }

                  // Take care of the flooring effect on Lp which causes the optimal corner to be above alphaplus line
                  // Note: the special case of alphaplus==alphaminus and the handling of
                  //       degenerate case where we can never find a valid combo of CWSize and Lp
                  //       to be on the line of alphaplus==alphaminus is handled later on.

                  // CWTemp = alphaplus*Lp
                  MULS32xU16(ul_temp1, ul_Lp, us_alphaplus_numerator);
                  s_CWTemp = ul_temp1/us_alphaplus_denominator;

                  // now check if we are outside the feasible region
                  if ((int16) us_CWSize > s_CWTemp)     // us_CWSize should always be a +'ve number, but just in case, use a signed comparison
                  {
                     us_Kp = (s_CWTemp-us_Rp) >> gsa_log2Tbl[us_Mp]; // floor ((CW1-Rp)/Mp)

                     // safety check: make sure CWSize is always > 0
                     if ((int16)us_Kp<=0)
                     {
                        continue;
                     }

                     // us_CWSize = us_Kp*us_Mp + us_Rp;
                     MULU16(us_CWSize, us_Kp, us_Mp);
                     us_CWSize = us_CWSize + us_Rp;
                  }

               }
               else
               {
                  /***
                     This is case where the optimal corner point intersect
                     the line "CWSize = alphaplus*Lp" and line "Lp=LpMax".
                     Also, we are sure s_CWMin <= s_CWTemp <= s_CWMax
                  ***/
                  ul_Lp = ul_LpMax;

                  // Re-compute CWSize based on LpMax: CWSize = alphaplus*LpMax
                  // Note: at this point, CWSize might still be greater than 255 before we quantize Kp
                  MULS32xU16(ul_temp1, ul_LpMax, us_alphaplus_numerator);
                  us_CWSize = ul_temp1/us_alphaplus_denominator;

                  // note: at this point, Kp doesn't have to be signed because CWSize is always >= Rp
                  us_Kp = (us_CWSize-us_Rp) >> gsa_log2Tbl[us_Mp]; // floor ((CW1-Rp)/Mp)

                  // us_CWSize = us_Kp*us_Mp + us_Rp;
                  MULU16(us_CWSize, us_Kp, us_Mp);
                  us_CWSize = us_CWSize + us_Rp;

               }

               //---------------------------------------------------------------------------------------
               // Redefine the lower left corner of the feasible region
               // so that we guarantee that CWMin and LpMin can be used
               // in the fixed/limited rate cases
               // Here we might be increasing both LpMin & CWMin
               //---------------------------------------------------------------------------------------

               // Check if (alphaplus * Lpmin) < CWmin
               //               Lmin = (CWmin/alphaplus)

               MULS32xU16(ul_temp1, ul_LpMin, us_alphaplus_numerator);
               MULU16(ul_temp2, s_CWMin, us_alphaplus_denominator);

               if(ul_temp1 < ul_temp2)
               {
                  us_temp = ul_temp2/ us_alphaplus_numerator;

                  // Safety check
                  if (us_temp > ul_LpMin)
                  {
                     ul_LpMin = us_temp;
                  }
               }

               // Check if (alphaminus * Lpmin) > CWmin
               //               CWmin = (alphaminus*Lmin)
               MULS32xU16(ul_temp1, ul_LpMin, us_alphaminus_numerator);
               MULU16(ul_temp2, s_CWMin, us_alphaminus_denominator);

               if(ul_temp1 > ul_temp2)
               {
                  us_temp = ul_temp1/ us_alphaminus_denominator;

                  // Safety check
                  if ((int16)us_temp > s_CWMin)
                  {
                     s_CWMin = (int16)us_temp;
                  }
               }

               // We have found the optimal corner {CWSize, Lp}.  But we need to make sure CWSize is co-prime with Dp.
               // If not, keep reducing CWSize until we find a CWSize that is co-prime with Dp.
               while ( (((int16)us_CWSize) >= s_CWMin) && !CheckCoPrime((int16)us_Dp, (int16)us_CWSize) )
               {
                  /***
                     NOTE1: We can skip this while loop if we know that Dp is even and Mp is even (recall: Mp is always a power of 2).
                           In this case, due to quantization effect on Kp, the CWSize will always be
                           even and we'll never find a proper CWSize to be co-prime with Dp.
                     NOTE2: We should only do this check here instead of outside in Dp loop because we need
                           to handle the case when CWSize is a prime number
                           then it will work with any Dp value!
                  ***/
                  if ( ((us_Dp&0x1) == 0) && ((us_Mp&0x1) == 0) )
                  {
                     // Set CWSize to be below CWMin to make sure we skip current Dp
                     us_CWSize = s_CWMin-2;
                     break;
                  }

                  us_CWSize--;

                  // Recompute Kp based on the new CWSize: floor ((CWSize-Rp)/Mp)
                  us_Kp = (us_CWSize-us_Rp) >> gsa_log2Tbl[us_Mp];

                  // Recompute CWSize to consider quantization effect on Kp:
                  //   us_CWSize = us_Kp*us_Mp + us_Rp;
                  MULU16(us_CWSize, us_Kp, us_Mp);
                  us_CWSize = us_CWSize + us_Rp;

               }

               //Safety check: make sure CWSize didn't go below CWMin
               if ((int16)us_CWSize < s_CWMin)
               {
                  continue;   // consider next Dp value
               }

               //consider MinDelay constraint
               // Note: ul_LpMax_DueToMinDelay =  8*(us_Dp-1)*(us_CWSize-1)/(fs*s_MinDelay)
               if (pt_Input->s_MinDelay > 0)
               {
                  ul_LpMax_DueToMinDelay = (8000/pt_Input->s_MinDelay)*(us_Dp-1)*(us_CWSize-1)/(pt_Input->s_fs);
               }


               // The following check is especially important when "alphaplus == alphaminus"
               // It's because CWSize might be reduced due to the flooring effect of Kp,
               // and so there is a chance we go below the line "CWsize = alphaminus*Lp"
               // (recall we need CWSize >= alphaminus*Lp).
               // Therefore, it's better to make sure we didn't violate alphaminus constraint now.
               //
               MULU16(ul_temp1, us_alphaminus_denominator, us_CWSize);      //ul_temp1 = us_alphaminus_denominator*us_CWSize
               ul_temp2 = ul_temp1/us_alphaminus_numerator;

               if (ul_temp2 < ul_Lp)    // if (CWSize/alphaminus < Lp)
               {

                  // Lp = floor(CWSize/alphaminus)
                  ul_Lp = ul_temp2;

                  /***
                     If the feasible region is a line, we might never be able to find the integer values of (Lp,CWSize)
                     such that the optimal point is on the line (i.e. Lp != alpha*CWSize, for all valid integers Lp and CWSize)
                     In such case, we should consider next Dp
                  ***/

                  //Cross-multiply alphaplus & alphaminus:
                  //ul_temp1 = us_alphaplus_numerator*us_alphaminus_denominator;
                  //ul_temp2 = us_alphaminus_numerator*us_alphaplus_denominator;
                  //
                  MULU16(ul_temp1, us_alphaplus_numerator, us_alphaminus_denominator);
                  MULU16(ul_temp2, us_alphaminus_numerator, us_alphaplus_denominator);

                  if (ul_temp1 == ul_temp2)          // if (alphaplus == alphaminus)
                  {

                     MULU16(ul_temp1, us_CWSize, us_alphaplus_denominator);
                     MULS32xU16(ul_temp2, ul_Lp, us_alphaplus_numerator);
                     if (ul_temp1 > ul_temp2)         // if (CWSize > alphaplus*Lp)
                     {
                        continue;
                     }
                  }

               }


               ///////////////////////////////////
               // HANDLE FIXED RATE CASE
               ///////////////////////////////////

               //----------------------------------------------------------------
               //Find the Max total rate based on max OH Rate as per the final config
               // -- this is important for Rate-Limited/Fixed-Rate case
               //----------------------------------------------------------------

               // Compute the Max Gp value based on ORp <= 256 for current framing values
               // max Gp = floor[(256 * k* alphaplus)/ fs)]
               MULU16(ul_temp1, (us_k << 8), us_alphaplus_numerator);

               //MULU16(ul_temp2, pt_Input->s_fs, us_alphaplus_denominator);
               MULU16(ul_temp2, pt_Input->s_fs, us_alphaplus_denominator);
               ul_temp2 += 1<<(FRAME_RATE_SHIFT_CNT-1);
               ul_temp2 >>= FRAME_RATE_SHIFT_CNT;

               us_temp = (ul_temp1 / ul_temp2);

               // Check if this Gp value is smaller than the Gp based on Gp/Tp constraint
               if ((us_temp < us_maxGp) && (us_temp >= 1))
               {
                  s_r1 = us_temp;
               }
               else
               {
                  s_r1 = us_maxGp;
               }

               // Compute ORpmax = (Gpmax * fs)/(k * alphaplus)
               //MULU16(s_r0, s_r1, pt_Input->s_fs);
               MULU16(ul_temp1, s_r1, pt_Input->s_fs);
               ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
               s_r0 = ul_temp1>>FRAME_RATE_SHIFT_CNT;

               MULU16(ul_temp1, us_alphaplus_denominator, s_r0);

               // Get the denominator = (k * alphaplus)
               MULU16(s_r1, us_alphaplus_numerator, us_k);
               us_temp = (ul_temp1/s_r1);

               // compute MaxTotalRate = MaxRate + MaxOHRate [in kbps]
               // Note: we convert the unit of MaxRate from 8 kbps to kbps
               l_MaxTotalRate = ((pt_Input->us_MaxRate) << 3) + us_temp;

               //---------------------------------------------------------------------------------------
               // Determine if we are in Rate Limited case:
               //
               // At this point, we have found the optimal ul_Lp and us_CWSize given the current
               // set of {us_Rp,us_Mp,us_Tp,us_Dp}.
               // We are also sure that the point {ul_Lp, us_CWSize} lies in the feasible region.
               // The question is now whether this optimal point {ul_Lp, us_CWSize} meets
               // the max rate requirement.  If not, then we have to adjust ul_Lp and us_CWSize
               // such that they would yield a final net data rate that meets the min/max net
               // data rate requirements and still be in the feasible region
               //---------------------------------------------------------------------------------------
               ft_LimitedRate = 0;

               //DRp = ul_LineRate = ul_Lp*pt_Input->s_fs;
               //MULU16(ul_LineRate, ul_Lp, pt_Input->s_fs);
               MULS32x16(ul_LineRate, ul_Lp, pt_Input->s_fs);
               ul_LineRate += 1<<(FRAME_RATE_SHIFT_CNT-1);
               ul_LineRate >>= FRAME_RATE_SHIFT_CNT;

               // Recall: TotalRate = Lp*fs*(CWSize-Rp)/CWSize
               //                   = DRp*(CWSize-Rp)/CWSize
               //l_a0 = ul_LineRate*(us_CWSize-us_Rp);
               MULS32x16(l_a0, ul_LineRate, (us_CWSize-us_Rp));
               //l_a1 = l_MaxTotalRate*us_CWSize;
               MULS32xU16(l_a1, l_MaxTotalRate, us_CWSize);

               // if (TotalRate > MaxTotalRate)
               if (l_a0 > l_a1)
               {
                  // indicate we are in rate-limited case
                  ft_LimitedRate = 1;

                  // reset CWMax to the final CWSize we have selected -- guaranteeing that CWMax
                  // is always co-prime with the current Dp.
                  s_CWMax = us_CWSize;

                  // Now compute the data rate if we limit Lp based on Max CWSize along the line CWSize = alphaplus*Lp
                  /* Pseudo code:
                      L1 = (CWMax/alphaplus);                % target Lp
                      RateTotal1 = fs*(CWMax-Rp)/CWMax*L1;   % rate limited by CWMax & alphaplus
                      RateTotal1 = fs*(CWMax-Rp)/alphaplus
                  */
                  s_r0 = (s_CWMax-us_Rp);

                  //s_r1 = pt_Input->s_fs*s_r0;
                  //MULS16(s_r1,pt_Input->s_fs, s_r0);  //s_r1 = fs*(CWMax-R)
                  MULS16(ul_temp1, pt_Input->s_fs, s_r0);
                  ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                  s_r1 = ul_temp1>>FRAME_RATE_SHIFT_CNT;

                  //l_a0 = us_alphaplus_denominator*s_r1;
                  MULS32x16(l_a0, us_alphaplus_denominator, s_r1);  //l_a0 = alphaplus_denominator*fs*(CWMax-R)

                  //l_a1 = l_MaxTotalRate*us_alphaplus_numerator;
                  MULS32xU16(l_a1, l_MaxTotalRate, us_alphaplus_numerator);

                  // if (MaxTotalRate > RateTotal1)
                  if (l_a1 > l_a0)
                  {
                     // Case I: Only Lp needs to be cut

                     /* Pseudo code:
                        Lp = floor(CWMax*MaxTotalRate/(CWMax-Rp)/fs);
                        CWSize = CWMax;
                     */

                     s_r0 = (s_CWMax-us_Rp);

                     // safety check: make sure s_r0 is > 0.  CWMax should always be > 32, but just in case...
                     if (s_r0<0)
                     {
                        continue;
                     }

                     //s_r1 = pt_Input->s_fs*s_r0;
                     //MULS16(s_r1,pt_Input->s_fs, s_r0);  //s_r1 = fs*(CWMax-R)
                     MULS16(ul_temp1, pt_Input->s_fs, s_r0);
                     ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                     s_r1 = ul_temp1>>FRAME_RATE_SHIFT_CNT;

                     //l_a1 = l_MaxTotalRate*s_CWMax;
                     MULS32x16(l_a1, l_MaxTotalRate, s_CWMax);

                     // Lp = (MaxTotalRate*CWMax)/(fs*(CWMax-R))
                     ul_Lp = l_a1/s_r1;

                     // make sure that chosen Lp is less than channel capacity
                     if (ul_Lp > ul_LpMax)
                     {
                        ul_Lp = ul_LpMax;
                     }

                     us_CWSize = s_CWMax;

                  }
                  else
                  {
                     // Now compute the data rate if we limit CWSize based on LpMin along the line CWSize = alphaplus*Lp
                     /* Pseudo code:
                         CW1 = (alphaplus*LpMin); % target CW size at edge of feasible region
                        RateTotal2 = (CW1-Rp)/CW1*LpMin*fs;
                     */

                     //l_a0 = (us_alphaplus_numerator*ul_LpMin - us_alphaplus_denominator*us_Rp)*pt_Input->s_fs;
                     MULS32xU16(ul_temp1, ul_LpMin, us_alphaplus_numerator);
                     MULU16(ul_temp2, us_alphaplus_denominator, us_Rp);

                     //MULS32x16(l_a0, (int32)(ul_temp1-ul_temp2), pt_Input->s_fs);
                     MULS32x16(l_a0, (int32)(ul_temp1-ul_temp2), ((pt_Input->s_fs)>>(FRAME_RATE_SHIFT_CNT-2)));
                     l_a0 += (1<<1);
                     l_a0 >>= 2;

                     //wng- FIXME - check MULS32x16 usage, it's using much more code that C intrinsic * operator
                     l_a1 = l_MaxTotalRate*us_alphaplus_numerator;

                     // if (MaxTotalRate > RateTotal2)
                     if (l_a1 > l_a0)
                     {
                        // Case II: rate contour intersects line CW = alphaplus * Lp; we need to cut both Lp & CWSize
                        /* Pseudo code:
                           % Solve first for Lp
                           Lp1 = MaxTotalRate/fs+Rp/alphaplus;
                           CW1 = alphaplus*Lp1;
                           Lp = floor(Lp1);

                           % get Kp and get real CWSize
                           Kp =  floor((CW1-Rp)/Mp);
                           CWSize = Kp*Mp+Rp;

                           if (CWSize > alphaplus*Lp) % then we are outside the feasible region
                              CW1 = alphaplus*Lp;
                              Kp =  floor((CW1-Rp)/Mp);
                              CWSize = Kp*Mp+Rp;
                           end
                        */

                        // Solve first for Lp
                        //ul_temp1 = (l_MaxTotalRate*us_alphaplus_numerator + us_alphaplus_denominator*us_Rp*pt_Input->s_fs);
                        MULS32xU16(ul_temp2, l_MaxTotalRate, us_alphaplus_numerator);
                        MULU16(ul_temp1, us_alphaplus_denominator, pt_Input->s_fs);
                        ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                        ul_temp1 >>= FRAME_RATE_SHIFT_CNT;
                        //XDSLRTFW-662 BugFix_DS_ALL_ALL_FormulaBug (Start)
                        ul_temp1 = ul_temp1 * us_Rp;
                        //XDSLRTFW-662 BugFix_DS_ALL_ALL_FormulaBug (End)
                        ul_temp1 += ul_temp2;

                        //MULU16(ul_temp2, us_alphaplus_numerator, pt_Input->s_fs);
                        MULU16(ul_temp2, us_alphaplus_numerator, pt_Input->s_fs);
                        ul_temp2 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                        ul_temp2 >>= FRAME_RATE_SHIFT_CNT;

                        // Lp = floor(MaxTotalRate/fs+Rp/alphaplus)
                        //    = floor((MaxTotalRate*alphaplus_numerator + Rp*alphaplus_denominator*fs)/(fs*alphaplus_numerator))
                        ul_Lp = ul_temp1/ul_temp2;

                        // make sure that chosen Lp is less than channel capacity
                        if (ul_Lp > ul_LpMax)
                        {
                           ul_Lp = ul_LpMax;
                        }

                        // CW1 = alphaplus*(MaxTotalRate/fs+Rp/alphaplus)
                        //     = (MaxTotalRate*alphaplus_numerator + Rp*alphaplus_denominator*fs)/(fs*alphaplus_denominator)
                        //MULU16(ul_temp2, us_alphaplus_denominator, pt_Input->s_fs);
                        MULU16(ul_temp2, us_alphaplus_denominator, pt_Input->s_fs);
                        ul_temp2 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                        ul_temp2 >>= FRAME_RATE_SHIFT_CNT;

                        // CWTemp = floor(CW1) --> this is different from Matlab implementation, but should be ok.
                        s_CWTemp = ul_temp1/ul_temp2;

                        us_Kp = (s_CWTemp-us_Rp) >> gsa_log2Tbl[us_Mp]; // floor ((CW1-Rp)/Mp)

                        // safety check: make sure CWSize is always > 0
                        if ((int16)us_Kp<=0)
                        {
                           continue;
                        }

                        // us_CWSize = us_Kp*us_Mp + us_Rp;
                        MULU16(us_CWSize, us_Kp, us_Mp);
                        us_CWSize = us_CWSize + us_Rp;

                        // Now check if we are outside the feasible region.
                        // We still need to recompute CWSize based on the new Lp.
                        // This is to deal with the case where Lp is lowered, while CWSize still the same
                        // and we ended up outside the feasible region (above the line CWSize = alphaplus*Lp)

                        // CWTemp = alphaplus*Lp
                        MULS32xU16(ul_temp1, ul_Lp, us_alphaplus_numerator);
                        s_CWTemp = ul_temp1/us_alphaplus_denominator;

                        if ((int16) us_CWSize > s_CWTemp)     // us_CWSize should always be a +'ve number, but just in case, use a signed comparison
                        {
                           us_Kp = (s_CWTemp-us_Rp) >> gsa_log2Tbl[us_Mp]; // floor ((CW1-Rp)/Mp)

                           // safety check: make sure CWSize is always > 0
                           if ((int16)us_Kp<=0)
                           {
                              continue;
                           }

                           // us_CWSize = us_Kp*us_Mp + us_Rp;
                           MULU16(us_CWSize, us_Kp, us_Mp);
                           us_CWSize = us_CWSize + us_Rp;
                        }

                     }
                     else
                     {
                        // Case III: we are at LpMin, so we can only cut CWSize
                        /* Pseudo code:
                           Lp = LpMin;
                           % solve for CWSize
                           CW1 = Lp*Rp/(LpMin-MaxTotalRate/fs);

                           % get Kp and get real CWSize
                           Kp =  floor((CW1-Rp)/Mp);
                           CWSize = Kp*Mp+Rp;
                        */

                        ul_Lp = ul_LpMin;

                        // Solve for CWSize:
                        //  CW1 = Lp*Rp/(Lp-MaxTotalRate/fs);
                        //      = (fs*Lp*Rp)/(fs*Lp - MaxTotalRate)
                        //MULU16(ul_temp1, ul_Lp, pt_Input->s_fs);
                        MULS32x16(ul_temp1, ul_Lp, pt_Input->s_fs);
                        ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                        ul_temp1 >>= FRAME_RATE_SHIFT_CNT;

                        ul_temp2 = ul_temp1 * us_Rp; // wng- FIXME - check MULS32x16 usage, it's using much more code that C intrinsic * operator

                        l_a0 = ul_temp1 - l_MaxTotalRate;

                        // s_CWTemp = (ul_Lp*pt_Input->s_fs*us_Rp)/(ul_Lp*pt_Input->s_fs - l_MaxTotalRate)
                        s_CWTemp = ul_temp2/l_a0;

                        us_Kp = (s_CWTemp-us_Rp) >> gsa_log2Tbl[us_Mp]; // floor ((CW1-Rp)/Mp)

                        // safety check: make sure CWSize is always > 0
                        if ((int16)us_Kp<=0)
                        {
                           continue;
                        }

                        // us_CWSize = us_Kp*us_Mp + us_Rp;
                        MULU16(us_CWSize, us_Kp, us_Mp);
                        us_CWSize = us_CWSize + us_Rp;

                     } //end if (MaxTotalRate > RateTotal2)

                     //---------------------------------------------------------------------------------------------------------------
                     // Since CWSize is being adjusted in both Case II & III, then we better make sure the new CWSize is still co-prime
                     // with the Dp.  Also the new CWSize won't go below the line of alphaminus
                     //---------------------------------------------------------------------------------------------------------------
                     // Make sure the new CWSize is co-prime with current Dp.
                     // If not, keep reducing CWSize until we find a CWSize that is co-prime with Dp.
                     while ( (((int16)us_CWSize) >= s_CWMin) && !CheckCoPrime((int16)us_Dp, (int16)us_CWSize) )
                     {
                        /***
                           NOTE1: We can skip this while loop if we know that Dp is even and Mp is even (recall: Mp is always a power of 2).
                                 In this case, due to quantization effect on Kp, the CWSize will always be
                                 even and we'll never find a proper CWSize to be co-prime with Dp.
                           NOTE2: We should only do this check here instead of outside in Dp loop because we need
                                 to handle the case when CWSize is a prime number
                                 then it will work with any Dp value!
                        ***/
                        if ( ((us_Dp&0x1) == 0) && ((us_Mp&0x1) == 0) )
                        {
                           // Set CWSize to be below CWMin to make sure we skip current Dp
                           us_CWSize = s_CWMin-2;
                           break;
                        }

                        us_CWSize--;

                        // Recompute Kp based on the new CWSize: floor ((CWSize-Rp)/Mp)
                        us_Kp = (us_CWSize-us_Rp) >> gsa_log2Tbl[us_Mp];

                        // Recompute CWSize to consider quantization effect on Kp:
                        //   us_CWSize = us_Kp*us_Mp + us_Rp;
                        MULU16(us_CWSize, us_Kp, us_Mp);
                        us_CWSize = us_CWSize + us_Rp;

                     } // end while ( (((int16)us_CWSize) >= s_CWMin)...

                     //Safety check: make sure CWSize didn't go below CWMin
                     if ((int16)us_CWSize < s_CWMin)
                     {
                        continue;   // consider next Dp value
                     }


                     /***
                        We have to deal with the case when CWSize has been reduced by an amount that
                        could have caused the FinalRate to be far away from the MaxTotalRate curve.
                        In this case, we should increase Lp.  Therefore, find the new Lp for the
                        given MaxTotalRate and CWSize.
                     ***/

                     l_a0 = l_MaxTotalRate*us_CWSize;  //wng- FIXME - check MULS32x16 usage, it's using much more code that C intrinsic * operator

                     //s_r0 = (us_CWSize-us_Rp)*pt_Input->s_fs;
                     //MULS16(s_r0, (us_CWSize-us_Rp), pt_Input->s_fs);
                     MULS16(ul_temp1, (us_CWSize-us_Rp), pt_Input->s_fs);
                     ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                     s_r0 = ul_temp1 >> FRAME_RATE_SHIFT_CNT;

                     // Lp1 = MaxTotalRate*CWSize/fs/(CWSize - Rp);
                     // ul_temp1 = floor(Lp1)
                     ul_temp1 = l_a0/s_r0;   // use 32-bit to store Lp computed base on MaxTotalRate to take care of worst case

                     ul_Lp = ul_LpMax;      // make sure final Lp is no more than LpMax.
                     if (ul_temp1 < ul_Lp)   // if Lp based on given CWSize and MaxTotalRate is smaller than LpMax, then use this lower Lp
                     {
                        ul_Lp = ul_temp1;
                     }

                     /***
                        We now need to make sure that if Lp(MaxTotalRate,CWSize) > Lp(alphaminus,CWSize),
                        then Lp is adjusted correctly such that we are back in the feasible region for
                        given {Lp,CWSize}.

                        This code also handles the case when alphaplus and alphaminus are very close:
                        when we quantize CWSize to the next lower feasible value, we might end up outside
                        the feasible region (below the line CWSize = alphaminus*Lp).
                        In this case, we further reduce Lp by finding Lp along the line CWSize = alphaminus*Lp
                     ***/

                     MULU16(ul_temp1, us_alphaminus_denominator, us_CWSize);      //ul_temp1 = us_alphaminus_denominator*us_CWSize
                     ul_temp2 = ul_temp1/us_alphaminus_numerator;

                     if (ul_temp2 < ul_Lp)    // if (CWSize/alphaminus < Lp)
                     {

                        // Lp = floor(CWSize/alphaminus)
                        ul_Lp = ul_temp2;

                        //safety check: make sure we are not reducing Lp by too much
                        if (ul_Lp < ul_LpMin)
                        {
                           continue;
                        }
                     }

                  } //end if (MaxTotalRate > RateTotal1)

                  // NOTE: Matlab code would re-compute TotalRate here after we have adjusted Lp and CWSize,
                  //       but C-code doesn't re-compute it here.  Instead it always re-computes TotalRate
                  //       after finding Gp.

               } //end if (TotalRate > MaxTotalRate)


               //make sure that chosen Lp is less than constrained Lp due to min delay.
               if ((ul_LpMax_DueToMinDelay > 0) && (ul_Lp > ul_LpMax_DueToMinDelay))
               {
                  ul_Lp = ul_LpMax_DueToMinDelay;
               }


               //---------------------------------------------------------------------------------------
               // Find the limit on Gp based on the selected {CWSize, Lp}
               //---------------------------------------------------------------------------------------

               /***
                  To determine Gp, we need to compute a few derived parameters that are independent of Gp;
                  they are used to constrain Gp.
               ***/

               // compute frequently used terms and stored them in well-defined locals:
               // a) DRp = ul_LineRate = ul_Lp*pt_Input->s_fs;
               //MULU16(ul_LineRate, ul_Lp, pt_Input->s_fs);
               MULS32x16(ul_LineRate, ul_Lp, pt_Input->s_fs);
               ul_LineRate += 1<<(FRAME_RATE_SHIFT_CNT-1);
               ul_LineRate >>= FRAME_RATE_SHIFT_CNT;

               // b) k*CWSize=Tp*CWSize/Mp
               MULU16(s_k_x_CWSize, us_k, us_CWSize);      // worst case is 64*255 =16320 (store in 16-bit signed number is ok)

               {
                  uint16 us_Qhat;

                  us_Qhat = us_Qmax;
                  if (ul_LineRate < us_DRmax)
                  {
                     //us_Qhat = QMAX*ul_LineRate/DR0;
                     //XDSLRTFW-3226 (Start)
                     MULS32xU16(ul_temp1, ul_LineRate, us_Qmax);
                     us_Qhat = ul_temp1/us_DRmax;
                     //XDSLRTFW-3226 (End)
                  }

                  // Up = floor(Qhat*Mp/Tp/CWSize)
                  //    = floor(Qhat/(k*CWSize)), where k = Tp/Mp;
                  s_Up = us_Qhat/s_k_x_CWSize;
               }

               //XDSLRTFW-3226 (Start)
               // PERp     = k*Sp*Up/fs = k*CWSize*8*Up/DRp
               // IBitsOHR = 48/PERp
               //          = 6*DRp/(k*CWSize*Up)
               MULS32xU16(l_a0, ul_LineRate, 6);   // worst case is (4096*15)bits * 8ksymbol/s = 491520 (store in 32-bit signed number is ok)
               MULS16(us_temp, s_k_x_CWSize, s_Up);   // the product is always less than us_Qmax (17000 for legacy profiles or 34000 for 35b profile)
               s_IBitsOHR = (int16)ceil32u((uint32)l_a0,us_temp);
               //XDSLRTFW-3226 (End)


               if (pt_Input->s_OHFrameType == 1)
               {
                  //We need to recompute the Min/Max OHRate based on the final PERp.
                  //Otherwise, we would fail to meet MSGp requirement, even when we can meet the ORp requirement.
                  s_MinOHRateFinal = pt_Input->s_MinMsgOHR + s_IBitsOHR;
                  s_MaxOHRateFinal = pt_Input->s_MaxMsgOHR + s_IBitsOHR;  // wng? -- should we floor s_IBitsOHR???

                  // We should always limit MaxOHRate correctly so that we can have a better optimal result in Fixed-Rate and Rate-Limited cases.
                  // The limit of 64*fs is imposed by the standard:
                  //    ORp = Gp*Mp/Sp/Tp * 8 * fs
                  // And we know that Gp*Mp/Sp/Tp has to be <= 8, therefore we need to make sure ORp <= 64*fs.
                  //if (s_MaxOHRateFinal > (pt_Input->s_fs << 6))   // if (MaxOHRate > 64*fs) ?
                  //s_MaxOHRateFinal = (pt_Input->s_fs << 6);
                  us_temp = pt_Input->s_fs + (int16)(1<<(FRAME_RATE_SHIFT_CNT-7));
                  us_temp >>= (FRAME_RATE_SHIFT_CNT-6);
                  if(s_MaxOHRateFinal > us_temp)
                  {
                     s_MaxOHRateFinal = us_temp;
                  }
               }
               else
               {
                  //wng? Double check if it's ok
                  s_MinOHRateFinal = s_MinOHRate;
                  s_MaxOHRateFinal = s_MaxOHRate;
               }


               // wng - FIXME - this problem not so critical, but it's something to consider if we find that certain Fixed-Rate config cannot be generated.
               //
               // We might over-estimate GpMinFinal in C-code as compared to Matlab code because
               // we over-estimate "s_MinOHRateFinal" when we take the ceiling of (48/PERp) in computing
               // s_IBitsOHR.
               // E.g  If MinOHRateFinal=64.72 kbps, then C-code would make it to be 65 kbps.
               //      Furthermore, if CWSize=254; k=8; Lp*fs=132000, then
               //      (65)*CWSize*k/Lp/fs = 1.0006 ==> GpMinFinal = ceil(1.0006) = 2
               //            VS.
               //      (64.72)*CWSize*k/Lp/fs = 0.9963 ==> GpMinFinal = ceil(0.9963) = 1
               //

               //GpMinFinal = ceil(MinOHRateFinal*CWSize*k/Lp/fs);
               MULS16(ul_temp1, s_MinOHRateFinal, s_k_x_CWSize);
               s_GpMinFinal = ul_temp1/ul_LineRate;
               if( ul_temp1 > (uint32)(s_GpMinFinal*ul_LineRate) )
               {
                  s_GpMinFinal++;
               }

               //GpMaxFinal = floor(MaxOHRateFinal*CWSize*k/Lp/fs)
               MULS16(ul_temp1, s_MaxOHRateFinal, s_k_x_CWSize);
               s_GpMaxFinal = ul_temp1/ul_LineRate;

               // safety check: make sure GpMaxFinal is <= 32
               //GpMaxFinal = min(GpMaxFinal, 32);
               if (s_GpMaxFinal > guc_maxG)
               {
                  s_GpMaxFinal = guc_maxG;
               }

               // safety check: make sure GpMin <= GpMax
               if (s_GpMinFinal > s_GpMaxFinal)
               {
                  continue;
               }

               // We always want to pick the lowest Gp because this allocates lowest channel capacity for OHRate
               //Gp = max(GpMinFinal, 1);
               us_Gp = s_GpMinFinal;
               if (us_Gp < MIN_G)
               {
                  us_Gp = MIN_G;
               }

               //wng- FIXME - check MULS32x16 usage, it's using much more code that C intrinsic * operator
               //---------------------------------------------------------------------------------------
               // Compute final TotalRate, OHRate, & NetRate
               // Note: they will be all in signed Q28.4 format
               //---------------------------------------------------------------------------------------
               ComputeRates(ul_Lp, us_CWSize, us_Rp, us_Gp, us_Mp, us_Tp, pt_Input->s_fs, &l_OHRate, &l_NetRate);

               // deltaNDR = NetRate - MaxRate (in Q28.4)
               l_deltaNDR = l_NetRate - (pt_Input->us_MaxRate<<7);


               //------------------------------------------------------------------
               // Make sure NetRate less than MaxRate
               //------------------------------------------------------------------
               // If final NetRate is higher than MaxRate, then we have 2 options:
               //   1. increase Gp to allocate more channel capacity for OHRate while TotalRate remains the same
               //   2. reduce Lp to bring down the overall TotalRate, which leads to lower NetRate and OHRate
               //
               // The following code first attempts to increase Gp as much as possible while not violating OHRate constraint & Rule #2.
               // If the NetRate is still above MaxRate, then it starts to reduce Lp.
               //
               // WARNING: When we break out from this while-loop, we don't guarantee that MaxRate constraint can be met.
               //          It simply means that we cannot adjust Gp or Lp further without violating other constraints.
               while (l_deltaNDR > 0)
               {
                  // Always attempt to adjust Gp without changing Lp, so that we allocate more channel capacity to
                  // OHRate and therefore reducing NetRate
                  us_Gp = us_Gp+1;

                  //---------------------------------------------------------
                  // Now determine if Gp and Tp comply with Rule #2 for Gp & Tp settings.  This has a stricter limit on OH-octet/symbol than the OHRate limit.
                  /*
                     % Formula according to Rule #2 of Gp & Tp selection
                     OHOct_Per_MDF = floor(Gp/Tp);
                     MDF_Per_Symbol = ceil(Mp/Sp);
                     OHOct_Per_Sym_rounded = OHOct_Per_MDF*MDF_Per_Symbol + floor(MDF_Per_Symbol/Tp)*mod(Gp,Tp) + min(mod(MDF_Per_Symbol, Tp), mod(Gp,Tp));
                  */

                  s_OHOct_Per_MDF  = us_Gp/us_Tp;
                  // to compute ceil(Mp/Sp) ==> ceil ((Mp*Lp)/(8*CWSize))
                  MULS32xU16(l_a0, ul_Lp, us_Mp);    //l_a0 = us_Mp * ul_Lp;   //worst case is 16*61440
                  s_r0 = us_CWSize<<3;               //s_r0 = us_CWSize * 8
                  // inputs: l_a0 = Mp*Lp; s_r0 = 8*CWSize
                  s_MDF_Per_Symbol = ceil32(l_a0, s_r0);
                  // s_r0 = floor(MDF_Per_Symbol/Tp)
                  s_r0 = s_MDF_Per_Symbol/us_Tp;
                  // s_r1 = mod(Gp,Tp)
                  s_r1 = mod16(us_Gp, us_Tp);

                  // compute partial value of "OHOct_Per_Sym_rounded" -> sum up 1st & 2nd part of the equation
                  MULS16(s_OHOct_Per_Sym_rounded, s_OHOct_Per_MDF, s_MDF_Per_Symbol);
                  MULS16(l_a0, s_r0, s_r1);
                  s_OHOct_Per_Sym_rounded += (int16)l_a0;      //s_OHOct_Per_Sym_rounded = s_OHOct_Per_MDF*s_MDF_Per_Symbol + s_r0*s_r1;

                  // now compute min(mod(MDF_Per_Symbol, Tp), mod(Gp,Tp))
                  // at this point: s_r1 = mod(Gp,Tp)
                  s_r0 = mod16(s_MDF_Per_Symbol, us_Tp);
                  if (s_r0>s_r1)
                  {
                     s_r0 = s_r1;   // s_r0 = min(s_r0,s_r1)
                  }

                  // finally, we can add the 3rd part to "OHOct_Per_Sym_rounded"
                  s_OHOct_Per_Sym_rounded += s_r0;
                  //---------------------------------------------------------

                  // If the newly selected Gp violates Rule #2 or violates guc_maxG requirement,
                  // then we should reduce Lp to bring down overall TotalRate
                  if ((s_OHOct_Per_Sym_rounded > 8) || (us_Gp > guc_maxG))
                  {
                     // reduce Gp back by 1 so that OHOct_Per_Sym_rounded <= 8  or us_Gp <= 32
                     us_Gp = us_Gp-1;

                     // take ceiling of "l_deltaNDR" to make sure NetRate < MaxRate after cutting Lp
                     // also convert "l_deltaNDR" to Q32.0 format
                     if (l_deltaNDR & 0xF)    // check the decimal place (last 4 bits) to determine if we need to round up deltaNDR
                     {
                        l_deltaNDR >>= 4;
                        l_deltaNDR = l_deltaNDR + 1;
                     }
                     else
                     {
                        l_deltaNDR >>= 4;
                     }

                     // safety check: just to make sure we get +'ve deltaLp
                     if (l_deltaNDR > 0)
                     {
                        // PayloadBytes = floor(CWSize - Rp - Gp*Mp/Tp);
                        //              = CWSize - Rp - floor(Gp/k)
                        // note: we take the floor of (Gp/k) to account for worst case deltaLp.
                        us_PayloadBytes = us_CWSize - us_Rp - us_Gp/us_k;

                        //s_r0 = us_PayloadBytes*pt_Input->s_fs;
                        //MULU16(s_r0, us_PayloadBytes,pt_Input->s_fs);
                        MULU16(ul_temp1, us_PayloadBytes, pt_Input->s_fs);
                        ul_temp1 += 1<<(FRAME_RATE_SHIFT_CNT-1);
                        s_r0 = ul_temp1 >> FRAME_RATE_SHIFT_CNT;

                        // note: "l_deltaNDR" is now in Q32.0 format
                        l_a0 = l_deltaNDR*us_CWSize;

                        // deltaLp = floor(deltaNDR * CWSize/fs/PayloadBytes);
                        // note: we take floor instead of ceiling to avoid reducing Lp by too much
                        l_deltaLp = l_a0/s_r0;

                        // note: we use signed 32-bit for NewLp and deltaLp to catch any error case.
                        l_newLp = (int32)(ul_Lp - l_deltaLp);


                        /***
                           Make sure Lp is not reduced by too much such that it's outside alphaplus line
                        ***/
                        //l_a0 = us_CWSize*us_alphaplus_denominator;
                        MULU16(l_a0, us_CWSize, us_alphaplus_denominator);

                        //l_a1 = l_NewLp*us_alphaplus_numerator;
                        MULS32xU16(l_a1, l_newLp, us_alphaplus_numerator);

                        // if (CWSize/alphaplus > LpNew)
                        //   then we should break the while loop since we cannot reduce Lp to bring NetRate below MaxRate
                        if (l_a0 > l_a1)
                        {
                           break;
                        }

                        ul_Lp = l_newLp;

                        /* Pseudo code:
                           % re-compute Sp, DRp, Up we have reduced  Lp
                           Sp = CWSize*8/Lp;
                           DRp = Lp*fs;
                           if (DRp >= DR0)
                              Qhat = Qmax;
                           else
                              Qhat = Qmax*DRp/DR0;
                           end
                           Up = floor(Qhat*Mp/Tp/CWSize);

                           % re-compute TotalRate, OHRate and NetRate after we have reduced Lp
                           TotalRate = Lp*fs*(CWSize-Rp)/CWSize; % net data rate + overhead rate.
                           OHRate = (Gp*Mp/Sp/Tp)*8*fs;
                           NetRate = TotalRate - OHRate;
                           deltaORp = NetRate - MaxRate;
                        */

                        // NOTE: we don't compute Sp explicitly in C-code

                        //-----------------------------------
                        // Recompute DRp since Lp has changed
                        //-----------------------------------
                        //MULU16(ul_LineRate, ul_Lp, pt_Input->s_fs);
                        MULS32x16(ul_LineRate, ul_Lp, pt_Input->s_fs);
                        ul_LineRate += 1<<(FRAME_RATE_SHIFT_CNT-1);
                        ul_LineRate >>= FRAME_RATE_SHIFT_CNT;

                        {
                           uint16 us_Qhat;

                           us_Qhat = us_Qmax;
                           if (ul_LineRate < us_DRmax)
                           {
                              //us_Qhat = QMAX*ul_LineRate/DR0;
                              //XDSLRTFW-3226 (Start)
                              MULS32xU16(ul_temp1, ul_LineRate, us_Qmax);
                              us_Qhat = ul_temp1/us_DRmax;
                              //XDSLRTFW-3226 (End)
                           }

                           //-----------------------------------
                           // Recompute Up since Qhat might be different due to a new DRp.
                           //-----------------------------------
                           // Up = floor(Qhat*Mp/Tp/CWSize)
                           //    = floor(Qhat/(k*CWSize)), where k = Tp/Mp;
                           s_Up = us_Qhat/s_k_x_CWSize;
                        }
                        //-----------------------------------
                        // Recompute OHRate, TotalRate, NetRate with the new DRp
                        //-----------------------------------
                        ComputeRates(ul_Lp, us_CWSize, us_Rp, us_Gp, us_Mp, us_Tp, pt_Input->s_fs, &l_OHRate, &l_NetRate);

                        // deltaNDR = NetRate - MaxRate (in Q28.4)
                        l_deltaNDR = l_NetRate - (pt_Input->us_MaxRate<<7);

                        //At this point, we should always break the while-loop because
                        //we have done our best to reduce NetRate
                        //WARNING: breaking while-loop at this point doesn't guarantee that NetRate <= MaxRate
                        //         we always check against MaxRate constraint at the end.
                        //
                        break;

                     } // if (l_deltaNDR > 0)

                  }
                  else
                  {
                     //------------------------------------------------------
                     // Recompute OHRate, TotalRate, NetRate with the new Gp
                     //------------------------------------------------------
                     ComputeRates(ul_Lp, us_CWSize, us_Rp, us_Gp, us_Mp, us_Tp, pt_Input->s_fs, &l_OHRate, &l_NetRate);

                     // deltaNDR = NetRate - MaxRate (in Q28.4)
                     l_deltaNDR = l_NetRate - (pt_Input->us_MaxRate<<7);

                  } // end if (s_OHOct_Per_Sym_rounded > 8)


               } //end while (l_deltaNDR > 0)

               // If Lp is forced to be a non-multiple of 8, truncate it and recompute the rates
               //XDSLRTFW-3554: DS datarate exceeds configured max (start)
               if ((gul_dbgMiscControl & ROUND_LP_TO_32KBPS) && (ul_Lp & 0x7))
               {
                  // Round Lp before truncating it to a multiple of 8
                  ul_Lp += 4;
               }

               if (gul_dbgMiscControl & (ROUND_LP_TO_32KBPS|FLOOR_LP_TO_32KBPS))
               {
                  // Truncate Lp to a multiple of 8 without rounding (flooring).
                  // Rounding depending on ROUND_LP_TO_32KBPS bit set in gul_dbgMiscControl variable (see above!)
                  ul_Lp &= ~0x7;
                  ComputeRates(ul_Lp, us_CWSize, us_Rp, us_Gp, us_Mp, us_Tp, pt_Input->s_fs, &l_OHRate, &l_NetRate);
               }
               //XDSLRTFW-3554: DS datarate exceeds configured max (End)

               //wng? - For now, we always set Fp (number of OH Superframe) to 1.  This parameter is not determined by the others.
               us_Fp = 1;


               //------------------------------------------------------------------
               // Final safety check to catch borderline cases.
               //------------------------------------------------------------------
               /***
                  Determine if Gp and Tp comply with Rule #2 for Gp & Tp settings.
                  This has a stricter limit on OH-octet/symbol than the OHRate limit.

                  % Formula according to Rule #2 of Gp & Tp selection
                  OHOct_Per_MDF = floor(Gp/Tp);
                  MDF_Per_Symbol = ceil(Mp/Sp);
                  OHOct_Per_Sym_rounded = OHOct_Per_MDF*MDF_Per_Symbol + floor(MDF_Per_Symbol/Tp)*mod(Gp,Tp) + min(mod(MDF_Per_Symbol, Tp), mod(Gp,Tp));
               ***/

               s_OHOct_Per_MDF  = us_Gp/us_Tp;
               // to compute ceil(Mp/Sp) ==> ceil ((Mp*Lp)/(8*CWSize))
               MULS32xU16(l_a0, ul_Lp, us_Mp);            //l_a0 = us_Mp * ul_Lp;   //worst case is 16*61440
               s_r0 = us_CWSize<<3;               //s_r0 = us_CWSize * 8
               // inputs: l_a0 = Mp*Lp; s_r0 = 8*CWSize
               s_MDF_Per_Symbol = ceil32(l_a0, s_r0);
               // s_r0 = floor(MDF_Per_Symbol/Tp)
               s_r0 = s_MDF_Per_Symbol/us_Tp;
               // s_r1 = mod(Gp,Tp)
               s_r1 = mod16(us_Gp, us_Tp);

               // compute partial value of "OHOct_Per_Sym_rounded" -> sum up 1st & 2nd part of the equation
               MULS16(s_OHOct_Per_Sym_rounded, s_OHOct_Per_MDF, s_MDF_Per_Symbol);
               MULS16(l_a0, s_r0, s_r1);
               s_OHOct_Per_Sym_rounded += (int16)l_a0;      //s_OHOct_Per_Sym_rounded = s_OHOct_Per_MDF*s_MDF_Per_Symbol + s_r0*s_r1;

               // now compute min(mod(MDF_Per_Symbol, Tp), mod(Gp,Tp))
               // at this point: s_r1 = mod(Gp,Tp)
               s_r0 = mod16(s_MDF_Per_Symbol, us_Tp);
               if (s_r0>s_r1)
               {
                  s_r0 = s_r1;   // s_r0 = min(s_r0,s_r1)
               }

               // finally, we can add the 3rd part to "OHOct_Per_Sym_rounded"
               s_OHOct_Per_Sym_rounded += s_r0;

               // In order to reduce s_OHOct_Per_Sym_rounded, we could have reduced Gp
               // given all other parameters are fixed -- which is true at this point.
               // But we have already selected the lowest Gp value that meet the MinOHRate requirement.
               // Hence, we cannot do any better and have to check the next set of parameters.
               if (s_OHOct_Per_Sym_rounded > 8)
               {
                  continue;
               }


               /***
                  % Safety check: this protects against quantization errors when feasible region is too narrow
                  %               Also check MaxRate because we might not be able to increase Gp further to allocate more rate for overhead.
                  if ((NetRate < MinRate) | (NetRate > MaxRate))
                     continue;
                  end

               ***/
               // Note: MinRate and MaxRate are in unit of 8 kbps, so <<3 to convert it to kbps
               //       Then <<4 to convert it to Q28.4 format
               // If certain framing parameter workarounds are enabled, do not break out even if we violate min/max rate constraint. Settle with whatever we have got
               if (!(gul_dbgMiscControl & DISABLE_MIN_MAX_RATE_CHECK_IFEC_MODE))
               {
                  if ((l_NetRate < (pt_Input->us_MinRate<<7)) || (l_NetRate > (pt_Input->us_MaxRate<<7)))
                  {
                     continue;
                  }
               }

               /***
                  % Safety check: make sure we can meet MSGp requirement.  We could have violated this contraint after Lp is reduced
                  %               or Gp is increased to meet the MaxRate requirement.  The effect on MSGp due to reduction of Lp
                  %               should be very small, but it's still better to have this check to
                  %               catch the borderline case.
                  if (OHFrameType == 1)
                     SEQp = Up*Gp;
                     MSGp = OHRate * (SEQp-6)/SEQp;
                     % standard requires MSGmin < MSGp < MSGmax
                     if (MSGp <= MinMsgOHR)
                        continue;             % we might run into this case when Lp is reduced
                     end
                     if (MSGp >= MaxMsgOHR)
                        continue;             % we might run into this case when Gp is increased and often shows up at 8kHz frame rate as well
                     end
                  end
               ***/
               //XDSLRTFW-3226 (Start)
               if (pt_Input->s_OHFrameType == 1)
               {
                  // SEQp = Up*Gp;  (note: highest value of SEQp is 17000 bytes for legacy profiles or 3400 bytes for 35b profile)
                  MULS16(us_SEQp, s_Up, us_Gp);

                  // Note: OHRate in Q28.4 format, but highest value is 512 kbps<<4 = 8192, so use MULS16 is ok
                  MULS16(l_a0, l_OHRate, (us_SEQp - 6));   //l_a0 = l_OHRate * (us_SEQp - 6);

                  // first convert MinMsgOHR to Q28.4 format to match with the format of OHRate,
                  // and then multiple it with SEQp for comparison with "OHRate * (SEQp-6)"
                  // Note: highest MinMsgOHR is 248 kbps, so use MULS16 is ok
                  //
                  MULS16(l_a1, (pt_Input->s_MinMsgOHR<<4), us_SEQp);   //l_a1 = (pt_Input->s_MinMsgOHR<<4)*us_SEQp;

                  // if (MSGp <= MinMsgOHR)?
                  //   l_a0 = OHRate * (SEQp-6)
                  //   l_a1 = MinMsgOHR * SEQp
                  if (l_a0 <= l_a1)      // Note: standard requires MSGmin < MSGp
                  {
                     continue;
                  }

                  // first convert MaxMsgOHR to Q28.4 format to match with the format of OHRate,
                  // and then multiply it with SEQp for comparison with "OHRate * (SEQp-6)"
                  // Note: highest MaxMsgOHR is 256 kbps, so using MULS16 is ok
                  //
                  MULS16(l_a1, (pt_Input->s_MaxMsgOHR<<4), us_SEQp);   //l_a1 = (pt_Input->s_MaxMsgOHR<<4)*us_SEQp;
                  // if (MSGp >= MaxMsgOHR)?
                  //   l_a0 = OHRate * (SEQp-6)
                  //   l_a1 = MaxMsgOHR * SEQp
                  if (l_a0 >= l_a1)      // Note: standard requires MSGp < MSGmax
                  {
                     continue;
                  }

                  if (gus_dbgMaxSEQp && (us_SEQp > gus_dbgMaxSEQp))
                  {
                     continue;
                  }

               }
               //XDSLRTFW-3226 (End)

               // Channel Initialization Policy (Rate Optimization, INP optimization etc.)
               // ServicePolicy==1 => INP optimization
               // update previous optimal INP to equal INP of current solution if previousoptimalINP <= currentINP
               if ( (pt_Input->uc_ChannelInitPolicy == 1) && (ft_CiPolicyInpIncrPossible == 1) )
               {
                  // Check if INP_opt <= current INP
                  // and if so update INP_opt to equal current INP
                  // INP equals 4*R*D/Lp so we need to check if (4*R_opt*D_opt/Lp_opt) <= (4*R*D/Lp)
                  // ie check if (4*R_opt*D_opt*Lp) <= (4*R*D*Lp_opt) ie check if ((R_opt*D_opt)*Lp) <= ((R*D)*Lp_opt)
                  // ie check if (INP_numer_opt * INP_denom) <= (INP_numer * INP_denom_opt)
                  // (where INP_numer is R*D, INP_denom_opt is Lp_opt, INP_numer_opt is R_opt*D_opt and INP_denom is Lp)
                  // and update INP_numer_opt & INP_denom_opt to match current INP_numer & INP_denom respectively
                  // Note that R*D <= 16*16 by spec, and Lp <= 15*(NSCusMax-1) <= 15*63 <= 16*64 by spec
                  // which means INP_numer * INP_denom_opt and INP_numer_opt * INP_denom will both be <= 2^18 by spec
                  // hence INP numerator-denominator crossproducts can only use 18bits at most and will not overflow 32bit storage
                  ul_InpNumerVal = (uint32)((uint32)us_Rp * (uint32)us_Dp);
                  ul_InpDenomVal = (ul_Lp);

                  ul_temp1 = (uint32)(ul_InpNumerOpt * ul_InpDenomVal);
                  ul_temp2 = (uint32)(ul_InpNumerVal * ul_InpDenomOpt);

                  // If INP_opt <= INP_current ie (INPnumer_opt*INPdenom_current) <= (INPnumer_current*INPdenom_opt)
                  // then update optimal configuration to match current configuration
                  if ((ul_temp1 <= ul_temp2))
                  {
                     ul_InpNumerOpt = ul_InpNumerVal;
                     ul_InpDenomOpt = ul_InpDenomVal;

                     // Note: at this point, we know us_Kp > ceil(Gp/Tp) because we have
                     //       already check [k*(CWSize-Rp) - Gp] > 0.
                     us_Bp = us_Kp-ceil16(us_Gp, us_Tp);

                     us_BpOpt = us_Bp;
                     us_KpOpt = us_Kp;
                     us_RpOpt = us_Rp;
                     us_MpOpt = us_Mp;
                     us_DpOpt = us_Dp;
                     us_TpOpt = us_Tp;
                     us_GpOpt = us_Gp;
                     us_FpOpt = us_Fp;
                     ul_LpOpt = ul_Lp;
                     us_CWSizeOpt = us_CWSize;
                     l_NetRateOpt = l_NetRate;
                     ft_FeasibleSolutionFound = 1;
                  }
                  // Continue till INP optimization is done for all (R,D,M) combinations
                  continue;
               }
               //------------------------------------------------------------------
               // Save output parameters if they are the best so far.
               // Currently, we define "the best" output parameters as follow:
               //
               // In Rate Adaptive Case:
               //   a) parameter that give higher NetRate is selected
               //   b) if both sets of parameters give the same NetRate, then we
               //      would pick the one that gives highest INP (note: this criteria is
               //      the implicit based on the way we search for Rp and Dp)
               // In Fixed Rate or Rate Limited Case:
               //   a) we pick the first set of parameters that can meet reached
               //      the max rate requirement, while satisfying all other constraints
               //   b) this set of parameters would also have the highest INP
               //      due to the way we search for Rp and Dp.
               //
               // NOTE: if we have to apply other optimization mode such as
               //       picking a set of parameters with lowest possible delay,
               //       then we have to:
               //         1. change the stopping criterion of the search algorithm
               //            for fixed-rate or rate-limited case
               //         2. consider parameters that gives the same NetRate
               //            (i.e. NetRate == NetRateOpt)
               //------------------------------------------------------------------
               if (l_NetRate > l_NetRateOpt)
               {

                  // Note: at this point, we know us_Kp > ceil(Gp/Tp) because we have
                  //       already check [k*(CWSize-Rp) - Gp] > 0.
                  us_Bp = us_Kp-ceil16(us_Gp, us_Tp);

                  us_BpOpt = us_Bp;
                  us_KpOpt = us_Kp;
                  us_RpOpt = us_Rp;
                  us_MpOpt = us_Mp;
                  us_DpOpt = us_Dp;
                  us_TpOpt = us_Tp;
                  us_GpOpt = us_Gp;
                  us_FpOpt = us_Fp;
                  ul_LpOpt = ul_Lp;
                  us_CWSizeOpt = us_CWSize;
                  l_NetRateOpt = l_NetRate;
                  ft_FeasibleSolutionFound = 1;

//wng - this debug code logs all the feasbile combo
//      it's useful in case we find a mismatch between the C/Matlab code output and Runtime code output
//      Note: if we run out of PMEM, we can consider removing this code.


               } //end if (ul_NetRate...


               //------------------------------------------------------------------
               // This implementation just exits at the first configuration
               // that achieved max rate without searching further.
               //------------------------------------------------------------------
               /* Pseudo code:
                  if (NetRate >= MaxRate-8 & NetRate >= MinRate)
                     Done = 1;
                     break;
                  end
               */
               // Note: us_MaxRate & us_MinRate in unit of 8 kbps (Q16.0),
               //       while l_NetRate in unit of kbps (Q28.4), so perform conversion in the check...
               if ( (l_NetRate >= (pt_Input->us_MaxRate-(FIXRATE_LEEWAY/8))<<7) &&
                     (l_NetRate >= (pt_Input->us_MinRate<<7)) )
               {
                  ft_Done = 1;   // set flag to exit all loops
                  break;         // exit Dp-loop
               }

            } //end for (us_Dp...

            if (ft_Done!=0)
            {
               break;   // exit Tp-loop
            }

         } //end for (us_k... => Tp-loop

         if (ft_Done!=0)
         {
            break;   // exit Mp-loop
         }

      } //end for (us_Mp...

      if (ft_Done!=0)
      {
         break;   // exit Rp-loop
      }

   } //end for (us_Rp...


   if (ft_FeasibleSolutionFound == FALSE)
   {
      gs_VDSL2FrameParamGenStatus |= VDSL2FRAMEPARAMGEN_ERR_INPUT_CONSTRAINT_NOT_FEASIBLE;
      return FAIL;

   }


   //====================================================================================
   // Store the set of optimal framing parameters {Bpn,Rp,Mp,Dp,Tp,Gp,Fp,Lp,Ip}
   // to the final output config-struct that will be used for Showtime
   // HW configure (gt_rx_config_v2)
   // Note:  Current algorithm only handles 1 bearer channel (BC0).
   //====================================================================================

   pt_Output->sa_Bpn[s_path][s_bc] = us_BpOpt;
   pt_Output->s_Rp[s_path] = us_RpOpt;
   pt_Output->s_Mp[s_path] = us_MpOpt;
   pt_Output->s_Dp[s_path] = us_DpOpt;
   pt_Output->s_Tp[s_path] = us_TpOpt;
   pt_Output->s_Gp[s_path] = us_GpOpt;
   pt_Output->s_Fp[s_path] = us_FpOpt;
   pt_Output->ul_Lp[s_path] = ul_LpOpt;
   pt_Output->s_Ip[s_path] = us_CWSizeOpt;  // note: CWSize = I*q and in current the implementation, we assume q = 1.

   // XDSLRTFW-1877 : VDSL2 IFEC ATTNDR (Start)
   gus_RpOpt_Attndr = us_RpOpt;
   // XDSLRTFW-1877 : VDSL2 IFEC ATTNDR (Start)
   // Report ATTNDR actual parameters
   if(gft_CalcAttndr == TRUE)
   {

      //XDSLRTFW-1522 (Start)
      gul_ATTNDR = (l_NetRateOpt >> 4) * 1000;

      // XDSLRTFW-3556 : VRx518 shows different method_0 attainable datarate than VR9(R7) (Start)
      // ATTNDR initialization which will be seen at Showtime entry according to Method_0 Framing based or Method_1 or Method_2 algorithm
      // Note: Method_0 Channel Capacity based ATTNDR initialization is done in ComputeMaxDataRate() function
      if ( !((guc_attndr_method == ATTNDR_METHOD_0) &&
             (gt_DbgImprovedATTNDR.us_ATTNDR_MISC_CONFIGURATION == ATTNDR_ALGO_2_METHOD_0_CHANNEL_CAPACITY_BASED)) )

      {
         gt_LineStatusDS.ul_AttainableDataRate = gul_ATTNDR;   //IFEC Mode Training)
      }
      // XDSLRTFW-3556 : VRx518 shows different method_0 attainable datarate than VR9(R7) (End)



      //XDSLRTFW-1522 (End)
#ifndef STANDALONE_FRAMING_TEST
      gt_Improved_ATTNDR_ActParam.us_ATTNDR_ACT_METHOD = guc_attndr_method;

      gt_Improved_ATTNDR_ActParam.us_ATTNDR_ACT_INPds = (uint16)((((us_DpOpt * us_RpOpt)<<2)*10)/ul_LpOpt);

      gt_Improved_ATTNDR_ActParam.us_ATTNDR_ACT_INP_REINds = 0;

      // Compute the actual delay  _          _
      //           Sp * (Dp-1)    |     Qp     |
      // delay = -------------- x | 1 - -----  |
      //           Qp * fs        |_    NFECp _|

      s_lp = 0;
      // Compute the codeword size for each latency path
      pt_Output->s_Nfecp[s_lp] =  ComputeCWSize(pt_Output, s_lp);

      //======================================================
      // Compute q = N/I
      //======================================================
      pt_Output->s_q[s_lp] = 0;

      s_temp = pt_Output->s_Nfecp[s_lp];
      while ((s_temp >= pt_Output->s_Ip[s_lp]) && (pt_Output->s_Ip[s_lp] > 0))
      {
         s_temp -= pt_Output->s_Ip[s_lp];
         pt_Output->s_q[s_lp]++;
      }

      Compute_PERBp(pt_Output, s_lp, gs_TxNumTones);

      ComputeDelay(pt_Output, s_lp, &gt_Improved_ATTNDR_ActParam.us_ATTNDR_ACT_DELAYds);
      gt_Improved_ATTNDR_ActParam.us_ATTNDR_ACT_DELAYds *= 100; // For VRX MCAT delay in 0.01ms
#endif
   }
   // XDSLRTFW-1877 : VDSL2 IFEC ATTNDR (End)


   return (SUCCEED);
}



/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       int16 CheckCoPrime(int16 s_D, int16 s_CWSize)
*
*   Abstract:
*      Helper Function for FormVDSL2FramingParams().
*       This function checks if the input D and CWSize are co-prime
*
*   Input Parameters:
*      s_D      -- interleaver depth
*      s_CWSize -- CWSize
*
*   Output Parameter:
*      None
*
*   Return:
*      ft_CoPrime -- 1: IS co-prime; 0: is NOT co-prime
*
*   Global Variables:
*      None
*
*--------------------------------------------------------------------------------------------------
^^^*/
int16 CheckCoPrime(int16 s_D, int16 s_CWSize)
{
   // Euclid's algorithm: repeatedly replace the pair (a, b) by (b, a mod b)
   // until the remainder b becomes zero; the value left in a is the gcd.
   int16 s_gcd = s_D;
   int16 s_rem = s_CWSize;

   while (s_rem != 0)
   {
      int16 s_next = s_gcd % s_rem;

      s_gcd = s_rem;
      s_rem = s_next;
   }

   // D and CWSize are co-prime exactly when the gcd found above equals 1
   return (s_gcd == 1) ? 1 : 0;
}


/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       void FindAlphaPlusOther(uint16 *pusa_alpha_numer, uint16 *pusa_alpha_denom, uint16 *pus_alphaplus_other_numer, uint16 *pus_alphaplus_other_denom)
*
*   Abstract:
*      Helper Function for FormVDSL2FramingParams().
*       This function finds "alphaplus_other", defined as: min(alpha[0], alpha[1])
*
*   Input Parameters:
*      pusa_alpha_numer -- pointer to the array that stores the numerators of alpha[]
*      pusa_alpha_denom -- pointer to the array that stores the denominators of alpha[]
*
*   Output Parameter:
*      pus_alphaplus_other_numer -- pointer to the numerator of "alphaplus_other"
*      pus_alphaplus_other_denom -- pointer to the denominator of "alphaplus_other"
*
*   Return:
*      None
*
*   Global Variables:
*      None
*
*--------------------------------------------------------------------------------------------------
^^^*/

void FindAlphaPlusOther(uint16 *pusa_alpha_numer, uint16 *pusa_alpha_denom, uint16 *pus_alphaplus_other_numer, uint16 *pus_alphaplus_other_denom)
{
   // Flag value 0 asks the comparison helper for the MIN of the two fractions
   FlagT ft_SelectMax = 0;

   // Seed the result with alpha[0] = alpha_numer[0]/alpha_denom[0]
   *pus_alphaplus_other_numer = pusa_alpha_numer[0];
   *pus_alphaplus_other_denom = pusa_alpha_denom[0];

   // The helper works in place: the seeded result is kept when it is <= alpha[1],
   // otherwise it is overwritten with alpha[1] = alpha_numer[1]/alpha_denom[1].
   // On return the result therefore holds min(alpha[0], alpha[1]).
   Find_MinORMax_Of2Fractions(ft_SelectMax,
                              pus_alphaplus_other_numer, pus_alphaplus_other_denom,
                              pusa_alpha_numer[1], pusa_alpha_denom[1]);
}

/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       void FindAlphaPlusFinal(uint16 *pus_alphaplus_numerator, uint16 *pus_alphaplus_denominator,
*                        uint16 *pus_alphaplus_other_numer, uint16 *pus_alphaplus_other_denom,
*                        uint16 *pus_alphaplus_maxdelay_numer, uint16 *pus_alphaplus_maxdelay_denom)
*
*   Abstract:
*      Helper Function for FormVDSL2FramingParams().
*       This function finds "alphaplus", defined as: min(alphaplus_other, alphaplus_maxdelay)
*
*   Input Parameters:
*      pus_alphaplus_other_numer -- pointer to the numerator of "alphaplus_other"
*      pus_alphaplus_other_denom -- pointer to the denominator of "alphaplus_other"
*      pus_alphaplus_maxdelay_numer -- pointer to the numerator of "alphaplus_maxdelay"
*      pus_alphaplus_maxdelay_denom -- pointer to the denominator of "alphaplus_maxdelay"
*
*   Output Parameter:
*      pus_alphaplus_numerator   -- pointer to the numerator of "alphaplus"
*      pus_alphaplus_denominator -- pointer to the denominator of "alphaplus"
*
*   Return:
*      None
*
*   Global Variables:
*      None
*
*--------------------------------------------------------------------------------------------------
^^^*/

void FindAlphaPlusFinal(uint16 *pus_alphaplus_numerator, uint16 *pus_alphaplus_denominator,
                        uint16 *pus_alphaplus_other_numer, uint16 *pus_alphaplus_other_denom,
                        uint16 *pus_alphaplus_maxdelay_numer, uint16 *pus_alphaplus_maxdelay_denom)
{
   // Flag value 0 asks the comparison helper for the MIN of the two fractions
   FlagT ft_SelectMax = 0;

   // Start with alphaplus = alphaplus_other ...
   pus_alphaplus_numerator[0] = pus_alphaplus_other_numer[0];
   pus_alphaplus_denominator[0] = pus_alphaplus_other_denom[0];

   // ... and let the helper overwrite it in place with alphaplus_maxdelay
   // when that fraction is the smaller one, giving
   // alphaplus = min(alphaplus_other, alphaplus_maxdelay)
   Find_MinORMax_Of2Fractions(ft_SelectMax,
                              pus_alphaplus_numerator, pus_alphaplus_denominator,
                              pus_alphaplus_maxdelay_numer[0], pus_alphaplus_maxdelay_denom[0]);
}


/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       void FindAlphaMinusOther(uint16 *pusa_alpha_numer, uint16 *pusa_alpha_denom, uint16 *pus_alphaminus_other_numer, uint16 *pus_alphaminus_other_denom)
*
*   Abstract:
*      Helper Function for FormVDSL2FramingParams().
*       This function finds "alphaminus_other", defined as: max(alpha[2], alpha[3], alpha[4], alpha[5])
*
*   Input Parameters:
*      pusa_alpha_numer -- pointer to the array that stores the numerators of alpha[]
*      pusa_alpha_denom -- pointer to the array that stores the denominators of alpha[]
*
*   Output Parameter:
*      pus_alphaminus_other_numer -- pointer to the numerator of "alphaminus_other"
*      pus_alphaminus_other_denom -- pointer to the denominator of "alphaminus_other"
*
*   Return:
*      None
*
*   Global Variables:
*      None
*
*--------------------------------------------------------------------------------------------------
^^^*/

void FindAlphaMinusOther(uint16 *pusa_alpha_numer, uint16 *pusa_alpha_denom, uint16 *pus_alphaminus_other_numer, uint16 *pus_alphaminus_other_denom)
{
   // Flag value 1 asks the comparison helper for the MAX of the two fractions
   FlagT ft_SelectMax = 1;
   int i;

   // Seed the running maximum with alpha[2] = alpha_numer[2]/alpha_denom[2]
   *pus_alphaminus_other_numer = pusa_alpha_numer[2];
   *pus_alphaminus_other_denom = pusa_alpha_denom[2];

   // Fold alpha[3], alpha[4] and alpha[5] into the running maximum one at a time.
   // The helper updates the result in place, so after the last pass it holds
   // max(alpha[2], alpha[3], alpha[4], alpha[5]).
   for (i = 3; i <= 5; i++)
   {
      Find_MinORMax_Of2Fractions(ft_SelectMax,
                                 pus_alphaminus_other_numer, pus_alphaminus_other_denom,
                                 pusa_alpha_numer[i], pusa_alpha_denom[i]);
   }
}


/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       void FindAlphaMinusFinal(uint16 *pus_alphaminus_numerator, uint16 *pus_alphaminus_denominator,
*                        uint16 *pus_alphaminus_other_numer, uint16 *pus_alphaminus_other_denom,
*                        uint16 *pus_alphaminus_mindelay_numer, uint16 *pus_alphaminus_mindelay_denom)
*
*   Abstract:
*      Helper Function for FormVDSL2FramingParams().
*       This function finds "alphaminus", defined as: max(alphaminus_other, alphaminus_mindelay)
*
*   Input Parameters:
*      pus_alphaminus_other_numer -- pointer to the numerator of "alphaminus_other"
*      pus_alphaminus_other_denom -- pointer to the denominator of "alphaminus_other"
*      pus_alphaminus_mindelay_numer -- pointer to the numerator of "alphaminus_mindelay"
*      pus_alphaminus_mindelay_denom -- pointer to the denominator of "alphaminus_mindelay"
*
*   Output Parameter:
*      pus_alphaminus_numerator   -- pointer to the numerator of "alphaminus"
*      pus_alphaminus_denominator -- pointer to the denominator of "alphaminus"
*
*   Return:
*      None
*
*   Global Variables:
*      None
*
*--------------------------------------------------------------------------------------------------
^^^*/

void FindAlphaMinusFinal(uint16 *pus_alphaminus_numerator, uint16 *pus_alphaminus_denominator,
                         uint16 *pus_alphaminus_other_numer, uint16 *pus_alphaminus_other_denom,
                         uint16 *pus_alphaminus_mindelay_numer, uint16 *pus_alphaminus_mindelay_denom)
{
   // Flag value 1 asks the comparison helper for the MAX of the two fractions
   FlagT ft_SelectMax = 1;

   // Start with alphaminus = alphaminus_other ...
   pus_alphaminus_numerator[0] = pus_alphaminus_other_numer[0];
   pus_alphaminus_denominator[0] = pus_alphaminus_other_denom[0];

   // ... and let the helper overwrite it in place with alphaminus_mindelay
   // when that fraction is the larger one, giving
   // alphaminus = max(alphaminus_other, alphaminus_mindelay)
   Find_MinORMax_Of2Fractions(ft_SelectMax,
                              pus_alphaminus_numerator, pus_alphaminus_denominator,
                              pus_alphaminus_mindelay_numer[0], pus_alphaminus_mindelay_denom[0]);
}


/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       void Find_MinORMax_Of2Fractions(FlagT ft_FindMAX, uint16 *pus_Result_numer, uint16 *pus_Result_denom, uint16 us_Y_numer, uint16 us_Y_denom)
*
*   Abstract:
*      Helper Function for FindAlphaPlusOther(), FindAlphaPlusFinal(), FindAlphaMinusOther() & FindAlphaMinusFinal().
*       This function finds either the min(X,Y) OR max(X,Y), depending
*       on the input flag (ft_FindMAX).
*
*       X and Y are fractions defined as follow:
*         X = Nx/Dx, Y = Ny/Dy
*
*   Input Parameters:
*      ft_FindMAX       -- flag to indicate finding the MIN or MAX of the 2 input fractions (0: Min; 1: Max)
*       pus_Result_numer -- pointer to the numerator of X (Nx)
*       pus_Result_denom -- pointer to the denominator of X (Dx)
*      us_Y_numer       -- numerator of Y (Ny)
*      us_Y_denom       -- denominator of Y (Dy)
*
*   Output Parameter:
*       pus_Result_numer -- pointer to the numerator of result for min/max(X,Y)
*       pus_Result_denom -- pointer to the denominator of result for min/max(X,Y)
*
*   Return:
*       None
*
*   Global Variables:
*      None
*
*--------------------------------------------------------------------------------------------------
^^^*/
void Find_MinORMax_Of2Fractions(FlagT ft_FindMAX, uint16 *pus_Result_numer, uint16 *pus_Result_denom, uint16 us_Y_numer, uint16 us_Y_denom)
{
   // Snapshot X = Nx/Dx from the in/out result pointers
   uint16 us_Nx = *pus_Result_numer;
   uint16 us_Dx = *pus_Result_denom;

   uint32 ul_NxDy, ul_NyDx;
   int i_KeepX;

   // X = Nx/Dx and Y = Ny/Dy are compared without any division by looking
   // at the cross products instead:  (X > Y)  <=>  (Nx*Dy > Ny*Dx)
   MULU16(ul_NxDy, us_Nx, us_Y_denom);   // ul_NxDy = Nx*Dy
   MULU16(ul_NyDx, us_Y_numer, us_Dx);   // ul_NyDx = Ny*Dx

   if (ft_FindMAX == 1)
   {
      // MAX(X,Y): X stays in place when X >= Y
      i_KeepX = (ul_NxDy >= ul_NyDx);
   }
   else
   {
      // MIN(X,Y): X stays in place when X <= Y
      i_KeepX = (ul_NxDy <= ul_NyDx);
   }

   // X is already stored in the result, so only the "Y wins" case needs a write
   if (!i_KeepX)
   {
      *pus_Result_numer = us_Y_numer;
      *pus_Result_denom = us_Y_denom;
   }
}


#ifndef STANDALONE_FRAMING_TEST

/*^^^
*-------------------------------------------------------------------------------------------------
*
*   Prototype:
*       int16 GetFramingParamsInput(void)
*
*   Abstract:
*      Fill the input structure for the framing parameter selection function.
*
*   Input Parameters:
*      None
*
*   Output Parameter:
*       None
*
*   Return:
*       TRUE: success, FALSE: failed
*
*   Global Variables:
*      gt_rx_TPS_Map[]       -- (I) the structure specifying the mapping from the RX bearer channel
*                                to a latency path
*      gta_UsBearerChanAct[] -- (I) the structure of the actual US TPS parameters selected
*                                for each bearer channel
*       gta_DsBearerChanAct[] -- (I) the structure of the actual DS TPS parameters selected
*                                for each bearer channel
*       gta_UsLpAct[]         -- (I) the structure of the actual US PMS parameters selected
*                                for each latency path
*       gta_DsLpAct[]         -- (I) the structure of the actual DS PMS parameters selected
*                                for each latency path
*       gs_DataFrameRate      -- (I) data frame rate in symbols/second
*       guc_DsOneOverSmaxAct  -- (I) the maximum 1/S for DS
*       guc_UsOneOverSmaxAct  -- (I) the maximum 1/S for US
*       guc_DsMinMsgOHRAct    -- (I) the minimum message overhead rate for DS
*       guc_UsMinMsgOHRAct    -- (I) the minimum message overhead rate for US
*
*       gt_FormFramingParamsInputs_v2 -- (O) the structure containing the input parameters
*                                        to the framing parameter selection function.
*--------------------------------------------------------------------------------------------------
^^^*/
int16 GetFramingParamsInput(void)
{
   // Fills gt_FormFramingParamsInputs_v2 for the downstream framing parameter search.
   //   - Loop-diagnostic mode: a fixed set of defaults is written and TRUE is returned.
   //   - Otherwise: the values are taken from the selected DS bearer channel / latency path
   //     structures and the debug override globals.
   // Returns FALSE when both bearer channels are enabled (unless DUAL_LATENCY_VR9 is defined)
   // or when the used bearer channel is mapped to DISABLED_LP; TRUE otherwise.
   // Side effects besides the input structure: gs_MinMsgOHR_save and gul_DILV_MEM_SIZE are updated.
   int16 s_bc, s_lp;

   TPS_TC_BearerChanAct_t *pt_BearerChanAct;
   VDSL2FramingParamsInputs_t *pt_FramingParamsInputs;

   //Set the pointer to the framing parameter input structure
   pt_FramingParamsInputs = &gt_FormFramingParamsInputs_v2;

   // initialize framing input parameters for DIAG mode since they are not available through messages
   // and we use the same FormVDSL2FramingParams() to compute channel capacity
   if (gul_OperationModeStatus_VDSL2 & V2_LOOP_DIAG)
   {
      pt_FramingParamsInputs->s_BCtoLP[BC0] = LP0;
      pt_FramingParamsInputs->s_BCtoLP[BC1] = DISABLED_LP;

      pt_FramingParamsInputs->s_Dmax = 3072;
      pt_FramingParamsInputs->s_OneOverSmax = 48;
      pt_FramingParamsInputs->s_fs = 4080;

      pt_FramingParamsInputs->us_MinRate = 0;
      pt_FramingParamsInputs->us_MaxRate = 30000;
      pt_FramingParamsInputs->s_MinMsgOHR = 16;
      pt_FramingParamsInputs->s_MaxMsgOHR = 256;
      pt_FramingParamsInputs->s_MinDelay = 0;
      pt_FramingParamsInputs->s_MaxDelay = 20;
      pt_FramingParamsInputs->s_MinINP = 0;
      pt_FramingParamsInputs->s_OHFrameType = 1;

      pt_FramingParamsInputs->s_MinR = MAX_R;
      pt_FramingParamsInputs->s_MaxR = MAX_R;
      pt_FramingParamsInputs->s_RStep = 16;
      pt_FramingParamsInputs->uc_ChannelInitPolicy = 0;

      return(TRUE);
   }

   //Set the bearer channel mapping (Mei?? need to revisit)
   pt_FramingParamsInputs->s_BCtoLP[BC0] = gt_rx_TPS_Map.s_BCtoLP[BC0];
   pt_FramingParamsInputs->s_BCtoLP[BC1] = gt_rx_TPS_Map.s_BCtoLP[BC1];

   //Set the pointers to the actual selected TPS and PMS structures

#ifndef DUAL_LATENCY_VR9
   //Cannot support two bearer channels yet
   if((gta_DsBearerChanAct[BC0].s_TypeEnabled != TPS_TC_DISABLED) &&
         (gta_DsBearerChanAct[BC1].s_TypeEnabled != TPS_TC_DISABLED))
   {
      return(FALSE);
   }
#endif //#ifndef DUAL_LATENCY_VR9

   //Find the bearer channel used: BC0 when it is enabled, BC1 otherwise
   if(gta_DsBearerChanAct[BC0].s_TypeEnabled != TPS_TC_DISABLED)
   {
      s_bc = BC0;
   }
   else
   {
      s_bc = BC1;
   }
   pt_BearerChanAct = &(gta_DsBearerChanAct[s_bc]);

   //Find the latency path used; a bearer channel without latency path cannot be framed
   s_lp = pt_FramingParamsInputs->s_BCtoLP[s_bc];
   if(s_lp != DISABLED_LP)
   {
      pt_FramingParamsInputs->s_Dmax = gta_DsLpAct[s_lp].s_MaxD;
      pt_FramingParamsInputs->s_MaxR = gta_DsLpAct[s_lp].s_MaxR;
   }
   else
   {
      return(FALSE);
   }


   //Scale the frame rate (in symbols/second), by 1024/1000 to ease the computation
   {
      uint32 ul_Acc;

      ul_Acc = gs_DataFrameRate << FRAME_RATE_SHIFT_CNT;
      ul_Acc += 500;   // add half of the divisor (1000) before dividing
      pt_FramingParamsInputs->s_fs = floor32(ul_Acc, (int16)1000);
   }

   pt_FramingParamsInputs->us_MinRate = pt_BearerChanAct->ta_TpsTcType.us_MinNetDataRate;
   pt_FramingParamsInputs->us_MaxRate = pt_BearerChanAct->ta_TpsTcType.us_MaxNetDataRate;

   // Forced DS min and max rate: Debug option XDSLRTFW-3554
   // (a value of -1 means "no override")
   if (gs_ForcedMinRate_DS != -1)
   {
      pt_FramingParamsInputs->us_MinRate = gs_ForcedMinRate_DS;
   }
   if (gs_ForcedMaxRate_DS != -1)
   {
      pt_FramingParamsInputs->us_MaxRate = gs_ForcedMaxRate_DS;
   }

   pt_FramingParamsInputs->s_MinDelay = 0;
   pt_FramingParamsInputs->s_MaxDelay = pt_BearerChanAct->ta_TpsTcType.s_MaxLatency;
   // Forced Max Delay : Debug option XDSLRTFW-3554
   if (gs_ForcedMaxDelay_DS != -1)
   {
      pt_FramingParamsInputs->s_MaxDelay = gs_ForcedMaxDelay_DS;
   }

   // Only the lower 7 bits of uc_IMAxINP are used as the minimum INP
   pt_FramingParamsInputs->s_MinINP = (int16)(pt_BearerChanAct->ta_TpsTcType.uc_IMAxINP & 0x7f);
   // NOTE(review): uc_ChannelInitPolicy is only ever set here, never cleared on this path;
   //               if the option bit is not set the previous value is kept -- confirm this is intended.
   if(pt_BearerChanAct->ta_TpsTcType.uc_TPSTCoptions & TPS_TC_OPTION_CIP_MAX_INP)
   {
      pt_FramingParamsInputs->uc_ChannelInitPolicy = MAX_INP;
   }
   //Forced Min INP: Debug option XDSLRTFW-3554
   if (gs_ForcedMinINP_DS != -1)
   {
      pt_FramingParamsInputs->s_MinINP = (gs_ForcedMinINP_DS & 0x7F);
   }

   pt_FramingParamsInputs->s_OHFrameType = 1;

   pt_FramingParamsInputs->s_OneOverSmax = guc_DsOneOverSmaxAct;
   pt_FramingParamsInputs->s_MinMsgOHR = guc_DsMinMsgOHRAct+1;

   // Keep a copy of the MinMsgOHR value before it is possibly raised below
   gs_MinMsgOHR_save = gt_FormFramingParamsInputs_v2.s_MinMsgOHR;


   //XDSLRTFW-1456
   //If in fastpath mode, change the MinMsgOHR requirement by 30% in bitloading. Then in showtime,
   //we will still use the original low MinMsgOHR requirement. With this change, the SRA downshift
   //operation will not be effectively disabled due to the actual MsgOHR too close to the MinMsgOHR.
   //In loop diagmode, the following change should be turned off.
   if (gs_RxState != R_O_SHOWTIME_RX)
   {
      if((gft_CapableRxAutoSRA == TRUE) &&
            ((gul_OperationModeStatus_VDSL2 & V2_LOOP_DIAG) == 0))
      {
         // MinMsgOHR * 1.3, rounded to nearest, and capped at 247
         gt_FormFramingParamsInputs_v2.s_MinMsgOHR = (gt_FormFramingParamsInputs_v2.s_MinMsgOHR*13 + 5)/10;

         if(gt_FormFramingParamsInputs_v2.s_MinMsgOHR > 247)
         {
            gt_FormFramingParamsInputs_v2.s_MinMsgOHR = 247;
         }
      }
   }

   // Make sure the max overhead rate is greater than or equal to the min overhead rate.
   // This check has to run AFTER s_MinMsgOHR has been finalized for this call (see above);
   // performing it earlier would compare against whatever value s_MinMsgOHR still held
   // from a previous call.
   if (gs_dbgMaxOHR >= pt_FramingParamsInputs->s_MinMsgOHR)
   {
      pt_FramingParamsInputs->s_MaxMsgOHR = gs_dbgMaxOHR;
   }
   else
   {
      pt_FramingParamsInputs->s_MaxMsgOHR = 256;
   }

   //Decide the limit for I*D/2, where I is the interleaver block size and D the interleaver depth
   gul_DILV_MEM_SIZE = CalcMaxDsDilvSize(s_lp);

   // A non-zero debug value overrides the computed deinterleaver memory size
   if (gul_dbg_DILV_MEM_SIZE)
   {
      gul_DILV_MEM_SIZE = gul_dbg_DILV_MEM_SIZE;
   }

   pt_FramingParamsInputs->s_MinR = guc_minR;
   pt_FramingParamsInputs->s_RStep = 16;

   return(TRUE);
}

#endif /*STANDALONE_FRAMING_TEST*/

/*
 * CiPolicyOneMaximizeINP
 *
 * Scans all codeword sizes us_Cw from 48 up to MAX_CWSIZE and, for every size
 * that passes the Lp, 1/S, Dp and delay checks below, estimates the INP as
 * 64*Dp/Lp. Whenever an estimate exceeds the value currently stored in
 * pt_Input->s_MinINP, s_MinINP is raised to that estimate.
 *
 * After the scan s_MinINP is capped at 16 and, if it is still above the value
 * it had on entry, reduced by one as a safety margin for the framing search.
 *
 * Input:
 *    pt_Input - framing inputs; reads us_MaxRate, s_fs, s_OneOverSmax,
 *               s_MaxDelay, s_Dmax and s_MinINP; writes s_MinINP.
 *
 * Globals read: gl_MaxSumLpSupported, gul_DILV_MEM_SIZE, gft_RestrictDpTo1.
 *
 * Return:
 *    TRUE  if at least one candidate estimate exceeded the s_MinINP held at
 *          that moment of the scan. Note that, because of the cap and the
 *          safety decrement, the final s_MinINP is not necessarily larger
 *          than the value on entry even when TRUE is returned.
 *    FALSE otherwise.
 */
FlagT CiPolicyOneMaximizeINP(VDSL2FramingParamsInputs_t *pt_Input )
{
   FlagT ft_CiPolicyInpIncrPossible;
   int16 s_INP_bkp;
   uint16 us_Cw, us_CwMin;
   uint32 ul_Lp, ul_Dp, ul_Sp, ul_Delay, ul_MaxINP_CIP1;
   uint32 ul_Temp, ul_Temp1, ul_Temp2, ul_LpNum;
   uint32 ul_DpMaxCh, ul_LpMaxCh;


   ft_CiPolicyInpIncrPossible = FALSE;
   s_INP_bkp = pt_Input->s_MinINP;   //keep a backup of MinINP configuration
   ul_MaxINP_CIP1 = 0;

   //gl_MaxSumLpSupported
   ul_LpMaxCh = (gl_MaxSumLpSupported>>1);

   // Note: DpMaxCh limitation does not exist. This limitation here was artificially introduced to solve [AVNXFW-1970],
   // so the INP estimation does not create too optimistic values for certain MDO split values.
   ul_DpMaxCh= gul_DILV_MEM_SIZE>>6;

   // Same applies to the Cw_min limitation. The minimum codeword size is not limited in our framing generation, but is
   // limited here artificially to generate achievable INP estimations (solving [AVNXFW-1970])
   us_CwMin = 48;

   // The ul_Lp numerator is constant over the loop, so it is computed once here.
   // It is evaluated in uint32: in promoted (signed) int the expression can exceed
   // INT_MAX for us_MaxRate values close to 65535.
   ul_LpNum = ((((uint32)pt_Input->us_MaxRate) << 3) + 255) << 12;   // 255: max overhead rate

   for(us_Cw=us_CwMin; us_Cw<=MAX_CWSIZE; us_Cw++)
   {
      // ************* Step1: Calculate ul_Lp (us_Cw dependent) ********************
      // ul_Lp = ul_LpNum / ul_LpDenum; with ul_LpNum = (MaxRate+OR) and ul_LpDenum = (1 - 16/us_Cw)*s_fs

      // (1 - 16/us_Cw) scaled by 2^15; always positive because us_Cw >= 48
      ul_Temp1 = (1<<15) - ((16<<15)/us_Cw);

      ul_Lp = ul_LpNum / ul_Temp1;
      ul_Lp = (ul_Lp<<(10+3))/(pt_Input->s_fs);

      // Lp == 0 cannot yield a solution (and is used as a divisor further down);
      // Lp above the channel capacity is not usable either.
      if ((ul_Lp == 0) || (ul_Lp > ul_LpMaxCh))
      {
         continue;
      }

      // 1/S = ul_Lp/(8*us_Cw) must satisfy 1/64 <= 1/S <= s_OneOverSmax.
      // The bounds are checked on the integers themselves, which is exact and
      // avoids truncating a fixed-point intermediate:
      //    1/S > s_OneOverSmax  <=>  ul_Lp > 8*us_Cw*s_OneOverSmax
      //    1/S < 1/64           <=>  8*ul_Lp < us_Cw
      if ( (ul_Lp > ((((uint32)us_Cw) << 3) * (uint32)pt_Input->s_OneOverSmax)) ||
            ((ul_Lp << 3) < us_Cw) )
      {
         continue;
      }


      //************** Step2: Calculate ul_Dp (us_Cw, ul_Lp dependent) ****************************
      // ul_Dp = Dp_num / Dp_denum; with Dp_num = delay * s_fs
      // and Dp_denum = (1 - 1/us_Cw)*(8.0*us_Cw/ul_Lp) = ul_Temp2 * ul_Temp

      // (1 - 1/us_Cw) scaled by 2^15
      ul_Temp2 = (1<<15) - ((1<<15)/us_Cw);

      // 8*us_Cw/ul_Lp in fixed point
      ul_Temp = ((us_Cw<<(3+16))/ul_Lp)>>2;
      if (ul_Temp == 0)
      {
         //avoid division by 0
         continue;
      }

      ul_Dp = (((pt_Input->s_MaxDelay*pt_Input->s_fs)<<5)/ul_Temp2);
      ul_Dp = ((ul_Dp<<16)/ul_Temp)>>2;

      // check if ul_Dp is between boundaries
      if ( (ul_Dp < 1) || (gft_RestrictDpTo1 == 1 && ul_Dp>1) || (ul_Dp >= ul_DpMaxCh) || (ul_Dp >=pt_Input->s_Dmax))
      {
         continue;
      }

      // ul_Sp = 8*us_Cw / ul_Lp;
      ul_Sp = (us_Cw<<(3+16))/ul_Lp;

      // ul_Delay = (ul_Sp*(ul_Dp-1))*(1-(1/us_Cw))/s_fs;
      // ul_Temp2 already contains 1-(1/us_Cw)
      ul_Delay = (((ul_Sp*(ul_Dp-1)>>8)*(ul_Temp2>>8)))/pt_Input->s_fs;

      // s_MaxDelay == 0 is the special value for "no delay bound", so the bound is only checked for non-zero values
      if ((pt_Input->s_MaxDelay != 0) && (ul_Delay > (pt_Input->s_MaxDelay<<6)))
      {
         continue;
      }

      // INP = (64*ul_Dp)/ul_Lp;
      ul_MaxINP_CIP1 = (ul_Dp<<6)/ul_Lp;

      //  If we reach this point we have a valid solution
      if(ul_MaxINP_CIP1 > pt_Input->s_MinINP)
      {
         // Saturate before narrowing so that a large estimate cannot wrap around in the int16 field
         if (ul_MaxINP_CIP1 > 0x7FFF)
         {
            pt_Input->s_MinINP = (int16)0x7FFF;
         }
         else
         {
            pt_Input->s_MinINP = (int16)ul_MaxINP_CIP1;
         }
         ft_CiPolicyInpIncrPossible = TRUE;
      }
   }//for loop us_Cw


   //Don't increase MinINP to a higher value than the max configurable value of 16
   if(pt_Input->s_MinINP > 16)
   {
      pt_Input->s_MinINP = 16;
   }

   // Check if we were able to increase the original INP value and, if yes,
   // decrease the MinINP adjustment by 1 to be on the safe side (wrt. the framing algorithm finding a valid solution)
   if(pt_Input->s_MinINP > s_INP_bkp)
   {
      pt_Input->s_MinINP--;
   }

   return ft_CiPolicyInpIncrPossible;
}

