/* **COPYRIGHT******************************************************************
    INTEL CONFIDENTIAL
    Copyright (C) 2017 Intel Corporation
    Copyright (C), 1994-2002 Aware Inc. All Rights Reserved.
******************************************************************COPYRIGHT** */
/* **DISCLAIMER*****************************************************************
    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors. Title to the Material remains with Intel
    Corporation or its suppliers and licensors. The Material may contain
    trade secrets and proprietary and confidential information of Intel
    Corporation and its suppliers and licensors, and is protected by
    worldwide copyright and trade secret laws and treaty provisions. No part
    of the Material may be used, copied, reproduced, modified, published,
    uploaded, posted, transmitted, distributed, or disclosed in any way
    without Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise. Any license under
    such intellectual property rights must be express and approved by
    Intel in writing.
*****************************************************************DISCLAIMER** */
 /*****************************************************************************
;
;   Aware DMT Technology. Proprietary and Confidential.
;
;   40 Middlesex Turnpike, Bedford, MA 01730-1413
;   Phone (781) 276 - 4000
;   Fax   (781) 276 - 4001
;
;   ec_init.c
;
;  Routines to perform Echo Canceller (EC) training.
;
;
;****************************************************************************/
// ******************************************************************
// ec_init.c
//
// History
// 23/08/2011 Kannan: DS performance improvement in DEC training.
//    We increased the DEC length from 400 to 480, and tail tapering
//    did not help to improve the DS performance. So the tail-tapering
//    code is disabled, but the necessity of tail tapering
//    should be clarified.
//      Grep for XDSLRTFW-251 PERF_DS_PlusBisDmt_ALL_DECTraining
// ******************************************************************

#include "common.h"
#include "dsp_op.h"
#include "gdata.h"
#include "DSLEngin.h"
#include "compiler.h"
#include "norm_acc48.h"
#include "ec_data.h"
#include "const.h"
#include "ec_init.h"
#include "cmv.h"
#include "data_alloc.h"
#include "ifft_cutback.h"


// EC training buffer index. Not referenced in the visible part of this file.
int16 gs_EcTrainBufIndex;

// The pointers between DATA_MAP_deILV2_BIS and DATA_MAP_END are wrapped by
// project macros defined elsewhere.
// NOTE(review): presumably these macros control where the enclosed variables
// are placed in memory - confirm before reordering or moving any of them.
DATA_MAP_deILV2_BIS

// Tail-taper window; in this file it is only referenced by the disabled
// (#if 0) tapering code in DECTrain2b().
int16  *usa_sym_taper_tail;
// Not referenced in the visible part of this file.
int16  *gsa_DECTrainBuf32;
int16  *gsa_DECCorrBuf32;
// Per-mode DEC training / decorrelating sequences. dec_training_seq_reconfig()
// selects one pair and publishes it through gpsa_DECTrainBuf / gpsa_DECCorrBuf.
int16 *gsa_DECTrainBuf_AnnexA;
int16 *gsa_DECCorrBuf_AnnexA;
#ifdef ADSL_62
// ADSL_62 builds: pair selected for G.992.5 when gft_V14 == 0.
int16 *gsa_DECTrainBuf_AnnexA_Plus;
int16 *gsa_DECCorrBuf_AnnexA_Plus;
#else
// Non-ADSL_62 builds: pair selected for the Annex B configuration modes.
int16 *gsa_DECTrainBuf_AnnexB;
int16 *gsa_DECCorrBuf_AnnexB;
#endif
// Pair selected for G.992.3 Annex L with upstream PSD mask 1.
int16 *gsa_DECTrainBuf_AnnexL_Mask1;
int16 *gsa_DECCorrBuf_AnnexL_Mask1;

// Pair selected for G.992.3 Annex L with upstream PSD mask 2.
int16 *gsa_DECTrainBuf_AnnexL_Mask2;
int16 *gsa_DECCorrBuf_AnnexL_Mask2;
DATA_MAP_END//DATA_MAP_deILV2_BIS

// ADSL_62 builds only: DECTrain2b() stores the estimated DEC delay here
// before it forces gs_pre_dec_h_delay to 0.
int16 gs_dec_delay_saved;
/*******************************************************************/
// Active DEC training sequence, decorrelating sequence and correlation shift.
// Assigned by dec_training_seq_reconfig(): the AnnexA set is the default and
// may be replaced depending on the mode selected or the IFFT size.
// gs_CorrelateShift is additionally incremented there when
// gft_XtraIfftCutback is set, and is used by DECTrain2b() when scaling the
// DEC coefficients.
/*******************************************************************/
int16 *gpsa_DECTrainBuf;
int16 *gpsa_DECCorrBuf;
int16 gs_CorrelateShift;

/*
 * dec_training_seq_reconfig
 *
 * Selects the active DEC training sequence (gpsa_DECTrainBuf), the matching
 * decorrelating sequence (gpsa_DECCorrBuf) and the correlation shift
 * (gs_CorrelateShift).
 *
 *   ft_Ifft128Enable  (I)  when FALSE, and no mode-specific set was chosen by
 *                          the preceding checks, the 64 pt IFFT sequences are
 *                          installed via Update64PtIfftDecTrainCorrSequence()
 *                          (skipped on HERCULES_ADSL_CPE builds).
 *   s_mode            (I)  STAT_ConfigMode_* bit mask; only examined on
 *                          non-ADSL_62 builds.
 *
 * When gft_XtraIfftCutback is set, the first gs_TxFftLength samples of the
 * selected training buffer are halved IN PLACE and gs_CorrelateShift is
 * incremented by one.
 */
void dec_training_seq_reconfig(FlagT ft_Ifft128Enable, int16 s_mode)
{
   /* Start from the Annex A set; the checks below override it when needed. */
   gs_CorrelateShift = gs_CorrelateShift_AnnexA;
   gpsa_DECCorrBuf   = gsa_DECCorrBuf_AnnexA;
   gpsa_DECTrainBuf  = gsa_DECTrainBuf_AnnexA;

#ifdef ADSL_62
   /* G.992.5: use the "Annex A Plus" set unless gft_V14 is set. */
   if (gl_SelectedMode & MODE_G992_5)
   {
      if (gft_V14 == 0)
      {
         gs_CorrelateShift = gs_CorrelateShift_AnnexA_Plus;
         gpsa_DECCorrBuf   = gsa_DECCorrBuf_AnnexA_Plus;
         gpsa_DECTrainBuf  = gsa_DECTrainBuf_AnnexA_Plus;
      }
   }
#else  /* !ADSL_62 */
   /* Any of the Annex B configuration modes. */
   if ((s_mode & STAT_ConfigMode_G992_1_B) ||
       (s_mode & STAT_ConfigMode_G992_3_B) ||
       (s_mode & STAT_ConfigMode_G992_5_B))
   {
      gs_CorrelateShift = gs_CorrelateShift_AnnexB;
      gpsa_DECCorrBuf   = gsa_DECCorrBuf_AnnexB;
      gpsa_DECTrainBuf  = gsa_DECTrainBuf_AnnexB;
   }
#ifndef ISDN
   /* Annex L: the set depends on the preferred upstream PSD mask; any other
      mask value keeps the Annex A defaults. */
   else if (s_mode & STAT_ConfigMode_G992_3_L)
   {
      if (gs_Preferred_PSDMask_G9923x[G992_3_ANNEX_A] == REACHEXT_G992_3L_USPSDMASK1)
      {
         gs_CorrelateShift = gs_CorrelateShift_AnnexL_Mask1;
         gpsa_DECCorrBuf   = gsa_DECCorrBuf_AnnexL_Mask1;
         gpsa_DECTrainBuf  = gsa_DECTrainBuf_AnnexL_Mask1;
      }
      else if (gs_Preferred_PSDMask_G9923x[G992_3_ANNEX_A] == REACHEXT_G992_3L_USPSDMASK2)
      {
         gs_CorrelateShift = gs_CorrelateShift_AnnexL_Mask2;
         gpsa_DECCorrBuf   = gsa_DECCorrBuf_AnnexL_Mask2;
         gpsa_DECTrainBuf  = gsa_DECTrainBuf_AnnexL_Mask2;
      }
   }
#endif /* !ISDN */
#endif /* ADSL_62 */
   /* 64 pt IFFT: only reached when none of the mode checks above matched. */
   else if (ft_Ifft128Enable == FALSE)
   {
#ifndef HERCULES_ADSL_CPE
      Update64PtIfftDecTrainCorrSequence();
#endif
   }

   if (gft_XtraIfftCutback)
   {
      /* Halve the time-domain DEC training sequence in place and account
         for the lost bit in the correlation shift. */
      int16 *const ps_seq = gpsa_DECTrainBuf;
      int16 s_n;

      for (s_n = 0; s_n < gs_TxFftLength; s_n++)
      {
         ps_seq[s_n] >>= 1;
      }

      gs_CorrelateShift += 1;
   }
}

/***********************************************************************************************
;  Prototype:
;     void DECTrain2b(int16 *psa_TxDataBuf, int16 *psa_RxInBuf, int32 *pla_Xcorr)
;
;  Description:
;     This routine is the second part of the DEC training routine. It estimates the
;     impulse response and input delay of the DEC using the circular cross-correlation
;     between the received sequence and a second sequence designated here as the
;     "decorrelating sequence." Both sequences are DEC_TRAINING_PERIOD frames long.
;     The decorrelating sequence is such that the circular cross-correlation between
;     the DEC training sequence and the decorrelating sequence is a scaled unit
;     impulse sequence.
;
;     The circular cross-correlation coefficients Xcorr[j] for j=0, 1, ... ,
;     DEC_TRAINING_PERIOD*gs_RxSamplesPerFrame-1, between the received sequence and the
;     decorrelating sequence are computed as follows:
;
;     Let x[n], for n=0,1,...,DEC_TRAINING_PERIOD*gs_TxFftLength-1, denote the decorrelating sequence,
;     and y[n], for n=0,1,...,DEC_TRAINING_PERIOD*gs_RxSamplesPerFrame-1, denote the received sequence.
;     The j-th circular cross-correlation coefficient Xcorr[j] is given by
;
;        k = (int)(j/gs_DECUpsamplingFactor);
;        Xcorr[j] = sum from n= -k to (DEC_TRAINING_PERIOD*gs_TxFftLength-1-k)
;                 (x[m] * y[gs_DECUpsamplingFactor*n+j])
;     where
;
;        m = n,                              if n >= 0;
;           DEC_TRAINING_PERIOD*gs_TxFftLength+n,  else (circular wrap-around).
;
;     Actually, because of constraints on the maximum DEC delay (= MAX_DEC_DELAY)
;     only a portion of the  cross-correlation coefficients will be used.
;     Therefore the cross-correlation is evaluated only for the
;     first DEC_ORDER + (MAX_DEC_DELAY * gs_DECUpsamplingFactor) points.
;
;     The DEC input delay is obtained by searching for the
;     offset < MAX_DEC_DELAY * gs_DECUpsamplingFactor, in integer
;     multiples of gs_DECUpsamplingFactor, corresponding to  the
;     cross correlation segment with maximum energy.
;
;     The DEC input delay,
;     gs_pre_dec_h_delay = offset/gs_DECUpsamplingFactor < MAX_DEC_DELAY.
;
;     The DEC impulse response h[n] is obtained by appropriately scaling the
;     coefficients xcorr[0], xcorr[1], ... , xcorr[DEC_ORDER-1].
;     The impulse response coefficients are represented by a normalized
;     16-bit word and a common exponent (1.15 * 2^exponent). The exponent has
;     range [-3,+3].
;
;  Arguments:
;     psa_TxDataBuf  (I)     pointer to the decorrelating sequence
;     psa_RxInBuf    (I)     pointer to the received sequence
;     pla_Xcorr      (I/O)   pointer to the cross-correlation buffer
;
;  Return Value:
;     none
;
;  Global Variables:
;     gsa_pre_dec_h     array of DEC normalized impulse
;              response coefficients (1.15 * 2^exponent)
;     gs_pre_dec_h_exp  exponent for DEC coefficients
;     gs_pre_dec_h_delay   DEC input delay
;
;****************************************************************************/
/* Platform-specific preparation for the DEC cross-correlation. DECTrain2b()
   calls it with the decorrelating and received buffers, their lengths in
   samples, and gs_DECUpsamplingFactor as s_pitch; it ignores the returned
   flag. Not defined in the visible part of this file. */
FlagT InitDECXcorr(int16 *psa_TxDataBuf, int16 *psa_RxInBuf, int16 s_TxBufLen, int16 s_RxBufLen, int16 s_pitch);
/* Computes the cross-correlation between the two sequences. DECTrain2b()
   passes pla_Xcorr here and afterwards reads its first s_XCorrelationLength
   entries. Not defined in the visible part of this file. */
void ComputeDECXcorrSequence(int16 *psa_TxDataBuf, int16 *psa_RxInBuf, int32 *pla_Xcorr,
               int16 s_TxSignalLen, int16 s_RxSignalLen,
               int16 s_Log2TxSignalLen, int16 s_XCorrelationLength,
               int16 s_DECUpsamplingFactor, int16 s_Log2DECUpsamplingFactor);

/*
 * DECTrain2b
 *
 * Second stage of DEC training. Steps, in order:
 *   1. Compute the cross-correlation between the decorrelating sequence and
 *      the received sequence (ComputeDECXcorrSequence) into pla_Xcorr.
 *   2. Search for the DEC input delay: the offset, in steps of
 *      DEC_DELAY_MULT Tx samples, of the gs_DEC_ORDER-long cross-correlation
 *      segment with maximum energy. Result goes to gs_pre_dec_h_delay.
 *   3. Copy that segment, find its peak, derive a common exponent clamped to
 *      [-3, +3] (gs_pre_dec_h_exp) and write the scaled 16-bit coefficients
 *      to gsa_pre_dec_h[].
 *
 * Arguments:
 *   psa_TxDataBuf (I)    decorrelating sequence
 *   psa_RxInBuf   (I)    received sequence
 *   pla_Xcorr     (I/O)  cross-correlation work buffer; at least
 *                        s_XCorrelationLength entries are read from it here
 *
 * Globals written: gs_pre_dec_h_delay, gs_pre_dec_h_exp, gsa_pre_dec_h[],
 * and, on ADSL_62 builds, gs_dec_delay_saved.
 */
void DECTrain2b(int16 *psa_TxDataBuf, int16 *psa_RxInBuf, int32 *pla_Xcorr)
{

   int16 i, j = 0, k;
   int32 l_Xcorr;
   uint32 ul_gap;
   int32 l_square,l_tail,l_head,l_delta,l_round_term;
   int16 s_TxLog2FftLength, s_NumIntegerBits, s_RShift, s_XCorrelationLength;
   int32 la_xcorr[DEC_ORDER_MAX];   /* selected window; only the first gs_DEC_ORDER entries are filled */
   int32 l_Xcorr_max,l_aux;
   int16 s_Log2Xcorr_max;
   int16 s_Xcorr_shift, s_precision_loss;
   int16 s_MaxDECDelay, s_Log2DECUpsamplingFactor;
   int16 s_Signal1Length, s_Signal2Length;


   /* perform any platform-specific initializations (the returned flag is not checked) */
   InitDECXcorr(psa_TxDataBuf, psa_RxInBuf, (int16) (DEC_TRAINING_PERIOD*gs_TxFftLength),
      (int16) (DEC_TRAINING_PERIOD*gs_RxSamplesPerFrame), gs_DECUpsamplingFactor);

   /* floor(log2(gs_DECUpsamplingFactor)), obtained by counting right shifts */
   for(i=gs_DECUpsamplingFactor, s_Log2DECUpsamplingFactor=0; i>1; i >>= 1)
      s_Log2DECUpsamplingFactor++;

   s_TxLog2FftLength = gs_TxLog2FftLength1+1;

   // Minimum required cross-correlation length is:
   //
   //    min(Rx signal length,DEC_ORDER + (MAX_DEC_DELAY * DECUpsamplingFactor))
   //
   // where the Rx signal length is currently DEC_TRAINING_PERIOD*(2*RxNumTones).

   // The cross-correlation is periodic with a period equal to the Rx signal length, hence
   // the first term in the min().
   // The maximum echo delay is specified in Tx samples by the value MAX_DEC_DELAY, hence
   // the second term in the min().

   //   Note: potential DEC_ORDER = 400, currently DEC_ORDER = 120;
   //   NOTE(review): these figures may be stale (the file history mentions 480);
   //   the code uses the run-time value gs_DEC_ORDER.

   s_XCorrelationLength = gs_DEC_ORDER + (MAX_DEC_DELAY<<s_Log2DECUpsamplingFactor);
   if (s_XCorrelationLength > DEC_TRAINING_PERIOD*gs_RxSamplesPerFrame)
      s_XCorrelationLength = DEC_TRAINING_PERIOD*gs_RxSamplesPerFrame;

   s_Signal1Length = (int16) (DEC_TRAINING_PERIOD * gs_TxFftLength);
   s_Signal2Length = (int16) (DEC_TRAINING_PERIOD * gs_RxSamplesPerFrame);

   /* Compute the X-correlation */
   /* NOTE(review): s_TxLog2FftLength+1 is passed as log2 of the Tx signal
      length; that matches s_Signal1Length only if DEC_TRAINING_PERIOD is 2
      and gs_TxFftLength == 1<<s_TxLog2FftLength - confirm. */
   ComputeDECXcorrSequence(psa_TxDataBuf, psa_RxInBuf, pla_Xcorr,
            s_Signal1Length, s_Signal2Length, (int16)(s_TxLog2FftLength+1),
            s_XCorrelationLength, gs_DECUpsamplingFactor, s_Log2DECUpsamplingFactor);


   /* Estimate DEC input delay.--------------- */

   /* Algorithm Description

   The difference in energy between one segment and the next is delta = head - tail,
   where head is energy added in the current segment,
   and tail is energy to be subtracted from previous segment

   Gap is the difference in energy from the previous segment to the maximum energy.
   If delta > gap, the current segment has the maximum energy.

    The algorithm computes a new delta each step and keeps track of the gap.
   */

   /* Precision Details

   GAP
   The maximum value of gap is equal to the maximum difference in energy.
   This positive quantity is stored in 32 bits.
      Overflow prevention:
      Maximum increment of gap is delta.
      Delta is limited to 31 bits
      Controlling gap's bit # 31, we enforce that overflow can not occur.

   DELTA
   Can be positive or negative. 31 bits absolute value.
   |Delta| < 2*(Max cross correlation)*(gs_DECUpsamplingFactor)
   An initial loop calculates Max cross correlation.
   Input data is right shifted to enforce a max 31bit Delta.
   */


   l_Xcorr_max=0;
   s_Xcorr_shift=0;
   s_precision_loss=0;
   l_delta = 0;
   ul_gap = 0;
   gs_pre_dec_h_delay = 0;


   /* Calculate the max cross-correlation (absolute value) */
   /* NOTE(review): negating the most negative int32 value would overflow;
      the code relies on the cross-correlation never reaching it. */
   for(i=0; i<s_XCorrelationLength; i++) {
      if(pla_Xcorr[i] > l_Xcorr_max) l_Xcorr_max = pla_Xcorr[i];
      else if(-pla_Xcorr[i] > l_Xcorr_max) l_Xcorr_max = -pla_Xcorr[i];
   }

   /* floor(log2(l_Xcorr_max)); stays 0 for a maximum of 0 or 1 */
   for(l_aux=l_Xcorr_max, s_Log2Xcorr_max=0; l_aux>1; l_aux >>= 1)
      s_Log2Xcorr_max++;

   /* Compute input data s_Xcorr_shift */
   /* Right shift applied to every sample before squaring so that the
      accumulated squares respect the bit budget described above. */
   if(s_Log2Xcorr_max + 1 > ((31 - s_Log2DECUpsamplingFactor)>>1))
      s_Xcorr_shift= s_Log2Xcorr_max + 1 - ((31 - s_Log2DECUpsamplingFactor)>>1);

   /* Rounding term: half of the LSB that survives the shift (0 when no shift) */
   if(s_Xcorr_shift > 0) l_round_term = (1<<(s_Xcorr_shift-1));
   else l_round_term=0;


   // Max DEC delay considered will be less than MAX_DEC_DELAY in the case where
   // DEC_ORDER + (MAX_DEC_DELAY<<s_Log2DECUpsamplingFactor) > 2*gs_RxSamplesPerFrame.

   s_MaxDECDelay = (s_XCorrelationLength - gs_DEC_ORDER) >> s_Log2DECUpsamplingFactor;

   /* Step size, in Tx samples, between candidate delays. Note that this macro
      is defined inside the function body and therefore stays defined for the
      remainder of the translation unit. */
#ifdef ADSL_62
#define DEC_DELAY_MULT  8
#else
#define DEC_DELAY_MULT  1
#endif

   for (i = DEC_DELAY_MULT; i <= s_MaxDECDelay; i+=DEC_DELAY_MULT ){ /* allowable range of DEC delays */

      /* j = offset (in cross-correlation samples) of the previous candidate segment */
      j = (i-DEC_DELAY_MULT) << s_Log2DECUpsamplingFactor;     /* j = (i-DEC_DELAY_MULT)*gs_DECUpsamplingFactor */

      /* Compute head: energy of the samples entering the window at its far end */
      l_head = 0;
      for(k = 0; k<gs_DECUpsamplingFactor * DEC_DELAY_MULT ; k++){

         /* scale down bits before squaring */
         l_square = (pla_Xcorr[gs_DEC_ORDER + j + k] + l_round_term) >> s_Xcorr_shift;
         l_square *= l_square;

         /* accumulate result */
         l_head += l_square;
      }

      /* Compute tail: energy of the samples leaving the window at its near end */
      l_tail = 0;
      for(k = 0; k< gs_DECUpsamplingFactor * DEC_DELAY_MULT ; k++){

         /* scale down before squaring */
         l_square = (pla_Xcorr[k + j] + l_round_term) >> s_Xcorr_shift;
         l_square *= l_square;

         /* accumulate result */
         l_tail += l_square ;
      }

      // DEnergy[i]= - Tail[i] + Head[i]
      // Scaled by the same precision loss that has been applied to ul_gap so far.
      l_delta = (l_head - l_tail) >> s_precision_loss;


      /* gs_pre_dec_h_delay = index of segment with maximum energy = DEC delay */

      /* The l_delta > 0 test comes first so that the signed/unsigned
         comparison with ul_gap is only made with a positive value. */
      if ((l_delta > 0) && (l_delta > ul_gap)) {
         /* New maximum found: the distance to the maximum is zero again. */
         ul_gap = 0;
         gs_pre_dec_h_delay = i;
      }
      else {
         /* Not a new maximum: update the distance to the best segment so far. */
         ul_gap -= l_delta;
         // overflow of ul_gap is prevented by losing precision of comparison between delta and gap.
         if ((ul_gap >> 30) > 0){
            ul_gap >>= 1;
            s_precision_loss ++;
         }
      }
   }
#ifdef ADSL_62
   /* When (upsampling factor is 2 and gft_V14 is set) or the selected mode is
      not G.992.5, the estimated delay is only saved and a delay of 0 is used. */
   if ( ((gs_DECUpsamplingFactor == 2) && (gft_V14 ==1)) ||((gl_SelectedMode & MODE_G992_5) ==0))
   {
      gs_dec_delay_saved = gs_pre_dec_h_delay;
      gs_pre_dec_h_delay  =0;
   }
#endif //ADSL_62
#if 0 //XDSLRTFW-251 PERF_DS_PlusBisDmt_ALL_DECTraining (START - END)
   //This change is applicable for all modes and all CO's
    //we increased the DEC length from 400 to 480 and tail tapering
   //was not helpful to improve the DS performance. But the necessity
   //of tail tapering should be clarified.
/* Disable DEC tapering for Annex B modes */
   if (((OPTNArray[OPTN_AlgControl] & OPTN_DECTaperDisable)==FALSE) &&
      ((( (gl_SelectedMode & (MODE_G992_3))) && (gl_SelectedMode & (ANNEX_B)  )) == 0) &&
      ((( (gl_SelectedMode & (MODE_G992_1))) && (gl_SelectedMode & (ANNEX_B)  )) == 0) &&
      ((( (gl_SelectedMode & (MODE_G992_5))) && (gl_SelectedMode & (ANNEX_B)  )) == 0) &&
      ((( (gl_SelectedMode & (MODE_G992_5))) && (gl_SelectedMode & (ANNEX_M)  )) == 0) &&
      ((( (gl_SelectedMode & (MODE_G992_5))) && (gl_SelectedMode & (ANNEX_J)  )) == 0) &&
      ((( (gl_SelectedMode & (MODE_G992_3))) && (gl_SelectedMode & (ANNEX_M)  )) == 0) &&
        ((( (gl_SelectedMode & (MODE_G992_3))) && (gl_SelectedMode & (ANNEX_J)  )) == 0))
   {
      /* Scale down the tails of the maximum cross correlation segment
         before tapering the tails with a raised cosine, usa_sym_taper_tail.  */
      for(i=0; i<TAIL_TAPER_LEN; i++){
         /* taper left tail */
#ifndef DANUBE
         // taper left tail with window
         la_xcorr[i] = (pla_Xcorr[i+(gs_pre_dec_h_delay<<s_Log2DECUpsamplingFactor)]/(1<<LOG2_TAPER_WIN_SCALE)) * usa_sym_taper_tail[i];
#else
         // For danube don't taper left tail. IIR interp response has a shorter left tail and tapering introduces residual echo.
         la_xcorr[i] = (pla_Xcorr[i+(gs_pre_dec_h_delay<<s_Log2DECUpsamplingFactor)]);
#endif
         /* taper right tail */
         la_xcorr[gs_DEC_ORDER-1-i] = (pla_Xcorr[gs_DEC_ORDER-1-i+(gs_pre_dec_h_delay<<s_Log2DECUpsamplingFactor)]/(1<<LOG2_TAPER_WIN_SCALE)) * usa_sym_taper_tail[i];
      }
      /* center coefficients, of length (DEC_ORDER-2*TAIL_TAPER_LEN),
         of the maximum cross correlation segment are not tapered. */
      for (i = TAIL_TAPER_LEN; i<gs_DEC_ORDER-TAIL_TAPER_LEN; i++)
           la_xcorr[i] = pla_Xcorr[i+(gs_pre_dec_h_delay<<s_Log2DECUpsamplingFactor)];
   }
   else
#endif //XDSLRTFW-251 PERF_DS_PlusBisDmt_ALL_DECTraining (START - END)
   {
      /* With the tapering code above compiled out, this block always runs. */
      /* max xcorrelation window without scaling or tapering:*/
      for(k=0; k<gs_DEC_ORDER; k++)
         la_xcorr[k] = pla_Xcorr[k+(gs_pre_dec_h_delay<<s_Log2DECUpsamplingFactor)];
   }

   /* Find max absolute cross-correlation */
   l_Xcorr = 0;

   /* j was reused as the segment offset by the delay search above. Reset it
      so that, if every coefficient in the window is zero and the loop below
      never assigns it, la_xcorr[j] further down reads element 0 instead of a
      stale offset that may lie beyond the filled part (or the end) of
      la_xcorr[]. */
   j = 0;

   for(k=0; k<gs_DEC_ORDER; k++) {

      if(la_xcorr[k] > l_Xcorr) {
         l_Xcorr = la_xcorr[k];
         j = k;
      }
      else if(-la_xcorr[k] > l_Xcorr) {
         l_Xcorr = -la_xcorr[k];
         j = k;
      }
   }

   s_NumIntegerBits = 2+s_TxLog2FftLength+1;
   /* we need one more factor of 2 when received and xmit correlate to 0.5 */
   gs_pre_dec_h_exp = s_NumIntegerBits-norm_l(la_xcorr[j]<<gs_CorrelateShift)-1;

   /* Clamp the common exponent to the supported range [-3, +3] */
   if(gs_pre_dec_h_exp > 3)
      gs_pre_dec_h_exp = 3;
   else if(gs_pre_dec_h_exp < -3)
      gs_pre_dec_h_exp = -3;

   /* Right shift that converts the 32-bit correlation values to the 16-bit
      mantissas matching gs_pre_dec_h_exp.
      NOTE(review): assumed to be non-negative for all supported
      configurations - confirm. */
   s_RShift = 32-s_NumIntegerBits-15+gs_pre_dec_h_exp - gs_CorrelateShift;

   for(k=0; k<gs_DEC_ORDER; k++) {
      gsa_pre_dec_h[k] = (int16) (la_xcorr[k] >> s_RShift);
   }

#ifndef ADSL_62
   // scale mantissa of DEC by 2, to allow room for expansion during showtime dec adaptation
   // no significant performance loss is seen by scaling DEC
   if (gs_pre_dec_h_exp <3) // check dec exponent for safety
   {
      for(k=0; k<gs_DEC_ORDER; k++)
      {
         gsa_pre_dec_h[k] = gsa_pre_dec_h[k] >> 1;
      }

      /* Halved mantissas are compensated by a larger exponent (still <= 3). */
      gs_pre_dec_h_exp++;

   }
#endif

}

