/* **COPYRIGHT******************************************************************
    INTEL CONFIDENTIAL
    Copyright (C) 2017 Intel Corporation
    Copyright (C), 1994-1998 Aware Inc. All Rights Reserved.
******************************************************************COPYRIGHT** */
/* **DISCLAIMER*****************************************************************
    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors. Title to the Material remains with Intel
    Corporation or its suppliers and licensors. The Material may contain
    trade secrets and proprietary and confidential information of Intel
    Corporation and its suppliers and licensors, and is protected by
    worldwide copyright and trade secret laws and treaty provisions. No part
    of the Material may be used, copied, reproduced, modified, published,
    uploaded, posted, transmitted, distributed, or disclosed in any way
    without Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise. Any license under
    such intellectual property rights must be express and approved by
    Intel in writing.
*****************************************************************DISCLAIMER** */
/****************************************************************************
;  Aware DMT Technology. Proprietary and Confidential.
;
;   40 Middlesex Turnpike, Bedford, MA 01730-1413 USA
;  Phone (781) 276 - 4000
;   Fax   (781) 276 - 4001
;
;  fft_fix.c
;
;  This file contains subroutines of fixed-point implementation of
;  Fast Fourier Transform (FFT) and the inverse FFT (IFFT).
;
;***************************************************************************/

#include "typedef.h"
#include "dsp_op.h"
#include "compiler.h"
#include "fft_tab.h"
#include "gdata.h"

/* =============================================== */
/* Constants used by this file only                */
/* =============================================== */
/* Right shift (with rounding) applied to products of data and sine/twiddle */
/* table entries, i.e. the table shift factor.                              */
#define TWID_SHIFT                  15             /* twiddle table shift factor */
/* log2(256) and log2(512): base values from which the table index shift    */
/* (base - log2 of the requested transform length) is derived.              */
#define LOG2N_256             8
#define LOG2N_512             9

/* Value of gs_RxFftLength for which LOG2N_512 is used as the base and the  */
/* lookup tables are indexed without the extra factor of 2.                 */
#define FFT_LEN_1024          1024

/* Right shift count applied to every IFFT input value in IfftReal();       */
/* comment this definition out to disable the input shift.                  */
#define IFFT_INPUT_RSH_COUNT     1              /* right shift count for IFFT input value, or comment out for no shift */

/* NOTE(review): the three constants below are not referenced in the        */
/* visible part of this file -- confirm whether they are still needed.      */
#define RX_MUL_PREC  24 /* Rx FFT Multiplier output precision */
#define PP_ALIGN  6  /* Rx FFT post processing shift for ARM, ARP, AIM, AIP */
#define POST_SHIFT   6  /* Rx FFT post processing shift right before output */

/* Forward declarations of the routines defined further down in this file: */
/* IfftRadix2Dif() is called by IfftReal(), BitReverse() by IfftRadix2Dif(). */
void IfftRadix2Dif(int16* psa_inbuf, int16* psa_outbuf, int16 s_fftn, int16 s_log2n);
void BitReverse(int16 *psa_inbuf, int16 *psa_outbuf, int16 s_fftn, int16 s_nbits,  int16 *psa_bit_reverse_idx);


/*****************************************************************************
;  Subroutine Name: IfftReal()
;
;  Computes the IFFT of X[k], the 2N-point FFT of a real sample sequence,
;  by means of a single N-point complex IFFT.
;
;  The input is X[k] = Xr[k] + jXi[k], k=0,1,...,N.
;
;  Processing steps:
;   1) Derive A[k] = Ar[k] + jAi[k] from X[k]:
;        Ar[k]   = { Xr[k]+Xr[N-k] - c[k]*(Xi[k]+Xi[N-k]) + s[k]*(Xr[k]-Xr[N-k]) }/2
;        Ai[k]   = { Xi[k]-Xi[N-k] + c[k]*(Xr[k]-Xr[N-k]) + s[k]*(Xi[k]+Xi[N-k]) }/2
;        Ar[N-k] = { Xr[k]+Xr[N-k] + c[k]*(Xi[k]+Xi[N-k]) - s[k]*(Xr[k]-Xr[N-k]) }/2
;        Ai[N-k] = { -(Xi[k]-Xi[N-k]) + c[k]*(Xr[k]-Xr[N-k]) + s[k]*(Xi[k]+Xi[N-k]) }/2
;      for k=0,1,...,N/2-1, with
;        c[k] = cos(-(2*PI*k/(2*N))), s[k] = sin(-(2*PI*k/(2*N))),
;      and Ar[N/2] = Xr[N/2], Ai[N/2] = -Xi[N/2].
;
;   2) Run the N-point complex IFFT: a[n] = ar[n] + jai[n] = IFFT(A[k]),
;      n=0,1,...,N-1 (done by IfftRadix2Dif()).
;
;   3) The real output is x[2n] = ar[n], x[2n+1] = ai[n], n=0,1,...,N-1,
;      which is simply the interleaved complex IFFT output.
;
;  Prototype:
;     void IfftReal(int16* psa_inbuf, int16* psa_outbuf, int16 s_fftn, int16 s_log2n)
;
;  Input Arguments:
;     psa_inbuf -- pointer to an array of complex numbers, stored as
;           Xr[0], Xr[N], Xr[1], Xi[1], Xr[2], Xi[2], ...,
;           Xr[N-1], Xi[N-1] (Xi[0] = Xi[N] = 0 for a real FFT).
;     WARNING: the input buffer is modified in place. When
;           IFFT_INPUT_RSH_COUNT is defined, the first 2*s_fftn entries are
;           shifted right by that count. Entry [1] is cleared, and entries
;           [2*s_fftn] and [2*s_fftn+1] are written, so the buffer must hold
;           at least s_fftn*2+2 elements.
;
;     s_fftn -- FFT size (i.e. N). The local work buffer holds
;           2*RX_BUFFER_SIZE+2 elements and elements up to index 2*s_fftn+1
;           are written, so s_fftn must not exceed RX_BUFFER_SIZE.
;     s_log2n -- log2(s_fftn)
;
;  Output Arguments:
;     psa_outbuf -- pointer to the IFFT output, a sequence of real samples
;           x[n], n=0, 1, ..., 2*s_fftn-1
;
;  Global Variables Used:
;     gs_RxFftLength -- selects the table index scaling (FFT_LEN_1024 or other)
;     gsa_sk_tab256[] -- sine table supplying s[k]; c[k] is obtained as the
;           negated table entry at the mirrored index
;     gsa_twid_real256[], gsa_twid_imag256[], gsa_bit_reverse_idx256[] --
;           used indirectly through IfftRadix2Dif()
;
;****************************************************************************/
C_SCOPE void IfftReal(int16* psa_inbuf, int16* psa_outbuf, int16 s_fftn, int16 s_log2n)
{
   int16 sa_A[(2*RX_BUFFER_SIZE)+2];      /* A[0..N], interleaved real/imag */
   int16 s_bin, s_half_n;
   int16 s_lo, s_hi;                      /* indices of Xr[k] and Xr[N-k] */
   int16 s_tab_shift, s_tab_mul;
   int16 s_sin_idx, s_cos_idx;
   int16 s_sum_re, s_dif_re, s_sum_im, s_dif_im;
   int16 s_cos;

   int32 l_re_lo, l_im_lo, l_re_hi, l_im_hi;
   int32 l_sin;
   int32 l_rot_p, l_rot_m;
   int32 l_tmp;

   /* Table index shift: base log2 length of the tables minus log2(N) */
   s_tab_shift = ((gs_RxFftLength == FFT_LEN_1024) ? LOG2N_512 : LOG2N_256) - s_log2n;

   /* The tables are laid out for adsl2plus; in bis and dmt modes only every */
   /* alternate table entry is used, hence the additional factor of 2.       */
   s_tab_mul = (gs_RxFftLength != FFT_LEN_1024) ? 2 : 1;

#ifdef IFFT_INPUT_RSH_COUNT
   /* Scale down the input to prevent overflow */
   {
      int16 s_idx = 0;

      while (s_idx < s_fftn*2) {
         psa_inbuf[s_idx] = psa_inbuf[s_idx] >> IFFT_INPUT_RSH_COUNT;
         s_idx++;
      }
   }
#endif /*  IFFT_INPUT_RSH_COUNT */

   /* X[N] = Xr[N] + j0; Xr[N] arrives packed in psa_inbuf[1] */
   psa_inbuf[s_fftn*2] = psa_inbuf[1];
   psa_inbuf[s_fftn*2+1] = 0;

   /* Xi[0] = 0 */
   psa_inbuf[1] = 0;

   /* Build A[k] and A[N-k] from X[k] and X[N-k], k = 0 .. N/2-1 */
   s_half_n = s_fftn >> 1;

   for (s_bin = 0; s_bin < s_half_n; s_bin++) {

      s_lo = s_bin << 1;
      s_hi = (s_fftn - s_bin) << 1;

      /* Table positions of s[k] and of the mirrored entry that yields c[k] */
      s_sin_idx = s_bin << s_tab_shift;
      s_cos_idx = (s_half_n - s_bin) << s_tab_shift;
      s_sin_idx *= s_tab_mul;
      s_cos_idx *= s_tab_mul;

      /* Fetch X[k] and X[N-k] once, widened to 32 bits */
      l_re_lo = psa_inbuf[s_lo];
      l_im_lo = psa_inbuf[s_lo + 1];
      l_re_hi = psa_inbuf[s_hi];
      l_im_hi = psa_inbuf[s_hi + 1];

      /* Xrp = (Xr[k] + Xr[N-k])/2 -- the 1/2 of the final result is applied here */
      l_tmp = l_re_lo + l_re_hi;
      l_tmp = round(l_tmp, 1);
      s_sum_re = sature16(l_tmp);

      /* Xrm = (Xr[k] - Xr[N-k])/2 */
      l_tmp = l_re_lo - l_re_hi;
      l_tmp = round(l_tmp, 1);
      s_dif_re = sature16(l_tmp);

      /* Xip = (Xi[k] + Xi[N-k])/2 */
      l_tmp = l_im_lo + l_im_hi;
      l_tmp = round(l_tmp, 1);
      s_sum_im = sature16(l_tmp);

      /* Xim = (Xi[k] - Xi[N-k])/2 */
      l_tmp = l_im_lo - l_im_hi;
      l_tmp = round(l_tmp, 1);
      s_dif_im = sature16(l_tmp);

      /* c[k] is the negated sine table entry at the mirrored index */
      s_cos = -gsa_sk_tab256[s_cos_idx];
      l_sin = (int32)gsa_sk_tab256[s_sin_idx];

      /* Bm = c[k]*Xip - s[k]*Xrm */
      l_rot_m = (int32)s_cos*s_sum_im - l_sin*s_dif_re;
      l_rot_m = round(l_rot_m, TWID_SHIFT);

      /* Bp = s[k]*Xip + c[k]*Xrm */
      l_rot_p = l_sin*s_sum_im + (int32)s_cos*s_dif_re;
      l_rot_p = round(l_rot_p, TWID_SHIFT);

      /* Ar[k] = Xrp - Bm */
      l_tmp = (int32)s_sum_re - l_rot_m;
      sa_A[s_lo] = sature16(l_tmp);

      /* Ai[k] = Xim + Bp */
      l_tmp = (int32)s_dif_im + l_rot_p;
      sa_A[s_lo + 1] = sature16(l_tmp);

      /* Ar[N-k] = Xrp + Bm */
      l_tmp = (int32)s_sum_re + l_rot_m;
      sa_A[s_hi] = sature16(l_tmp);

      /* Ai[N-k] = -Xim + Bp */
      l_tmp = l_rot_p - (int32)s_dif_im;
      sa_A[s_hi + 1] = sature16(l_tmp);
   }

   /* A[N/2] = conj(X[N/2]) */
   sa_A[s_fftn] = psa_inbuf[s_fftn];
   sa_A[s_fftn+1] = -psa_inbuf[s_fftn+1];

   /* Complex IFFT of A[k], k=0..N-1; its interleaved output               */
   /* a[n] = ar[n]+jai[n] is the real sequence x[2n], x[2n+1], n=0..N-1    */
   IfftRadix2Dif(sa_A, psa_outbuf, s_fftn, s_log2n);
}

/*****************************************************************************
;  Subroutine Name: IfftRadix2Dif(psa_inbuf, psa_outbuf, s_fftn, s_log2n)
;
;  This subroutine performs an IFFT on a block of complex numbers using the
;  Radix-2 Decimation In Frequency algorithm.
;
;  Overflow control inside this routine:
;   - in the first s_log2n-1 stages, the butterfly sums and differences of
;     every even-numbered stage (0, 2, ...) are scaled down by 1 bit with
;     rounding; odd-numbered stages are not scaled;
;   - the last stage always scales its outputs down by 1 bit;
;   - every value stored to memory goes through sature16().
;  The IFFT_INPUT_RSH_COUNT input pre-scaling is NOT applied here; it is
;  performed by IfftReal() before this routine is called.
;
;  Prototype:
;     void IfftRadix2Dif(int16* psa_inbuf, int16* psa_outbuf, int16 s_fftn, int16 s_log2n)
;
;  Input Arguments:
;     psa_inbuf -- a pointer to an array of IFFT input complex numbers,
;        where the real and imaginary values are stored in interleaved
;        format (real followed by imaginary number).
;        WARNING: the butterflies are computed in place, so the contents
;        of this buffer are overwritten.
;     s_fftn -- FFT length
;     s_log2n -- = log2(s_fftn)
;
;
;  Output Arguments:
;     psa_outbuf -- a pointer to an array of IFFT output complex numbers
;           where the real and imaginary values are stored in interleaved
;           format (real followed by imaginary number). Filled by
;           BitReverse(), so it must not overlap psa_inbuf.
;
;  Global Variables Used:
;     gs_RxFftLength -- selects the stride through the twiddle tables
;     gsa_twid_real256[] -- real part of the twiddle factors
;     gsa_twid_imag256[] -- imaginary part of the twiddle factors
;     gsa_bit_reverse_idx256[] -- table for bit-reversed index
;
;****************************************************************************/
C_SCOPE void IfftRadix2Dif(int16* psa_inbuf, int16* psa_outbuf, int16 s_fftn, int16 s_log2n)
{

   int16 s_stage, s_group, s_bfly, s_FFTLogLengthReductionFactor;
   int16 s_groups, s_node_space, s_bflys_per_group, s_twid_factor, s_extra_shift;
   int16 s_twid_step;
   int16 *psa_in0, *psa_in1;
   int16 *psa_twid_real, *psa_twid_imag;
   int16 s_x0, s_x1, s_y0, s_y1;
   int16 s_c, s_s;
   int32 l_xx0, l_xx1, l_yy0, l_yy1;

   /* Initialization */

   if (gs_RxFftLength == FFT_LEN_1024) {
      s_FFTLogLengthReductionFactor = LOG2N_512 - s_log2n;
   }
   else {
      s_FFTLogLengthReductionFactor = LOG2N_256 - s_log2n;
   }

   s_groups = 1;
   s_node_space = s_fftn;              /* distance, in int16 units, between butterfly legs */
   s_bflys_per_group = (s_fftn >> 1);

   s_twid_factor = (1) << s_FFTLogLengthReductionFactor;

   /* Compute the first s_log2n-1 stages */
   for(s_stage = 0; s_stage < s_log2n-1; s_stage++) {

      /* Scale the butterfly output by 2 every other stage: 1 for even   */
      /* stages, 0 for odd stages. Parenthesized explicitly because '-'  */
      /* binds tighter than '&'.                                         */
      s_extra_shift = 1 - (s_stage & 1);

      /* Twiddle table step for this stage (invariant over all butterflies). */
      /* Since the twiddle factor tables are for adsl2plus, for bis and dmt  */
      /* modes we need every alternate value in the tables, hence the step   */
      /* is twice s_twid_factor in those modes.                              */
      if (gs_RxFftLength == FFT_LEN_1024) {
         s_twid_step = s_twid_factor;
      }
      else {
         s_twid_step = (int16)(2*s_twid_factor);
      }

      psa_in0 = psa_inbuf;
      psa_in1 = psa_inbuf + s_node_space;

      for(s_group = 0; s_group < s_groups; s_group++ ) {

         /* Every group restarts at the beginning of the twiddle tables. */
         psa_twid_real = gsa_twid_real256;   /* Single table is used for all FFT lengths <= 256. */
         psa_twid_imag = gsa_twid_imag256;

         for(s_bfly = 0; s_bfly < s_bflys_per_group; s_bfly++) {

            /* Get butterfly inputs */
            s_x0 = psa_in0[0];
            s_y0 = psa_in0[1];
            s_x1 = psa_in1[0];
            s_y1 = psa_in1[1];
            s_c = *psa_twid_real;
            s_s = *psa_twid_imag;

            psa_twid_real += s_twid_step;
            psa_twid_imag += s_twid_step;

            /* Upper leg: (x0 + x1) + j(y0 + y1), stored in place */
            l_xx0 = (int32)s_x0 + s_x1;
            l_xx0 = round(l_xx0, s_extra_shift);
            *psa_in0++ = sature16(l_xx0);

            l_xx0 = (int32)s_y0 + s_y1;
            l_xx0 = round(l_xx0, s_extra_shift);
            *psa_in0++ = sature16(l_xx0);

            /* Lower leg: difference (x0 - x1) + j(y0 - y1) ... */
            l_xx0 = (int32)s_x0 - s_x1;
            l_xx0 = round(l_xx0, s_extra_shift);
            s_x0 = sature16(l_xx0);

            l_xx0 = (int32)s_y0 - s_y1;
            l_xx0 = round(l_xx0, s_extra_shift);
            s_y0 = sature16(l_xx0);

            /* ... multiplied by the twiddle factor (c - j*s) */
            l_xx1 = (int32)s_c*s_x0 + (int32)s_s*s_y0;
            l_yy1 = (int32)s_c*s_y0 - (int32)s_s*s_x0;

            /* Apply rounding */
            l_xx1 = round(l_xx1, (TWID_SHIFT));
            l_yy1 = round(l_yy1, (TWID_SHIFT));

            /* Store butterfly outputs to memory */
            *psa_in1++ = sature16(l_xx1);
            *psa_in1++ = sature16(l_yy1);

         }  /* end of butterfly loop */

         /* Skip the other leg's half to reach the first sample of the next group */
         psa_in0 += (s_bflys_per_group<<1);
         psa_in1 += (s_bflys_per_group<<1);


      } /* end of group loop; */

      s_twid_factor *= 2;
      s_groups *= 2;
      s_bflys_per_group >>= 1;
      s_node_space >>= 1;

   } /* end of stage loop */

   /* Compute the last stage (one butterfly per group, no twiddle multiply) */
   psa_in0 = psa_inbuf;
   psa_in1 = psa_inbuf + s_node_space;

   for(s_group = 0; s_group < s_groups; s_group++ ) {

      /* Get butterfly inputs */
      s_x0 = psa_in0[0];
      s_y0 = psa_in0[1];
      s_x1 = psa_in1[0];
      s_y1 = psa_in1[1];

      /* Compute butterfly outputs */
      l_xx0 = (int32)s_x0 + s_x1;
      l_yy0 = (int32)s_y0 + s_y1;
      l_xx1 = (int32)s_x0 - s_x1;
      l_yy1 = (int32)s_y0 - s_y1;

      /* Scale down all outputs by 2 */
      l_xx0 = round(l_xx0, 1);
      l_yy0 = round(l_yy0, 1);
      l_xx1 = round(l_xx1, 1);
      l_yy1 = round(l_yy1, 1);

      /* Store butterfly outputs to memory with saturation control.        */
      /* A plain (int16) cast is not sufficient: assuming round() rounds   */
      /* half up, a halved difference such as 32767 - (-32768) reaches     */
      /* +32768 and would wrap to a negative value.                        */
      *psa_in0++ = sature16(l_xx0);
      *psa_in0++ = sature16(l_yy0);
      *psa_in1++ = sature16(l_xx1);
      *psa_in1++ = sature16(l_yy1);

      /* set pointers to the first sample of the next group */
      psa_in0 += 2;
      psa_in1 += 2;
   } /* end of group loop; */

   /* unscramble output */
   BitReverse(psa_inbuf, psa_outbuf, s_fftn, s_log2n, gsa_bit_reverse_idx256);
}


/******************************************************************************************
;  Subroutine Name: BitReverse(psa_inbuf, psa_outbuf, s_fftn, s_nbits, psa_bit_reverse_idx)
;
;  Copies the complex input array to the output array, placing input element n
;  at the position given by the bit-reverse index table, as required to
;  unscramble the result of the FFT algorithm.
;  **NOTE: psa_inbuf and psa_outbuf buffers must not overlap.
;
;  Prototype:
;     void BitReverse(int16 *psa_inbuf, int16 *psa_outbuf, int16 s_fftn, int16 s_nbits,
;                 int16 *psa_bit_reverse_idx)
;
;  Input Arguments:
;     psa_inbuf -- a pointer to an array of input complex numbers,
;        where the real and imaginary values are stored in interleaved
;        format (real followed by imaginary number)
;     s_fftn -- FFT length (number of complex elements copied)
;     s_nbits -- = log2(s_fftn)
;     psa_bit_reverse_idx -- pointer to bit-reverse index table
;
;
;  Output Arguments:
;     psa_outbuf -- a pointer to an array of output complex numbers after
;           scrambling, where the real and imaginary values are stored in
;           interleaved format (real followed by imaginary number)
;
;  Global Variables Used:
;     gs_RxFftLength -- selects how the bit-reverse table is indexed
;
;*******************************************************************************************/
C_SCOPE void BitReverse(int16 *psa_inbuf, int16 *psa_outbuf, int16 s_fftn, int16 s_nbits,
            int16 *psa_bit_reverse_idx)
{
   int16 s_src, s_dst, s_idx_shift, s_idx_scale;

   /* Choose the table indexing once, up front. */
   if (gs_RxFftLength == FFT_LEN_1024) {
      s_idx_shift = LOG2N_512-s_nbits;
      s_idx_scale = 1;
   }
   else {
      /* since the bit reverse table is for adsl2plus, for bis and dmt modes */
      /* we need every alternate value in the bit reverse table. Hence, multiply */
      /* the index into bit reverse array by 2 to access the alternate value */
      s_idx_shift = LOG2N_256-s_nbits;
      s_idx_scale = 2;
   }

   for (s_src = 0; s_src < s_fftn; s_src++) {

      /* Destination (bit-reversed) position of complex element s_src */
      s_dst = psa_bit_reverse_idx[s_idx_scale*(s_src<<s_idx_shift)];

      /* Copy the real part, then the imaginary part */
      psa_outbuf[2*s_dst] = psa_inbuf[2*s_src];
      psa_outbuf[2*s_dst+1] = psa_inbuf[2*s_src+1];
   }

}

/* Undefine the constants that are used by this file only.                  */
/* NOTE(review): IFFT_INPUT_RSH_COUNT, RX_MUL_PREC, PP_ALIGN and POST_SHIFT  */
/* are also defined at the top of this file but are not undefined here --   */
/* confirm whether that is intentional.                                     */
#undef TWID_SHIFT
#undef LOG2N_256
#undef LOG2N_512
#undef FFT_LEN_1024
