/* **COPYRIGHT******************************************************************
    INTEL CONFIDENTIAL
    Copyright (C) 2017 Intel Corporation
    Copyright (C), 1994-1998 Aware Inc. All Rights Reserved.
******************************************************************COPYRIGHT** */
/* **DISCLAIMER*****************************************************************
    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors. Title to the Material remains with Intel
    Corporation or its suppliers and licensors. The Material may contain
    trade secrets and proprietary and confidential information of Intel
    Corporation and its suppliers and licensors, and is protected by
    worldwide copyright and trade secret laws and treaty provisions. No part
    of the Material may be used, copied, reproduced, modified, published,
    uploaded, posted, transmitted, distributed, or disclosed in any way
    without Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise. Any license under
    such intellectual property rights must be express and approved by
    Intel in writing.
*****************************************************************DISCLAIMER** */
/****************************************************************************
;  Aware DMT Technology. Proprietary and Confidential.
;
;   40 Middlesex Turnpike, Bedford, MA 01730-1413 USA
;  Phone (781) 276 - 4000
;   Fax   (781) 276 - 4001
;
;  fft_fix.c
;
;  This file contains subroutines of fixed-point implementation of
;  Fast Fourier Transform (FFT) and the inverse FFT (IFFT).
;
;***************************************************************************/

#include <string.h>
#include <math.h>
#include "typedef.h"
#include "dsp_op.h"
#include "compiler.h"
#include "fft_tab.h"
#include "gdata.h"

/* =============================================== */
/* constants used by this file only */
/* =============================================== */
/* NOTE(review): RX_MUL_PREC, PP_ALIGN and POST_SHIFT are not referenced by
   any function visible in this file -- confirm they are still needed. */
#define TWID_SHIFT                  15             /* twiddle table shift factor: products with table entries are rounded back by this many bits */

#define RX_MUL_PREC  24 /* Rx FFT Multiplier output precision */
#define PP_ALIGN  6  /* Rx FFT post processing shift for ARM, ARP, AIM, AIP */
#define POST_SHIFT   6  /* Rx FFT post processing shift right before output */
#define LOG2N_256 8     /* log2(256); table-index reference size when gs_RxFftLength != FFT_LEN_1024 */
#define LOG2N_512 9     /* log2(512); table-index reference size when gs_RxFftLength == FFT_LEN_1024 */
#define FFT_LEN_1024          1024   /* value of gs_RxFftLength that selects the LOG2N_512 indexing */

/* =============================================== */
/* static function prototypes */
/* =============================================== */
/* In-place radix-2 butterfly stages over s_fftn interleaved complex values. */
static void FftRadix2Dit(int16* psa_inbuf, int16 s_fftn, int16 s_log2n);
/* In-place post-processing of the butterfly output (see the function header). */
static void FFTPostProcess(int16* psa_inbuf, int16 s_fftn, int16 s_log2n);
/* Disabled prototypes kept from the original source: */
//static int32 mult17x16(int32 l_a, int32 l_b);
//static int32 trunc(int32 L_var, int16 s_bit_position);

/* Non-static helper that is not defined in the part of the file shown here;
   FftReal() calls it to reorder the input into a separate output buffer. */
void BitReverse(int16 *psa_inbuf, int16 *psa_outbuf, int16 s_fftn, int16 s_nbits,  int16 *psa_bit_reverse_idx);



/*****************************************************************************
;  Subroutine Name: FftReal()
;
;  Forward FFT driver. Runs three steps in order:
;     1) BitReverse() from psa_inbuf into a local scratch buffer,
;     2) copy of the scratch buffer into psa_outbuf,
;     3) FftRadix2Dit() and then FFTPostProcess(), both in place on
;        psa_outbuf.
;
;  Input Arguments:
;     psa_inbuf -- input samples handed to BitReverse()
;     s_fftn    -- FFT size; 2*s_fftn int16 values are copied to psa_outbuf
;     s_log2n   -- log2(s_fftn)
;
;  Output Arguments:
;     psa_outbuf -- receives the post-processed FFT result
;
;  WARNING: FFTPostProcess() writes psa_outbuf[2*s_fftn] and
;        psa_outbuf[2*s_fftn+1], so psa_outbuf must hold at least
;        2*s_fftn+2 entries. The scratch buffer holds 2*RX_NUM_TONES
;        entries, so s_fftn must not exceed RX_NUM_TONES.
;
;****************************************************************************/
C_SCOPE void FftReal(int16* psa_inbuf, int16* psa_outbuf, int16 s_fftn, int16 s_log2n)
{
   /* BitReverse() cannot reorder a buffer in place, so it is given a
      separate scratch array as its destination. */
   int16 sa_scratch[2*RX_NUM_TONES];
   size_t ui_copy_bytes = 2*s_fftn*sizeof(int16);

   /* Step 1: pre-scramble the input using bit-reversed addressing. */
   BitReverse(psa_inbuf, sa_scratch, s_fftn, s_log2n, gsa_bit_reverse_idx256);

   /* Step 2: move the reordered data into the caller's output buffer. */
   memcpy(psa_outbuf, sa_scratch, ui_copy_bytes);

   /* Step 3: butterfly stages followed by post-processing, both in place. */
   FftRadix2Dit(psa_outbuf, s_fftn, s_log2n);
   FFTPostProcess(psa_outbuf, s_fftn, s_log2n);
}

/*****************************************************************************
;  Subroutine Name: FFTPostProcess()
;
;  Post-processing routine for FFT. Works in place on psa_inbuf.
;
;  On entry psa_inbuf holds A[k], k=0, 1, ..., N-1 (N = s_fftn), stored as
;  interleaved pairs Ar[k], Ai[k]. A[N] is taken to be equal to A[0].
;
;  The output X[k] is computed from the input A[k] using the following
;  relation:
;
;     Xr[k]={Ar[k]+Ar[N-k]+c[k]*(Ai[k]+Ai[N-k])+s[k]*(Ar[k]-Ar[N-k])}/2
;     Xi[k]={Ai[k]-Ai[N-k]-c[k]*(Ar[k]-Ar[N-k])+s[k]*(Ai[k]+Ai[N-k])}/2
;     Xr[N-k]={Ar[k]+Ar[N-k]-c[k]*(Ai[k]+Ai[N-k])-s[k]*(Ar[k]-Ar[N-k])}/2
;     Xi[N-k]={-(Ai[k]-Ai[N-k])-c[k]*(Ar[k]-Ar[N-k])+s[k]*(Ai[k]+Ai[N-k])}/2
;     k=0,1,...,N/2-1,
;     where c[k]=cos(-(2*PI*k/(2*N))), s[k]=sin(-(2*PI*k/(2*N))) and
;     Xr[N/2] = Ar[N/2], Xi[N/2] = -Ai[N/2]
;
;  The divide by 2 is applied once, when A[k] is copied into a local
;  working buffer. s[k] is read from gsa_sk_tab256 and c[k] is derived from
;  the same table; the table stride depends on gs_RxFftLength and s_log2n.
;
;  Drop Xi[0] and Xi[N] (since they are zero) and output data in
;     the order: Xr[0], Xr[N], Xr[1], Xi[1],..., Xr[N-1],Xi[N-1]
;
;  Prototype:
;     static void FFTPostProcess(int16* psa_inbuf, int16 s_fftn, int16 s_log2n)
;
;  Input Arguments:
;     psa_inbuf -- a pointer to the array of complex values A[k]
;        (2*s_fftn int16 entries on input); overwritten with the result
;     s_fftn -- FFT size N
;     s_log2n -- log2(s_fftn)
;
;  Output Arguments:
;     psa_inbuf -- holds the output complex numbers in the format
;        Xr[0], Xr[N], Xr[1], Xi[1],..., Xr[N-1],Xi[N-1]
;        where Xr[k] and Xi[k] are the real and imaginary part of
;        complex number X[k]
;     WARNING: Due to internal use (entries 2*s_fftn and 2*s_fftn+1 are
;           written), the buffer size should at least be s_fftn*2+2.
;           The local working copy holds 2*RX_BUFFER_SIZE+2 entries, so
;           s_fftn must not exceed RX_BUFFER_SIZE.
;
;  Notes:
;     round() here is the project's two-argument fixed-point helper, not
;     the math.h function of the same name.
;
;****************************************************************************/

static void FFTPostProcess(int16* psa_inbuf, int16 s_fftn, int16 s_log2n)
{
   /* 1.0 in the scaling of the sine table. Operands are aligned by
      multiplying with this constant instead of "x << TWID_SHIFT": a left
      shift of a negative value is undefined behaviour in C, whereas the
      multiplication yields the same value and is always defined here
      (|x| <= 32768, so the product fits in 32 bits). */
   const int32 l_TwidOne = (int32)1 << TWID_SHIFT;

   int16 k, k2, nk2, k2p1, nk2p1, ktable, nktable, s_TableShift;
   int16 sa_A[2*RX_BUFFER_SIZE+2], s_ck;
   int32 l_sk;
   int32 l_Arp, l_Arm, l_Aip, l_Aim;
   int32 l_ArpScaled, l_AimScaled;
   int32 l_Bp, l_Bm;
   int32 l_acc0;

   /* Table index = k << s_TableShift. Since the twiddle factor tables are
      for adsl2plus, bis and dmt modes need every alternate value in the
      table, i.e. one extra doubling of the index. This does not change
      inside the loop, so it is folded into the shift count once here. */
   if (gs_RxFftLength == FFT_LEN_1024)
      s_TableShift = LOG2N_512 - s_log2n;
   else
      s_TableShift = (LOG2N_256 - s_log2n) + 1;

   /*  Copy input into buffer of length s_fftn*2+2; */
   for (k=0 ; k<s_fftn*2 ; k++){
      sa_A[k] = (psa_inbuf[k]/2); //implements the divide by 2
   }

   /* Set A[N] = A[0] */
   sa_A[s_fftn*2] = sa_A[0];
   sa_A[s_fftn*2+1] = sa_A[1];

   /* Compute X[k] and X[N-k] from A[k] and A[N-k]. All reads come from the
      local copy sa_A, so the in-place writes to psa_inbuf cannot disturb
      values that are still needed.
      NOTE(review): the original code marks every sature16() store below as
      a place where gs_Overflow could be checked; no check is performed. */
   for(k=0; k<(s_fftn>>1); k++) {

      k2 = k<<1;                 /* index of Ar[k]   */
      nk2 = (s_fftn-k)<<1;       /* index of Ar[N-k] */

      k2p1 = k2+1;               /* index of Ai[k]   */
      nk2p1 = nk2+1;             /* index of Ai[N-k] */

      ktable = k<<s_TableShift;
      nktable = ((s_fftn>>1)-k)<<s_TableShift;

      /* Arp = Ar[k] + Ar[s_fftn-k]; */
      l_acc0 = (int32)sa_A[k2] + sa_A[nk2];
      l_Arp = sature16(l_acc0);

      /* Arm = Ar[k] - Ar[s_fftn-k]; */
      l_acc0 = (int32)sa_A[k2] - sa_A[nk2];
      l_Arm = sature16(l_acc0);

      /* Aip = Ai[k] + Ai[s_fftn-k]; */
      l_acc0 = (int32)sa_A[k2p1] + sa_A[nk2p1];
      l_Aip = sature16(l_acc0);

      /* Aim = Ai[k] - Ai[s_fftn-k]; */
      l_acc0 = (int32)sa_A[k2p1] - sa_A[nk2p1];
      l_Aim = sature16(l_acc0);

      /* ck[k] = -sk[gs_RxFftLength/4-k] */
      /* NOTE(review): if the table can contain -32768 this negation does
         not fit in int16 -- confirm against the table contents. */
      s_ck = -gsa_sk_tab256[nktable];
      l_sk = gsa_sk_tab256[ktable];

      /**************************/
      /* Bp = c[k]*Aip+s[k]*Arm */
      /**************************/
      l_Bp = l_Aip*s_ck + l_Arm*l_sk;

      /**************************/
      /* Bm = s[k]*Aip-c[k]*Arm */
      /**************************/
      l_Bm = l_Aip*l_sk - l_Arm*s_ck;

      /* Align Arp and Aim with the table-scaled products Bp and Bm. */
      l_ArpScaled = l_Arp * l_TwidOne;
      l_AimScaled = l_Aim * l_TwidOne;

      /**************************/
      /* Xr[k] = Arp + Bp */
      /**************************/
      l_acc0 = l_ArpScaled + l_Bp;
      l_acc0 = round(l_acc0, TWID_SHIFT);
      psa_inbuf[k2] = sature16(l_acc0);

      /**************************/
      /* Xi[k] = Aim + Bm */
      /**************************/
      l_acc0 = l_AimScaled + l_Bm;
      l_acc0 = round(l_acc0, TWID_SHIFT);
      psa_inbuf[k2p1] = sature16(l_acc0);

      /**************************/
      /* Xr[N-k] = Arp - Bp */
      /**************************/
      l_acc0 = l_ArpScaled - l_Bp;
      l_acc0 = round(l_acc0, (TWID_SHIFT));
      psa_inbuf[nk2] = sature16(l_acc0);

      /**************************/
      /* Xi[N-k] = -Aim + Bm */
      /**************************/
      l_acc0 = l_Bm - l_AimScaled;
      l_acc0 = round(l_acc0, TWID_SHIFT);
      psa_inbuf[nk2p1] = sature16(l_acc0);
   }

   /* Compute X[N/2]: undo the divide by 2 applied during the copy.
      sa_A[] holds halved int16 values, so doubling always fits in int16
      for the real part; the negated imaginary part can reach +32768 and
      is therefore saturated. Multiplication is used instead of "<< 1"
      because the operands may be negative. */
   psa_inbuf[s_fftn] = (int16)(sa_A[s_fftn] * 2);
   l_acc0 = -(int32)sa_A[s_fftn+1] * 2;
   psa_inbuf[s_fftn+1] = sature16(l_acc0);

   /* Compute X[1]: Xr[N] (written at index 2*N for k=0) replaces Xi[0]. */
   psa_inbuf[1] = psa_inbuf[s_fftn<<1];

}

/*****************************************************************************
;  Subroutine Name: FftRadix2Dit()
;
;  Radix-2 butterfly stages of the FFT, computed in place.
;
;  psa_inbuf holds s_fftn complex values stored as interleaved pairs
;  (real, imag). s_log2n stages are run. In stage 0 the two rails of a
;  butterfly are adjacent complex values; the rail spacing and the number
;  of butterflies per group double after every stage while the number of
;  groups and the twiddle table stride halve.
;
;  Scaling inside one butterfly:
;     - both rails and the twiddle factor are halved on read,
;     - the lower rail is multiplied by the twiddle factor and rounded at
;       bit TWID_SHIFT-1,
;     - the sum/difference of the rails is doubled and then rounded by
;       uc_extra_shift bits, which is 1 for every even stage and for the
;       last stage, 0 otherwise.
;
;  Prototype:
;     static void FftRadix2Dit(int16* psa_inbuf, int16 s_fftn, int16 s_log2n)
;
;  Input Arguments:
;     psa_inbuf -- 2*s_fftn int16 entries; overwritten with the result
;     s_fftn -- FFT size
;     s_log2n -- log2(s_fftn)
;
;  Notes:
;     round() here is the project's two-argument fixed-point helper, not
;     the math.h function of the same name.
;     The original code marks every sature16() store of a butterfly output
;     as a place where gs_Overflow could be checked; no check is performed.
;
;****************************************************************************/
static void FftRadix2Dit(int16* psa_inbuf, int16 s_fftn, int16 s_log2n)
{
   uint8 uc_extra_shift;
   int16 s_stage, s_group, s_bfly, s_FFTLogLengthReductionFactor;
   int16 s_groups, s_node_space, s_bflys_per_group, s_twid_factor;
   int16 s_twid_mult;
   int16 *psa_in0, *psa_in1;
   int16 s_c, s_s;
   int16 s_UpperReal, s_UpperImag, s_LowerReal, s_LowerImag;
   int32 l_top_idx, l_twid_idx, l_twid_step;
   int32 l_xx0, l_yy0;
   int32 l_Part1, l_Part2, l_Part3, l_A_Minus_B, l_C_Minus_D, l_C_Plus_D;
   int16 s_A_Minus_B, s_C_Minus_D, s_C_Plus_D;

   /* The mode does not change while the transform runs, so it is tested
      once here instead of once per butterfly. Since the twiddle factor
      tables are for adsl2plus, bis and dmt modes need every alternate
      value in the tables, hence the stride multiplier of 2. */
   if (gs_RxFftLength == FFT_LEN_1024) {
      s_FFTLogLengthReductionFactor = LOG2N_512 - s_log2n;
      s_twid_mult = 1;
   }
   else {
      s_FFTLogLengthReductionFactor = LOG2N_256 - s_log2n;
      s_twid_mult = 2;
   }

   /* Initialization */
   s_groups = (s_fftn >> 1);
   s_node_space = 2;             // Spacing between top and bottom of butterfly.
   s_bflys_per_group = 1;
   s_twid_factor = (s_fftn >> 1) << s_FFTLogLengthReductionFactor;

   /* Compute the s_log2n stages */
   for(s_stage = 0; s_stage < s_log2n; s_stage++) {

      /* Scale the butterfly output for every even stage, and always for the last stage */
      uc_extra_shift = 1-(s_stage&1);

      if (s_stage == s_log2n-1)
         uc_extra_shift = 1;

      /* Table stride between consecutive butterflies of a group. */
      l_twid_step = (int32)s_twid_mult * s_twid_factor;

      /* Data and table positions are tracked as indices and turned into
         pointers only for elements that are actually accessed, so no
         pointer beyond the end of either array is ever formed. */
      l_top_idx = 0;             // Top of first butterfly.

      for(s_group = 0; s_group < s_groups; s_group++ ) {

         /* Every group starts again at the first twiddle factor.
            A single table pair is used for all FFT lengths. */
         l_twid_idx = 0;

         for(s_bfly = 0; s_bfly < s_bflys_per_group; s_bfly++) {

            psa_in0 = psa_inbuf + l_top_idx;    // Upper rail.
            psa_in1 = psa_in0 + s_node_space;   // Lower rail.

            /* Get butterfly inputs. "Upper" and "lower" refer to upper and lower
               rails of butterfly. */
            s_UpperReal = (*psa_in0)/2;
            s_UpperImag = (*(psa_in0+1))/2;
            s_LowerReal = (*psa_in1)/2;
            s_LowerImag = (*(psa_in1+1))/2;
            s_c = gsa_twid_real256[l_twid_idx]/2;
            s_s = gsa_twid_imag256[l_twid_idx]/2;

            l_twid_idx += l_twid_step;

            /* Multiply lower value by complex twiddle factor */
            // Given A+jB and C+jD, calculate E+jF = (A+jB)*(C+jD)
            // using three multiplications:
            // E = (A-B)*D + (C-D)*A     (= A*C - B*D)
            // F = (A-B)*D + (C+D)*B     (= A*D + B*C)
            // where A=s_LowerReal, B=s_LowerImag, C=s_c, D=s_s

            l_A_Minus_B = (int32)s_LowerReal - s_LowerImag;
            s_A_Minus_B = sature16(l_A_Minus_B);

            l_C_Minus_D = (int32) s_c - s_s;
            s_C_Minus_D = sature16(l_C_Minus_D);

            l_C_Plus_D = (int32) s_c + s_s;     // 17-bit sum
            s_C_Plus_D = sature16(l_C_Plus_D);

            /* Multiply and Apply rounding */
            l_Part1 = (int32)s_A_Minus_B * s_s;       // l_Part1 = (A-B)*D
            l_Part1 = round(l_Part1, TWID_SHIFT-1);

            l_Part2 = (int32)s_C_Minus_D * s_LowerReal;  // l_Part2 = (C-D)*A;
            l_Part2 = round(l_Part2, TWID_SHIFT-1);

            l_Part3 = (int32)s_C_Plus_D * s_LowerImag;   // l_Part3 =(C+D)*B
            l_Part3 = round(l_Part3, TWID_SHIFT-1);

            /* sum */
            l_xx0 = l_Part1 + l_Part2; // Real, 32-bit
            l_yy0 = l_Part1 + l_Part3; // Imag, 32-bit

            /* Lower rail after the twiddle multiplication */
            s_LowerReal = sature16(l_xx0);
            s_LowerImag = sature16(l_yy0);

            /* Now compute butterfly using scaled lower value. The sums are
               doubled with "* 2" rather than "<< 1": they can be negative,
               and a left shift of a negative value is undefined in C. */

            /* Store upper real output to memory */
            l_xx0 = ((int32)s_UpperReal + s_LowerReal) * 2;
            l_xx0 = round(l_xx0, uc_extra_shift);
            *psa_in0 = sature16(l_xx0);

            /* Store upper imag output to memory */
            l_xx0 = ((int32)s_UpperImag + s_LowerImag) * 2;
            l_xx0 = round(l_xx0, uc_extra_shift);
            *(psa_in0+1) = sature16(l_xx0);

            /* Store lower real output to memory */
            l_xx0 = ((int32)s_UpperReal - s_LowerReal) * 2;
            l_xx0 = round(l_xx0, uc_extra_shift);
            *psa_in1 = sature16(l_xx0);

            /* Store lower imag output to memory */
            l_xx0 = ((int32)s_UpperImag - s_LowerImag) * 2;
            l_xx0 = round(l_xx0, uc_extra_shift);
            *(psa_in1+1) = sature16(l_xx0);

            /* Advance to next butterfly */
            l_top_idx += 2;

         }  /* end of butterfly loop */

         /* Advance to the start of next group (skip this group's lower rails) */
         l_top_idx += s_node_space;

      } /* end of group loop; */

      s_twid_factor >>= 1;
      s_groups >>= 1;
      s_bflys_per_group <<= 1;
      s_node_space <<= 1;

   } /* end of stage loop */

}

/*  Undefine the size constants defined at the top of this file so that
    they are not visible past this point.
    NOTE(review): TWID_SHIFT, RX_MUL_PREC, PP_ALIGN and POST_SHIFT are
    defined in the same section but are not undefined here. */



#undef LOG2N_256
#undef LOG2N_512
#undef FFT_LEN_1024
