/* **COPYRIGHT******************************************************************
    INTEL CONFIDENTIAL
    Copyright (C) 2017 Intel Corporation
    Copyright (C), 1994-1998 Aware Inc. All Rights Reserved.
******************************************************************COPYRIGHT** */
/* **DISCLAIMER*****************************************************************
    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors. Title to the Material remains with Intel
    Corporation or its suppliers and licensors. The Material may contain
    trade secrets and proprietary and confidential information of Intel
    Corporation and its suppliers and licensors, and is protected by
    worldwide copyright and trade secret laws and treaty provisions. No part
    of the Material may be used, copied, reproduced, modified, published,
    uploaded, posted, transmitted, distributed, or disclosed in any way
    without Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise. Any license under
    such intellectual property rights must be express and approved by
    Intel in writing.
*****************************************************************DISCLAIMER** */
/*****************************************************************************
;
;   Aware DMT Technology. Proprietary and Confidential.
;
;   40 Middlesex Turnpike, Bedford, MA 01730-1413 USA
;   Phone (781) 276 - 4000
;   Fax   (781) 276 - 4001
;
;  matrix.c
;  Matrix utility functions with the option for floating point emulation.
;
;******************************************************************************/

#include "common.h"
#include "ieee_flt.h"
#include "matrix.h"
#if 0
#include "stdio.h"
#endif

/*^^^
*-------------------------------------------------------------------
*
*  Prototype: void MatrixMult(Float32 *pfa_mat_x, Float32 *pfa_mat_y, Float32 *pfa_mat_z,
*              int16 s_xdim, int16 s_ydim, int16 s_xydim, int16 s_transpose);
*
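*  Description: Matrix multiply: z = x*y, or z = x'*y if s_transpose != 0.
*               Dimensions are:
*               for s_transpose != 0:
*               x(s_xdim, s_xydim), y(s_xydim, s_ydim)
*               for s_transpose == 0:
*               x(s_xydim, s_xdim), y(s_xydim, s_ydim)
*               NOTE(review): no MatrixMult definition follows this header
*               in this file, and the two dimension cases above look
*               swapped with respect to s_transpose -- confirm against
*               the actual implementation.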
*
*-------------------------------------------------------------------
*^^^
*/
#ifdef FLOAT_IN_SW
/*^^^
*-------------------------------------------------------------------
*
*  Prototype: void Cholesky(Float32 *pfa_R, int16 s_len);
*
*  Description: In-place, square-root-free factorization of the matrix
*               held in pfa_R, built on the software floating-point
*               primitives addf32/subf32/mpyf32/divf32.
*
*               For each row i, in increasing order:
*                 d      = R(i,i) - sum over k<i of R(k,i) * R(k,i) * R(k,k)
*                 R(i,i) = d
*                 R(i,j) = (R(i,j) - sum over k<i of R(k,j) * R(k,i) * R(k,k)) / d
*                          for every j > i
*
*               Only elements R(i,j) with i <= j are read or written, and
*               always through the RdLMat/LMat macros. No check is made
*               for d being zero before the division.
*
*  Input Arguments:
*     pfa_R - matrix to factor; overwritten with the result
*     s_len - dimension passed to RdLMat/LMat; rows and columns both
*             run over 0..s_len-1
*
*-------------------------------------------------------------------
*^^^
*/
void Cholesky(Float32 *pfa_R, int16 s_len){

   int16 s_row, s_col, s_prev;

   Float32 f_diag_k, f_row_k, f_col_k;

   Float32 f_acc, f_pivot;

   Float32 f_prod, f_numer;

   s_row = 0;
   while (s_row < s_len) {

      /* Contribution of the already processed rows to the diagonal entry. */
      /* 0x00000000 is presumably the bit pattern of +0.0 -- TODO confirm. */
      f_acc = 0x00000000;
      s_prev = 0;
      while (s_prev < s_row) {
         f_diag_k = RdLMat(pfa_R, s_prev, s_prev, s_len);
         f_row_k = RdLMat(pfa_R, s_prev, s_row, s_len);

         /* (R(k,i) * R(k,i)) * R(k,k): keep this association order. */
         f_prod = mpyf32(f_row_k, f_row_k);
         f_prod = mpyf32(f_prod, f_diag_k);
         f_acc = addf32(f_acc, f_prod);

         s_prev++;
      }

      /* New diagonal entry; it also divides the rest of this row. */
      f_pivot = subf32(RdLMat(pfa_R, s_row, s_row, s_len), f_acc);
      LMat(pfa_R, s_row, s_row, s_len) = f_pivot;

      s_col = s_row+1;
      while (s_col < s_len) {

         f_acc = 0x00000000;
         s_prev = 0;
         while (s_prev < s_row) {
            f_diag_k = RdLMat(pfa_R, s_prev, s_prev, s_len);
            f_row_k = RdLMat(pfa_R, s_prev, s_row, s_len);
            f_col_k = RdLMat(pfa_R, s_prev, s_col, s_len);

            /* (R(k,j) * R(k,i)) * R(k,k): keep this association order. */
            f_prod = mpyf32(f_col_k, f_row_k);
            f_prod = mpyf32(f_prod, f_diag_k);
            f_acc = addf32(f_acc, f_prod);

            s_prev++;
         }

         f_numer = subf32(RdLMat(pfa_R, s_row, s_col, s_len), f_acc);
         LMat(pfa_R, s_row, s_col, s_len) = divf32(f_numer, f_pivot);

         s_col++;
      }

      s_row++;
   }

}

/*^^^
*-------------------------------------------------------------------
*
*  Prototype: void BackSubstitution(Float32 *pfa_L, Float32 *pfa_result, int16 s_len);
*
*  Description: Back substitution using the software floating-point
*               primitives subf32/mpyf32.
*
*               All s_len entries of pfa_result are first cleared. Then,
*               for i = s_len-2 down to 0:
*                 result[i] = L(i, s_len-1)
*                             - sum over j = i+1 .. s_len-2 of L(i,j) * result[j]
*
*               Column s_len-1 of pfa_L is therefore used as the
*               right-hand side, and result[s_len-1] is left at zero.
*               The diagonal of pfa_L is never read.
*
*  Input Arguments:
*     pfa_L      - matrix read through RdLMat (not modified)
*     s_len      - dimension passed to RdLMat; also the length of pfa_result
*
*  Output Arguments:
*     pfa_result - solution vector, s_len entries
*
*-------------------------------------------------------------------
*^^^
*/
void BackSubstitution(Float32 *pfa_L, Float32 *pfa_result, int16 s_len) {

   int16 s_row, s_col;

   Float32 f_acc, f_prod;

   /* Clear the whole output, including the last entry that the */
   /* substitution below never writes. */
   s_col = 0;
   while (s_col < s_len) {
      pfa_result[s_col] = 0x00000000;
      s_col++;
   }

   s_row = s_len-2;
   while (s_row >= 0) {

      /* Start from the right-hand-side column ... */
      f_acc = RdLMat(pfa_L, s_row, (int16)(s_len-1), s_len);

      /* ... and remove the terms of the unknowns already solved. */
      for (s_col = s_row+1; s_col < s_len-1; s_col++) {
         f_prod = mpyf32(RdLMat(pfa_L, s_row, s_col, s_len), pfa_result[s_col]);
         f_acc = subf32(f_acc, f_prod);
      }

      pfa_result[s_row] = f_acc;

      s_row--;
   }

}
/*^^^
*-------------------------------------------------------------------
*
*  Prototype: void FloatConvolve(Float32 *pfa_input, Float32 *pfa_filter, Float32 *pfa_output,
*              int16 s_inputlen, int16 s_CPLen, int16 s_filterlen);
*
*  Description: Circular correlation of a Float32 input buffer with a
*               Float32 filter, using the software floating-point
*               primitives addf32/mpyf32 (original note: designed for
*               tdq_init, with a reversed filter).
*
*               For i = 0 .. s_inputlen-1:
*                 pfa_output[i] = sum over j = 0 .. s_filterlen-1 of
*                    pfa_input[(s_CPLen - s_filterlen + 1 + i + j) mod s_inputlen]
*                    * pfa_filter[j]
*
*               The input index is always reduced into [0, s_inputlen),
*               so the input buffer is never read out of bounds, even
*               when s_filterlen > s_inputlen or the starting offset is
*               more than one buffer length away from the valid range.
*
*  Input Arguments:
*     pfa_input   - input buffer, s_inputlen entries, treated as circular
*     pfa_filter  - filter taps, s_filterlen entries
*     s_inputlen  - length of pfa_input and pfa_output; nothing is done if <= 0
*     s_CPLen     - offset that, with s_filterlen, fixes the first window start
*     s_filterlen - number of filter taps; outputs are zero if <= 0
*
*  Output Arguments:
*     pfa_output  - s_inputlen correlation results
*
*-------------------------------------------------------------------
*^^^
*/
void FloatConvolve(Float32 *pfa_input, Float32 *pfa_filter, Float32 *pfa_output, int16 s_inputlen,
               int16 s_CPLen, int16 s_filterlen) {

   int16 i, j, s_start, s_tap;
   long l_start;
   Float32 f_Sum;

   /* No output to produce; this also keeps the modulo below well defined. */
   if (s_inputlen <= 0) {
      return;
   }

   /* Start of the first window, reduced into [0, s_inputlen). Computed in */
   /* long so the difference of two int16 values cannot overflow. */
   l_start = ((long)s_CPLen - (long)s_filterlen + 1L) % (long)s_inputlen;
   if (l_start < 0) {
      l_start += s_inputlen;
   }
   s_start = (int16)l_start;

   for (i = 0; i < s_inputlen; i++) {
      f_Sum = 0;

      /* Walk the circular buffer one sample per tap, wrapping at the end. */
      s_tap = s_start;
      for (j = 0; j < s_filterlen; j++) {
         f_Sum = addf32(f_Sum, mpyf32(pfa_input[s_tap], pfa_filter[j]));
         s_tap++;
         if (s_tap >= s_inputlen) {
            s_tap = 0;
         }
      }
      pfa_output[i] = f_Sum;

      /* The next output uses a window shifted by one sample. */
      s_start++;
      if (s_start >= s_inputlen) {
         s_start = 0;
      }
   }
}

#else /*  NOT FLOAT_IN_SW */

/*^^^
*-------------------------------------------------------------------
*
*  Prototype: void Cholesky(float R[], int s_len);
*
*  Description: In-place, square-root-free factorization of the matrix
*               held in R, using native float arithmetic.
*
*               For each row i, in increasing order:
*                 d      = R(i,i) - sum over k<i of R(k,k) * R(k,i) * R(k,i)
*                 R(i,i) = d
*                 R(i,j) = (R(i,j) - sum over k<i of R(k,k) * R(k,i) * R(k,j)) / d
*                          for every j > i
*
*               Only elements R(i,j) with i <= j are read or written, and
*               always through the RdLMat/LMat macros. No check is made
*               for d being zero before the division.
*
*  Input Arguments:
*     R     - matrix to factor; overwritten with the result
*     s_len - dimension passed to RdLMat/LMat; rows and columns both
*             run over 0..s_len-1
*
*-------------------------------------------------------------------
*^^^
*/
void Cholesky(float R[], int s_len){

   int row, col, prev;

   float diag_k, row_k, col_k;

   float acc, pivot;

   row = 0;
   while (row < s_len) {

      /* Contribution of the already processed rows to the diagonal entry. */
      acc = 0.0;
      prev = 0;
      while (prev < row) {
         diag_k = RdLMat(R, prev, prev, s_len);
         row_k = RdLMat(R, prev, row, s_len);

         /* Single expression, left to right: (R(k,k) * R(k,i)) * R(k,i). */
         acc += diag_k*row_k*row_k;

         prev++;
      }

      /* New diagonal entry; it also divides the rest of this row. */
      pivot = RdLMat(R, row, row, s_len) - acc;
      LMat(R, row, row, s_len) = pivot;

      col = row+1;
      while (col < s_len) {

         acc = 0.0;
         prev = 0;
         while (prev < row) {
            diag_k = (RdLMat(R, prev, prev, s_len));
            row_k = (RdLMat(R, prev, row, s_len));
            col_k = (RdLMat(R, prev, col, s_len));

            /* Single expression, left to right: (R(k,k) * R(k,i)) * R(k,j). */
            acc += diag_k*row_k*col_k;

            prev++;
         }

         LMat(R, row, col, s_len) = (RdLMat(R, row, col, s_len) - acc)/pivot;

         col++;
      }

      row++;
   }

}

/*^^^
*-------------------------------------------------------------------
*
*  Prototype: void BackSubstitution(float L[], float pfa_result[], int s_len);
*
*  Description: Back substitution using native float arithmetic.
*
*               All s_len entries of pfa_result are first cleared. Then,
*               for i = s_len-2 down to 0:
*                 result[i] = L(i, s_len-1)
*                             - sum over j = i+1 .. s_len-2 of L(i,j) * result[j]
*
*               Column s_len-1 of L is therefore used as the right-hand
*               side, and result[s_len-1] is left at zero. The diagonal
*               of L is never read.
*
*  Input Arguments:
*     L          - matrix read through RdLMat (not modified)
*     s_len      - dimension passed to RdLMat; also the length of pfa_result
*
*  Output Arguments:
*     pfa_result - solution vector, s_len entries
*
*-------------------------------------------------------------------
*^^^
*/
void BackSubstitution(float L[], float pfa_result[], int s_len) {

   int row, col;

   float acc;

   /* Clear the whole output, including the last entry that the */
   /* substitution below never writes. */
   col = 0;
   while (col < s_len) {
      pfa_result[col] = 0.0;
      col++;
   }

   row = s_len-2;
   while (row >= 0) {

      /* Start from the right-hand-side column ... */
      acc = RdLMat(L, row, s_len-1, s_len);

      /* ... and remove the terms of the unknowns already solved. */
      for (col = row+1; col < s_len-1; col++) {
         acc -= RdLMat(L, row, col, s_len)*pfa_result[col];
      }

      pfa_result[row] = acc;

      row--;
   }

}


/*^^^
*-------------------------------------------------------------------
*
*  Prototype: void FloatConvolve(int16 *psa_input, float *pfa_filter, float *pfa_output,
*              int16 s_inputlen, int16 s_CPLen, int16 s_filterlen);
*
*  Description: Circular correlation of an int16 input buffer with a
*               float filter (original note: designed for tdq_init,
*               with a reversed filter).
*
*               For i = 0 .. s_inputlen-1:
*                 pfa_output[i] = sum over j = 0 .. s_filterlen-1 of
*                    (float)psa_input[(s_CPLen - s_filterlen + 1 + i + j) mod s_inputlen]
*                    * pfa_filter[j]
*
*               The input index is always reduced into [0, s_inputlen),
*               so the input buffer is never read out of bounds, even
*               when s_filterlen > s_inputlen or the starting offset is
*               more than one buffer length away from the valid range.
*
*  Input Arguments:
*     psa_input   - input buffer, s_inputlen entries, treated as circular
*     pfa_filter  - filter taps, s_filterlen entries
*     s_inputlen  - length of psa_input and pfa_output; nothing is done if <= 0
*     s_CPLen     - offset that, with s_filterlen, fixes the first window start
*     s_filterlen - number of filter taps; outputs are zero if <= 0
*
*  Output Arguments:
*     pfa_output  - s_inputlen correlation results
*
*-------------------------------------------------------------------
*^^^
*/
void FloatConvolve(int16 *psa_input, float *pfa_filter, float *pfa_output, int16 s_inputlen,
               int16 s_CPLen, int16 s_filterlen) {

   int16 i, j, s_start, s_tap;
   long l_start;
   float f_Sum;

   /* No output to produce; this also keeps the modulo below well defined. */
   if (s_inputlen <= 0) {
      return;
   }

   /* Start of the first window, reduced into [0, s_inputlen). Computed in */
   /* long so the difference of two int16 values cannot overflow. */
   l_start = ((long)s_CPLen - (long)s_filterlen + 1L) % (long)s_inputlen;
   if (l_start < 0) {
      l_start += s_inputlen;
   }
   s_start = (int16)l_start;

   for (i = 0; i < s_inputlen; i++) {
      f_Sum = 0;

      /* Walk the circular buffer one sample per tap, wrapping at the end. */
      s_tap = s_start;
      for (j = 0; j < s_filterlen; j++) {
         f_Sum += (float)psa_input[s_tap] * pfa_filter[j];
         s_tap++;
         if (s_tap >= s_inputlen) {
            s_tap = 0;
         }
      }
      pfa_output[i] = f_Sum;

      /* The next output uses a window shifted by one sample. */
      s_start++;
      if (s_start >= s_inputlen) {
         s_start = 0;
      }
   }
}

#endif /*  FLOAT_IN_SW */

