/* **COPYRIGHT******************************************************************
    INTEL CONFIDENTIAL
    Copyright (C) 2017 Intel Corporation
    Copyright (C), 1994-2002 Aware Inc. All Rights Reserved.
******************************************************************COPYRIGHT** */
/* **DISCLAIMER*****************************************************************
    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors. Title to the Material remains with Intel
    Corporation or its suppliers and licensors. The Material may contain
    trade secrets and proprietary and confidential information of Intel
    Corporation and its suppliers and licensors, and is protected by
    worldwide copyright and trade secret laws and treaty provisions. No part
    of the Material may be used, copied, reproduced, modified, published,
    uploaded, posted, transmitted, distributed, or disclosed in any way
    without Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise. Any license under
    such intellectual property rights must be express and approved by
    Intel in writing.
*****************************************************************DISCLAIMER** */
/*************************************************************************
* Aware DMT Technology. Proprietary and Confidential.
*
* ADDRESS:         40 Middlesex Turnpike, Bedford, MA 01730-1413 USA
* TELEPHONE:       781.276.4000
* FAX:             781.276.4001
* WEB:             http://www.aware.com
*
* FILENAME:        cmplxvecmult.c
*
* DESCRIPTION:     Function for computing product of two complex arrays
*
**************************************************************************/

#include "xy_iof.h"
#include "gdata.h"

/*^^^
 *------------------------------------------------------------------------
 *
 *  Name : CmplxVectorMultiply
 *
 *  Description:
 *    This routine multiplies two complex arrays and shifts the product to
 *    form a single complex array of the same size as the input arrays.
 *
 * Memory Allocation:
 *    The three arrays in X memory can be at the same location but otherwise
 *    they must not overlap. The two arrays in Y memory can be at the same
 *    location but otherwise they must not overlap. Each of the complex
 *    input and output arrays is "2*s_length" words long. Each of the
 *    complex scratch pads is "4*s_length" words long.
 *
 * Algorithm:
 *             Z = X * Y
 *          (Zr + j*Zi) = (Xr + j*Xi) * (Yr + j*Yi)
 *    To efficiently implement complex multiplies using the dual MAC unit,
 *    we store the complex data , X = Xr + j*Xi, in X memory as:
 *       ------------
 *       | Xi | Xr  |   address n in 32-bit XY addressing mode
 *       | Xr | -Xi |   address n+1 in 32-bit XY addressing mode
 *       ------------
 *
 *    and Y = Yr + j*Yi, in Y memory as:
 *       -----------
 *       | Yr | Yr |    address m in 32-bit XY addressing mode
 *       | Yi | Yi |    address m+1 in 32-bit XY addressing mode
 *       -----------
 *
 *    To compute the complex multiply, we simply call two MAC instructions and
 *    copy the result in the accumulator to a register %r:
 *       mul 0, %x0_u, %y0_u     (assume that %x0_u=n and %y0_u = m)
 *       mac %r, %x0_u, %y0_u
 *    The result in %r is the complex product P:
 *       | Pi | Pr |
 *
 *  Prototype:
 *    void CmplxVectorMultiply(int16 *psa_destbuf, int16 *psa_srcbuf1, int16 *psa_srcbuf2, FlagT ft_conjugate_src1, int16 s_resultShift, int16 s_length, int16 *psa_CmplxMultScratchPadX, int16 *psa_CmplxMultScratchPadY)
 *
 *  Input Arguments:
 *    *psa_srcbuf1         - Pointer to complex input vector 1 in X memory
 *    *psa_srcbuf2         - Pointer to complex input vector 2 in Y memory
 *    ft_conjugate_src1    - 1 Implies Conjugate complex input vector 1
 *    s_resultShift        - Scalar shift to be applied to result
 *    s_length          - Number of complex elements in array
 *    *psa_CmplxMultScratchPadX  - Scratch Pad in X memory of size 2*sizeof(input_vector1)
 *    *psa_CmplxMultScratchPadY  - Scratch Pad in Y memory of size 2*sizeof(input_vector2)
 *
 *  Output Arguments:
 *    *psa_destbuf         - Pointer to complex output vector in X memory
 *
 *  Return:
 *
 *  Notes:
 *    -  cmplx_vector_D = (cmplx_vector_A * cmplx_vector_B) >> shift if ft_conjugate_src1 == FALSE
 *    -  cmplx_vector_D = (conj(cmplx_vector_A) * cmplx_vector_B) >> shift if ft_conjugate_src1 == TRUE
 *    - shift must be within the following range
 *       0 <= x <= 16
 *    - Elements of all the other vectors are assumed to be signed words
 *       ie 0x8000 <= x <=0x7fff
 *    - No saturation checks are performed
 *------------------------------------------------------------------------
 *^^^
 */
/*
 * First stage invoked by CmplxVectorMultiply(). Declared here only; the
 * definition is not in this file.
 * NOTE(review): presumably rearranges the two inputs into the scratch pads in
 * the dual-MAC layout described in the routine header -- confirm against the
 * implementation.
 */
void CmplxVectorMultiplyLoop_PreProcess(
   int32 src_arr1,        /* address of complex input vector 1 (caller passes a 16-bit-mode XY address) */
   int32 src_arr2,        /* address of complex input vector 2 (caller passes a 16-bit-mode XY address) */
   int32 scratch_padX,    /* address of the X scratch pad (caller passes a 16-bit-mode XY address) */
   int32 scratch_padY,    /* address of the Y scratch pad (caller passes a 16-bit-mode XY address) */
   int16 length,          /* caller passes s_length */
   int16 conj_src_arr1);  /* caller passes ft_conjugate_src1 cast to int16 */

/*
 * Second stage invoked by CmplxVectorMultiply(). Declared here only; the
 * definition is not in this file.
 * NOTE(review): the fourth parameter is named like an array address, but the
 * caller in this file passes the scalar s_resultShift -- confirm intent.
 */
void CmplxVectorMultiplyLoop(
   int32 dest_arr,        /* address of the output vector (caller passes a 16-bit-mode XY address) */
   int32 src_arr1,        /* X scratch pad address after the caller's >>1 switch to ADDR_MODE_32 */
   int32 src_arr2,        /* Y scratch pad address after the caller's >>1 switch to ADDR_MODE_32 */
   int32 src_shift_arr,   /* caller passes s_resultShift */
   int16 length);         /* caller passes s_length */
/*
 * Runs CmplxVectorMultiplyLoop_PreProcess() followed by
 * CmplxVectorMultiplyLoop() to produce the shifted complex product.
 *
 * IMPORTANT: in the currently enabled build path (#if 1) the five pointer
 * arguments are NOT used. Both stages operate on the fixed gl_XYDec_DD_*
 * addresses instead; only ft_conjugate_src1, s_resultShift and s_length are
 * honoured. The pointer-driven implementation is kept under #else for
 * reference.
 *
 * NOTE(review): the enabled path narrows the ADDR_MODE_32 scratch-pad
 * addresses to int16 before handing them to int32 parameters, whereas the
 * disabled path keeps them in int32 -- confirm these addresses always fit in
 * 16 bits.
 */
void CmplxVectorMultiply(
   int16 *psa_destbuf,
   int16 *psa_srcbuf1,
   int16 *psa_srcbuf2,
   FlagT ft_conjugate_src1,
   int16 s_resultShift,
   int16 s_length,
   int16 *psa_CmplxMultScratchPadX,
   int16 *psa_CmplxMultScratchPadY)
{
#if 1

   int16 s_scratchPadX_Mode32;
   int16 s_scratchPadY_Mode32;

   /* Not referenced in this build path; see the note above the function. */
   (void)psa_destbuf;
   (void)psa_srcbuf1;
   (void)psa_srcbuf2;
   (void)psa_CmplxMultScratchPadX;
   (void)psa_CmplxMultScratchPadY;

   /* Stage 1: all addresses are passed in their 16-bit addressing-mode form. */
   CmplxVectorMultiplyLoop_PreProcess(gl_XYDec_DD_rxFDQOutput_X16,
                                      gl_XYDec_DD_NoiseError_Y16,
                                      gl_XYDec_DD_CmplxMultScratchPadX_X16,
                                      gl_XYDec_DD_CmplxMultScratchPadY_Y16,
                                      s_length,
                                      (int16)ft_conjugate_src1);

   /* Halving the 16-bit-mode address switches it to ADDR_MODE_32. */
   s_scratchPadX_Mode32 = (int16)(gl_XYDec_DD_CmplxMultScratchPadX_X16 >> 1);
   s_scratchPadY_Mode32 = (int16)(gl_XYDec_DD_CmplxMultScratchPadY_Y16 >> 1);

   /* Stage 2: multiply loop reads the scratch pads and writes the output buffer. */
   CmplxVectorMultiplyLoop(gl_XYDec_DD_rxFDQErrCorrBuf_X16,
                           s_scratchPadX_Mode32,
                           s_scratchPadY_Mode32,
                           s_resultShift,
                           s_length);

#else //Disabled for now (since only DD showtime processing is using this) to conserve program memory

   int32 l_destX, l_src1X, l_src2Y;
   int32 l_scratchX, l_scratchY;

   /* Validate each caller-supplied pointer against its memory bank, then decode it. */
   if (CheckXYAddrRange(psa_srcbuf1, X_MEM) == FAIL)
   {
      return;
   }
   l_src1X = XYAddrDecode((int32 *) psa_srcbuf1, ADDR_MODE_16);

   if (CheckXYAddrRange(psa_srcbuf2, Y_MEM) == FAIL)
   {
      return;
   }
   l_src2Y = XYAddrDecode((int32 *) psa_srcbuf2, ADDR_MODE_16);

   if (CheckXYAddrRange(psa_CmplxMultScratchPadX, X_MEM) == FAIL)
   {
      return;
   }
   l_scratchX = XYAddrDecode((int32 *) psa_CmplxMultScratchPadX, ADDR_MODE_16);

   if (CheckXYAddrRange(psa_CmplxMultScratchPadY, Y_MEM) == FAIL)
   {
      return;
   }
   l_scratchY = XYAddrDecode((int32 *) psa_CmplxMultScratchPadY, ADDR_MODE_16);

   if (CheckXYAddrRange(psa_destbuf, X_MEM) == FAIL)
   {
      return;
   }
   l_destX = XYAddrDecode((int32 *) psa_destbuf, ADDR_MODE_16);

   CmplxVectorMultiplyLoop_PreProcess(l_src1X, l_src2Y, l_scratchX, l_scratchY,
                                      s_length, (int16)ft_conjugate_src1);

   /* switch to ADDR_MODE_32 */
   l_scratchX >>= 1;
   l_scratchY >>= 1;

   CmplxVectorMultiplyLoop(l_destX, l_scratchX, l_scratchY, s_resultShift, s_length);

#endif
}
