;; **COPYRIGHT******************************************************************
;;    INTEL CONFIDENTIAL
;;    Copyright (C) 2017 Intel Corporation
;;    Copyright (C), 1994-2002 Aware Inc. All Rights Reserved.
;; ******************************************************************COPYRIGHT**
;; **DISCLAIMER*****************************************************************
;;   The source code contained or described herein and all documents related
;;   to the source code ("Material") are owned by Intel Corporation or its
;;   suppliers or licensors. Title to the Material remains with Intel
;;   Corporation or its suppliers and licensors. The Material may contain
;;   trade secrets and proprietary and confidential information of Intel
;;   Corporation and its suppliers and licensors, and is protected by
;;   worldwide copyright and trade secret laws and treaty provisions. No part
;;   of the Material may be used, copied, reproduced, modified, published,
;;   uploaded, posted, transmitted, distributed, or disclosed in any way
;;   without Intels prior express written permission.
;;
;;   No license under any patent, copyright, trade secret or other
;;   intellectual property right is granted to or conferred upon you by
;;   disclosure or delivery of the Materials, either expressly, by
;;   implication, inducement, estoppel or otherwise. Any license under
;;   such intellectual property rights must be express and approved by
;;   Intel in writing.
;; *****************************************************************DISCLAIMER**
;*************************************************************************
; Aware DMT Technology. Proprietary and Confidential.
;
; ADDRESS:         40 Middlesex Turnpike, Bedford, MA 01730-1413 USA
; TELEPHONE:       781.276.4000
; FAX:             781.276.4001
; WEB:             http://www.aware.com
;
; FILENAME:        vecmultloop.s
;
; DESCRIPTION:     Function for multiplying two vectors
;
;*************************************************************************
.include "asm.h"
.include "xy_regs.h"

.text



;******************************************************************************
; void FdqMultiplyLoop(int32 fdq_coef_arr, int32 fdq_exp_arr, int32 fdq_multiplier, int16 length, int16 fdq_mantissa_wordlength, int16 fdq_multiplier_frac_bits)
;
; Multiplies each FDQ coefficient pair (real, imag) by a common 16-bit
; multiplier, re-normalizes the two products with a shared shift, writes them
; back over the coefficients and adjusts the per-tone exponent byte
; accordingly (the updated exponent is limited to a maximum of 15).
;
; In:    %r0 = start address of fdq coefficients in Y memory
;        %r1 = start address of fdq exponents (one byte per tone)
;        %r2 = ((Multiplier << 16) | Multiplier), Multiplier is a 16-bit word
;        %r3 = smallest array length (number of tones); <= 0 returns at once
;        %r4 = fdq mantissa word length
;        %r5 = number of fractional bits in the fdq multiplier
; Out:   none (coefficients and exponents are updated in place)
; Clobb: %r0, %r1, %r3-%r9, %lp_count, flags, the AY0/AY1 window pointers
;        and their aux_my0/aux_my1 modifier registers
.global FdqMultiplyLoop
FdqMultiplyLoop:

    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; set up loop count for zero ovhd loop mechanism ;
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; The guard must reject length == 0 as well as negative lengths: the
    ; zero-overhead loop does not skip its body when LP_COUNT is 0, it wraps
    ; around instead. An explicit cmp is used (rather than mov.f) so that the
    ; signed condition is evaluated on freshly written flags, including V.
    mov     %lp_count,%r3           ; initialise loop counter
    cmp     %r3, 0
    ble     9f                      ; if (length <= 0) return
    ; NOTE(review): presumably padding between the LP_COUNT write and the
    ; loop set-up below - confirm against the core's pipeline rules.
    nop
    nop

    ;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; set up window registers ;
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; AY1 is the write pointer (16-bit accesses), AY0 the read pointer
    ; (32-bit accesses); both start at the coefficient array.
    sr      %r0,[%ay1]
    sr      0x2000_0001,[aux_my1]   ; offset = 1, postupdate, linear, 16-bit mode, AM_MODULO

    asr     %r0, %r0, 1             ; 32-bit window uses half the 16-bit word address
    sr      %r0,[%ay0]
    sr      0x1,[aux_my0]           ; offset = 1, postupdate, linear, 32-bit mode, AM_MODULO

    ;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; pre processing          ;
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;
    ; the exponent address is pre-incremented (stb.a) inside the loop,
    ; hence start one byte before the first exponent
    sub     %r1, %r1, 1

    ; Precompute the constants that are combined with the per-tone
    ; normalization_shift inside the loop:
    ;   s_coef_rshift = 32 - FDQ_MANTISSA_WORDLENGTH - normalization_shift
    ;   s_coef_lshift = 16 - s_coef_rshift
    ;   s_exp_inc     = s_coef_rshift - MULTIPLIER_FRAC_BITS
    ; hence
    ;   s_const1 = (16 - 32 + FDQ_MANTISSA_WORDLENGTH)
    ;              so that s_coef_lshift = s_const1 + normalization_shift
    ;   s_const2 = (32 - FDQ_MANTISSA_WORDLENGTH - MULTIPLIER_FRAC_BITS)
    ;              so that s_exp_inc = s_const2 - normalization_shift
    sub     %r7, %r4, 16            ; %r7 = s_const1
    sub     %r8, 32, %r4
    sub     %r8, %r8, %r5           ; %r8 = s_const2

    lp      9f                      ; for(i=0; i<length; i++)
        ; multiply the next coefficient pair by the multiplier
        ; (mul_op comes from asm.h; the two products are read back from
        ; aux_xmac2 / aux_xmac1)
        mul_op  0,aux_y0_u,%r2
        lr      %r3, [%aux_xmac2]   ; product stored first  (real coef)
        lr      %r4, [%aux_xmac1]   ; product stored second (imag coef)

        ; normalize coef, use min of the two shifts so both products
        ; share one exponent
        norm    %r5, %r3
        norm    %r6, %r4
        min     %r5, %r5, %r6       ; %r5 = normalization_shift

        add     %r6, %r7, %r5       ; s_coef_lshift = s_const1 + normalization_shift
        sub     %r5, %r8, %r5       ; s_exp_inc = s_const2 - normalization_shift

        ; shift result left by s_coef_lshift and save @ destination addr
        asl     aux_y1_u, %r3, %r6  ; real coef

        ; instead of adding nops, save mips by updating exponent here
        ldb     %r9, [%r1,1]        ; exponent of this tone
        add     %r9, %r9, %r5       ; exponent += s_exp_inc
        min     %r9, %r9, 15        ; limit exponent to at most 15
        stb.a   %r9, [%r1,1]        ; store and advance exponent pointer

        asl     aux_y1_u, %r4, %r6  ; imag coef

9:
    j       [%blink]

;******************************************************************************




