1825 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			1825 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
/***************************************************************************
Copyright (c) 2013-2019, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
 | |
| 
 | |
/* One double-complex element (2 x 64-bit double) occupies 16 bytes. */
#define unit_size 16
/* DISPn(ind,disp): byte displacement of unrolled-iteration `ind` when each
   iteration consumes n*unit_size bytes; `disp` is an extra fixed offset.
   Used so loads/stores inside the unrolled kernels can index off the
   unmodified A/B pointers. */
#define DISP32(ind,disp) (ind*unit_size*32+disp)
#define DISP16(ind,disp) (ind*unit_size*16+disp)
#define DISP8(ind,disp) (ind*unit_size*8+disp)
#define DISP4(ind,disp) (ind*unit_size*4+disp)
#define DISP2(ind,disp) (ind*unit_size*2+disp)
#define DISP1(ind,disp) (ind*unit_size+disp)
#define DISPX(disp)  (disp)
 | |
/*	HELPERS FOR SAVE	*/

/* Load two adjacent double-complex C elements {r0,i0},{r1,i1} from
   \REG + \LOFFSET and repack them as \VS_OUT1 = {r0,r1},
   \VS_OUT2 = {i0,i1}.  Compiled out for TRMM, which does not read C. */
.macro LOAD_COUPLE_AS_RR_II  VS_OUT1,VS_OUT2,VS_TEMP1,VS_TEMP2,REG,LOFFSET
#ifndef TRMMKERNEL
  lxv	\VS_TEMP1,	DISPX(\LOFFSET)(\REG)
  lxv	\VS_TEMP2,	DISPX(\LOFFSET+16)(\REG)
  xxmrgld  \VS_OUT1,\VS_TEMP1,\VS_TEMP2
  xxmrghd  \VS_OUT2,\VS_TEMP1,\VS_TEMP2
#endif
.endm
 | |
/* From two products {a0r*br,a0i*bi} and {a1r*br,a1i*bi}, pack
   {a0r*br,a1r*br} into \VSOUT1 and {a0i*bi,a1i*bi} into \VSOUT2. */
.macro RESULT_INTO_REALREAL_IMAGEIMAGE VSIN1,VSIN2,VSOUT1,VSOUT2
	xxmrgld	\VSOUT1, \VSIN1,\VSIN2 /*  real*real from 2 results*/
	xxmrghd	\VSOUT2, \VSIN1,\VSIN2 /*  imag*imag from 2 results*/
.endm

/* From two swapped products {a0r*bi,a0i*br} and {a1r*bi,a1i*br}, pack
   {a0r*bi,a1r*bi} into \VSOUT1 and {a0i*br,a1i*br} into \VSOUT2. */
.macro RESULT_INTO_REALIMAG_IMAGREAL VSIN1,VSIN2,VSOUT1,VSOUT2
	xxmrgld	\VSOUT1, \VSIN1,\VSIN2 /*  real*imag */
	xxmrghd	\VSOUT2, \VSIN1,\VSIN2 /*  imag*real*/
.endm
 | |
/* Combine the packed partial products into the complex result:
   \VSINR_OUT1 = {a0r*br op a0i*bi, a1r*br op a1i*bi}  ~ {r0,r1}
   \VSINI_OUT2 = {a0r*bi op a0i*br, a1r*bi op a1i*br}  ~ {i0,i1}
   The add/sub pattern depends on which operands are conjugated
   (transpose/conjugate variants selected by the NN/CN/NC/CC defines). */
.macro  AGGREGATE_REALS_IMAGES  VSINR_OUT1,VSINR,VSINI_OUT2,VSINI
#if	defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/* no conjugation: r = rr - ii, i = ri + ir */
	xvsubdp  \VSINR_OUT1,\VSINR_OUT1,\VSINR
	xvadddp  \VSINI_OUT2,\VSINI_OUT2,\VSINI
#elif  defined(CN) || defined(CT) || defined(RN) || defined(RT)
	/* conj(A): r = rr + ii, i = ri - ir */
	xvadddp  \VSINR_OUT1,\VSINR_OUT1,\VSINR
	xvsubdp  \VSINI_OUT2,\VSINI_OUT2,\VSINI
#elif  defined(NC) || defined(TC) || defined(NR) || defined(TR)
	/* conj(B): r = rr + ii, i = ir - ri */
	xvadddp  \VSINR_OUT1,\VSINR_OUT1,\VSINR
	xvsubdp  \VSINI_OUT2,\VSINI,\VSINI_OUT2
#else	// CC || CR || RC || RR
    /*we will assume {-alpha_r,-alpha_i} for this case */
    /*i1i2-r1r2 so we will negate alpha real instead to fix sign*/
	xvsubdp  \VSINR_OUT1,\VSINR,\VSINR_OUT1
    /*we will negate alpha imaginary instead to fix sign*/
	xvadddp  \VSINI_OUT2,\VSINI_OUT2,\VSINI
#endif
.endm
 | |
/* First half of the complex alpha scaling (note: "APLHA" spelling is
   historical and kept for compatibility with callers):
   \VSOUT1 = {i0,i1}*alpha_i - \VSOUT1 ; \VSOUT2 += {r0,r1}*alpha_i.
   For TRMM (no C accumulation) the outputs are plain products. */
.macro MULT_APLHA_PART1  VSINRR,VSINII,VSOUT1,VSOUT2
#ifndef TRMMKERNEL
	xvmsubadp \VSOUT1,\VSINII, alpha_i
	xvmaddadp  \VSOUT2,\VSINRR, alpha_i
#else
	xvmuldp \VSOUT1,\VSINII, alpha_i
	xvmuldp  \VSOUT2,\VSINRR, alpha_i
#endif
.endm
 | |
/* Second half of the complex alpha scaling:
   \VSOUT1 = {r0,r1}*alpha_r - \VSOUT1 ; \VSOUT2 += {i0,i1}*alpha_r.
   Combined with PART1 this yields alpha*result (+C when not TRMM). */
.macro MULT_APLHA_PART2  VSINRR,VSINII,VSOUT1,VSOUT2
	xvmsubadp  \VSOUT1,\VSINRR, alpha_r
	xvmaddadp \VSOUT2,\VSINII, alpha_r
.endm
 | |
/* Unpack two {r,r}/{i,i} pairs back into interleaved {r,i},{r,i}
   vectors ready for stxv (big-endian doubleword order). */
.macro UNPACK_FOR_STORE VSIN1,VSIN2,VSOUT1,VSOUT2
	xxmrghd  \VSOUT1,\VSIN2,\VSIN1
	xxmrgld  \VSOUT2,\VSIN2,\VSIN1
.endm
 | |
| 
 | |
| 
 | |
/* Store two adjacent 16-byte vectors (two complex elements) of C at
   \REG + \LOFFSET. */
.macro STORE_COUPLE REG,LOFFSET,VSIN1,VSIN2
	stxv	\VSIN1,	DISPX(\LOFFSET)(\REG)
	stxv	\VSIN2,	DISPX(\LOFFSET+16)(\REG)
.endm
 | |
| 
 | |
| 
 | |
/* SAVE8: finalize and store 8 double-complex results of one C row.
   VSRes1..VSRes16 hold the raw accumulator pairs (normal and swapped
   products).  Loads of existing C (non-TRMM) are interleaved with the
   packing/aggregation to hide latency.  Clobbers vs2-vs27 and reuses
   the VSRes inputs as scratch for the last two elements. */
.macro SAVE8 VSRes1,VSRes2,VSRes3,VSRes4,VSRes5,VSRes6,VSRes7,VSRes8,VSRes9,VSRes10,VSRes11,VSRes12,VSRes13,VSRes14,VSRes15,VSRes16,BASE_REG,LOFFSET
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes3,vs2,vs3
  LOAD_COUPLE_AS_RR_II	vs14,vs15,vs18,vs19,\BASE_REG,\LOFFSET
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes2,\VSRes4,vs4,vs5
  LOAD_COUPLE_AS_RR_II	vs16,vs17,vs20,vs21,\BASE_REG,(\LOFFSET+32)
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes5,\VSRes7,vs6,vs7
  LOAD_COUPLE_AS_RR_II	vs24,vs25,vs18,vs19,\BASE_REG,(\LOFFSET +64)
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes6,\VSRes8,vs8,vs9
  LOAD_COUPLE_AS_RR_II	vs26,vs27,vs20,vs21,\BASE_REG,(\LOFFSET+96)
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes9,\VSRes11,vs10,vs11
  AGGREGATE_REALS_IMAGES	vs2,vs3,vs4,vs5
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes10,\VSRes12,vs12,vs13
  AGGREGATE_REALS_IMAGES	vs6,vs7,vs8,vs9
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes13,\VSRes15,\VSRes1,\VSRes2
  MULT_APLHA_PART1	vs2,vs4, vs14,vs15
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes14,\VSRes16,\VSRes3,\VSRes4
  MULT_APLHA_PART1	vs6,vs8,vs16,vs17
  MULT_APLHA_PART2  vs2,vs4,vs14,vs15
  AGGREGATE_REALS_IMAGES	vs10,vs11,vs12,vs13
  MULT_APLHA_PART2	vs6,vs8,vs16,vs17
  AGGREGATE_REALS_IMAGES	\VSRes1,\VSRes2,\VSRes3,\VSRes4
  UNPACK_FOR_STORE	vs14,vs15,vs7,vs9
  MULT_APLHA_PART1	vs10,vs12, vs24,vs25
  UNPACK_FOR_STORE	vs16,vs17,vs3,vs5
  MULT_APLHA_PART1	\VSRes1,\VSRes3, vs26,vs27
  STORE_COUPLE	\BASE_REG,\LOFFSET,vs7,vs9
  MULT_APLHA_PART2	vs10,vs12,vs24,vs25
  STORE_COUPLE	\BASE_REG,(\LOFFSET+32),vs3,vs5
  MULT_APLHA_PART2	\VSRes1,\VSRes3, vs26,vs27
  UNPACK_FOR_STORE	vs24,vs25,vs10,vs12
  UNPACK_FOR_STORE	vs26,vs27,\VSRes1,\VSRes3
  STORE_COUPLE	\BASE_REG,(\LOFFSET +64),vs10,vs12
  STORE_COUPLE	\BASE_REG,(\LOFFSET+96),\VSRes1,\VSRes3
.endm
 | |
| 
 | |
| 
 | |
/* SAVE4: finalize and store 4 double-complex results (see SAVE8).
   Clobbers vs2-vs9 and vs14-vs21 as scratch. */
.macro SAVE4  VSRes1,VSRes2,VSRes3,VSRes4,VSRes5,VSRes6,VSRes7,VSRes8,BASE_REG,LOFFSET
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes3,vs2,vs3
  LOAD_COUPLE_AS_RR_II	vs14,vs15,vs18,vs19,\BASE_REG,\LOFFSET
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes2,\VSRes4,vs4,vs5
  LOAD_COUPLE_AS_RR_II	vs16,vs17,vs20,vs21,\BASE_REG,(\LOFFSET+32)
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes5,\VSRes7,vs6,vs7
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes6,\VSRes8,vs8,vs9
  AGGREGATE_REALS_IMAGES	vs2,vs3,vs4,vs5
  AGGREGATE_REALS_IMAGES	vs6,vs7,vs8,vs9
  MULT_APLHA_PART1	vs2,vs4, vs14,vs15
  MULT_APLHA_PART1	vs6,vs8, vs16,vs17
  MULT_APLHA_PART2	vs2,vs4, vs14,vs15
  MULT_APLHA_PART2	vs6,vs8,vs16,vs17
  UNPACK_FOR_STORE	vs14,vs15,vs7,vs9
  UNPACK_FOR_STORE	vs16,vs17,vs3,vs5
  STORE_COUPLE	\BASE_REG,\LOFFSET,vs7,vs9
  STORE_COUPLE	\BASE_REG,(\LOFFSET+32),vs3,vs5
.endm
 | |
| 
 | |
| 
 | |
| 
 | |
/* SAVE2: finalize and store 2 double-complex results (see SAVE8).
   Clobbers vs2-vs5, vs7, vs9, vs14, vs15, vs18, vs19 as scratch. */
.macro SAVE2  VSRes1,VSRes2,VSRes3,VSRes4,BASE_REG,LOFFSET
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes3,vs2,vs3
  LOAD_COUPLE_AS_RR_II	vs14,vs15,vs18,vs19,\BASE_REG,\LOFFSET
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes2,\VSRes4,vs4,vs5
  AGGREGATE_REALS_IMAGES	vs2,vs3,vs4,vs5
  MULT_APLHA_PART1	vs2,vs4, vs14,vs15
  MULT_APLHA_PART2	vs2,vs4, vs14,vs15
  UNPACK_FOR_STORE	vs14,vs15,vs7,vs9
  STORE_COUPLE	\BASE_REG,\LOFFSET,vs7,vs9
.endm
 | |
| 
 | |
| 
 | |
| 
 | |
/* SAVE1: finalize and store a single double-complex result.  The pack
   helpers are called with the same input twice, so only the low half of
   each pair is meaningful; the final xxmrghd recombines {r,i} before
   the single 16-byte store. */
.macro SAVE1  VSRes1,VSRes2,BASE_REG,LOFFSET
  RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes1,vs2,vs3
#ifndef TRMMKERNEL
  /* load existing C element and split into {r,r} / {i,i} */
  lxv	vs18,	(\LOFFSET)(\BASE_REG)
  xxmrgld  vs14,vs18,vs18
  xxmrghd  vs15,vs18,vs18
#endif
  RESULT_INTO_REALIMAG_IMAGREAL	\VSRes2,\VSRes2,vs4,vs5
  AGGREGATE_REALS_IMAGES	vs2,vs3,vs4,vs5
  MULT_APLHA_PART1	vs2,vs4, vs14,vs15
  MULT_APLHA_PART2	vs2,vs4, vs14,vs15
  UNPACK_FOR_STORE	vs14,vs15,vs7,vs9
  xxmrghd  vs7,vs15,vs14
  stxv	vs7,	(\LOFFSET)(\BASE_REG)
.endm
 | |
/**********************************************************************************************
* Macros for N=2 and M=8
**********************************************************************************************/

/* Clear all 32 accumulators (vs32-vs63) used by the 2x8 kernel. */
.macro Zero2x8
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
	xxlxor	vs34,	vs34,	vs34
	xxlxor	vs35,	vs35,	vs35
	xxlxor	vs36,	vs36,	vs36
	xxlxor	vs37,	vs37,	vs37
	xxlxor	vs38,	vs38,	vs38
	xxlxor	vs39,	vs39,	vs39
	xxlxor	vs40,	vs40,	vs40
	xxlxor	vs41,	vs41,	vs41
	xxlxor	vs42,	vs42,	vs42
	xxlxor	vs43,	vs43,	vs43
	xxlxor	vs44,	vs44,	vs44
	xxlxor	vs45,	vs45,	vs45
	xxlxor	vs46,	vs46,	vs46
	xxlxor	vs47,	vs47,	vs47
	xxlxor	vs48,	vs48,	vs48
	xxlxor	vs49,	vs49,	vs49
	xxlxor	vs50,	vs50,	vs50
	xxlxor	vs51,	vs51,	vs51
	xxlxor	vs52,	vs52,	vs52
	xxlxor	vs53,	vs53,	vs53
	xxlxor	vs54,	vs54,	vs54
	xxlxor	vs55,	vs55,	vs55
	xxlxor	vs56,	vs56,	vs56
	xxlxor	vs57,	vs57,	vs57
	xxlxor	vs58,	vs58,	vs58
	xxlxor	vs59,	vs59,	vs59
	xxlxor	vs60,	vs60,	vs60
	xxlxor	vs61,	vs61,	vs61
	xxlxor	vs62,	vs62,	vs62
	xxlxor	vs63,	vs63,	vs63
.endm
 | |
| 
 | |
| 
 | |
/* Load one K-iteration of operands for the 2x8 kernel at offset 0. */
.macro LOAD2x8
	LOAD2x8O 0,0
.endm

/* Load 2 B elements (vs16,vs18; vs17,vs19 are the doubleword-swapped
   copies used for the imag*real products) and 8 A elements (vs0-vs7)
   from BO/AO at the given byte offsets. */
.macro LOAD2x8O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real,imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs4,	(64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs5,	(80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs6,	(96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs7,	(112+\OffsetA)(AO)	// load real,imag from A
.endm
 | |
| 
 | |
| 
 | |
/* Finish one K-iteration and advance AO/BO past the consumed data
   (8 A elements = 128 bytes, 2 B elements = 32 bytes). */
.macro END2x8_NORMAL
	END2x8 AO,BO,128,32
.endm

/* Finish one K-iteration without advancing the pointers (the caller
   already advanced them, e.g. via KERNEL2x8_2's IsLast path). */
.macro END2x8_WITHOUT_ADD
	END2x8 AO,BO,0,0
.endm
 | |
| 
 | |
| 
 | |
/* Accumulate one K-iteration for the 2x8 tile and optionally bump the
   A/B pointers.  vs32-vs47: A*B products for B column 0 (normal and
   swapped); vs48-vs63: same for B column 1. */
.macro END2x8	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs48,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs49,	vs0,	vs19
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs50,	vs1,	vs18
	xvmaddadp	vs35,	vs1,	vs17
	xvmaddadp	vs51,	vs1,	vs19
	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs52,	vs2,	vs18
	xvmaddadp	vs37,	vs2,	vs17
	xvmaddadp	vs53,	vs2,	vs19
	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs54,	vs3,	vs18
	xvmaddadp	vs39,	vs3,	vs17
	xvmaddadp	vs55,	vs3,	vs19
	xvmaddadp	vs40,	vs4,	vs16
	xvmaddadp	vs56,	vs4,	vs18
	xvmaddadp	vs41,	vs4,	vs17
	xvmaddadp	vs57,	vs4,	vs19
	xvmaddadp	vs42,	vs5,	vs16
	xvmaddadp	vs58,	vs5,	vs18
	xvmaddadp	vs43,	vs5,	vs17
	xvmaddadp	vs59,	vs5,	vs19
	xvmaddadp	vs44,	vs6,	vs16
	xvmaddadp	vs60,	vs6,	vs18
	xvmaddadp	vs45,	vs6,	vs17
	xvmaddadp	vs61,	vs6,	vs19
	xvmaddadp	vs46,	vs7,	vs16
	xvmaddadp	vs62,	vs7,	vs18
	xvmaddadp	vs47,	vs7,	vs17
	xvmaddadp	vs63,	vs7,	vs19
.endm
 | |
| 
 | |
| 
 | |
/* Load two K-iterations of operands for the 2x8 kernel at offset 0. */
.macro LOAD2x8_2
    LOAD2x8_2O 0,0
.endm

/* Double-unrolled load: 4 B vectors (vs16-vs22, swapped copies
   vs17/vs19 produced here; vs21/vs23 are produced inside the kernel)
   and 16 A vectors (vs0-vs7 for step 0, vs8-vs15 for step 1). */
.macro LOAD2x8_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real,imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	lxv	vs20,	(\OffsetB+32)(BO)	// load real,imag from B
	lxv	vs22,	(\OffsetB+48)(BO)	// load real,imag from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs4,	(64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs5,	(80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs6,	(96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs7,	(112+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(128+0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs9,	(128+16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs10,   (128+32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs11,   (128+48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs12,   (128+64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs13,   (128+80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs14,   (128+96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs15,   (128+112+\OffsetA)(AO)	// load real,imag from A
.endm
 | |
| 
 | |
| 
 | |
/* Finish a double-unrolled pair: Complete=1 skips the prefetch loads,
   IsLast=1 advances the pointers. */
.macro END2x8_2
  /*for load2 offset will be 256 and 64*/
   KERNEL2x8_2	AO,BO,	256,64,0 ,1,1
.endm

/* Last double-unrolled step: no operand prefetch (Complete=1). */
.macro KERNEL2x8_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x8_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* Regular double-unrolled loop step with prefetch (Complete=0). */
.macro KERNEL2x8_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x8_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm
 | |
| 
 | |
| 
 | |
/* Double-unrolled 2x8 FMA step: accumulates two K-iterations
   (vs0-vs7 x vs16-vs19, then vs8-vs15 x vs20-vs23) while, when
   Complete==0, interleaving loads of the NEXT pair's operands.
   Index selects the unrolled iteration (DISP16/DISP4 displacements);
   IsLast==1 advances AREG/BREG past all consumed data at the end. */
.macro KERNEL2x8_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs48,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs49,	vs0,	vs19
  /* build swapped copies of the step-1 B vectors */
  xxswapd	vs21, vs20
  xxswapd	vs23, vs22
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs50,	vs1,	vs18
	xvmaddadp	vs35,	vs1,	vs17
	xvmaddadp	vs51,	vs1,	vs19
.if \Complete==0
	lxv	vs0,	DISP16(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs1,	DISP16(\Index,16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs52,	vs2,	vs18
	xvmaddadp	vs37,	vs2,	vs17
	xvmaddadp	vs53,	vs2,	vs19
	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs54,	vs3,	vs18
	xvmaddadp	vs39,	vs3,	vs17
	xvmaddadp	vs55,	vs3,	vs19
.if \Complete==0
	lxv	vs2,	DISP16(\Index,32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs3,	DISP16(\Index,48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs40,	vs4,	vs16
	xvmaddadp	vs56,	vs4,	vs18
	xvmaddadp	vs41,	vs4,	vs17
	xvmaddadp	vs57,	vs4,	vs19
	xvmaddadp	vs42,	vs5,	vs16
	xvmaddadp	vs58,	vs5,	vs18
	xvmaddadp	vs43,	vs5,	vs17
	xvmaddadp	vs59,	vs5,	vs19
.if \Complete==0
	lxv	vs4,	DISP16(\Index,64+ \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs5,	DISP16(\Index,64+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs44,	vs6,	vs16
	xvmaddadp	vs60,	vs6,	vs18
	xvmaddadp	vs45,	vs6,	vs17
	xvmaddadp	vs61,	vs6,	vs19
	xvmaddadp	vs46,	vs7,	vs16
	xvmaddadp	vs62,	vs7,	vs18
	xvmaddadp	vs47,	vs7,	vs17
	xvmaddadp	vs63,	vs7,	vs19
.if \Complete==0
	lxv	vs16,	DISP4(\Index, 0+\OffsetB)(\BREG)	// load real,imag from B
	lxv	vs18,	DISP4(\Index, 16+\OffsetB)(\BREG)	// load real,imag from B
.endif
	/* second K-iteration: vs8-vs15 x vs20-vs23 */
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs48,	vs8,	vs22
.if \Complete==0
	lxv	vs6,	DISP16(\Index,64+32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs7,	DISP16(\Index,64+48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs33,	vs8,	vs21
	xvmaddadp	vs49,	vs8,	vs23
.if \Complete==0
  xxswapd	vs17, vs16
  xxswapd	vs19, vs18
.endif
	xvmaddadp	vs34,	vs9,	vs20
	xvmaddadp	vs50,	vs9,	vs22
	xvmaddadp	vs35,	vs9,	vs21
	xvmaddadp	vs51,	vs9,	vs23
.if \Complete==0
	lxv	vs8,	DISP16(\Index,128+ + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs9,	DISP16(\Index,128+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs10,	vs20
	xvmaddadp	vs52,	vs10,	vs22
	xvmaddadp	vs37,	vs10,	vs21
	xvmaddadp	vs53,	vs10,	vs23
	xvmaddadp	vs38,	vs11,	vs20
	xvmaddadp	vs54,	vs11,	vs22
	xvmaddadp	vs39,	vs11,	vs21
	xvmaddadp	vs55,	vs11,	vs23
.if \Complete==0
	lxv	vs10,	DISP16(\Index,128+32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs11,	DISP16(\Index,128+48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs40,	vs12,	vs20
	xvmaddadp	vs56,	vs12,	vs22
	xvmaddadp	vs41,	vs12,	vs21
	xvmaddadp	vs57,	vs12,	vs23
	xvmaddadp	vs42,	vs13,	vs20
	xvmaddadp	vs58,	vs13,	vs22
	xvmaddadp	vs43,	vs13,	vs21
	xvmaddadp	vs59,	vs13,	vs23
.if \Complete==0
	lxv	vs12,	DISP16(\Index, 192 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs13,	DISP16(\Index,192 +16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs44,	vs14,	vs20
	xvmaddadp	vs60,	vs14,	vs22
	xvmaddadp	vs45,	vs14,	vs21
	xvmaddadp	vs61,	vs14,	vs23
	xvmaddadp	vs46,	vs15,	vs20
	xvmaddadp	vs62,	vs15,	vs22
	xvmaddadp	vs47,	vs15,	vs21
	xvmaddadp	vs63,	vs15,	vs23
.if \Complete==0
	lxv	vs14,	DISP16(\Index,192 +32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs15,	DISP16(\Index,192 +48 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs20,	DISP4(\Index, 32+\OffsetB)(\BREG)	// load real,imag from B
	lxv	vs22,	DISP4(\Index, 48+\OffsetB)(\BREG)	// load real,imag from B
.endif
.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP16(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP4(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP16(\Index,256)
	addi	\BREG, \BREG,  DISP4(\Index,64)
.endif
.endif
.endm
 | |
| 
 | |
|  
 | |
| 
 | |
| 
 | |
| 
 | |
/* Single (non-unrolled) 2x8 step: load, accumulate, advance pointers. */
.macro KERNEL2x8
  LOAD2x8
  END2x8  AO, BO, 128,32
.endm

/* Scale by alpha, merge with C, and store the 2x8 tile: row 0 at CO,
   row 1 at CO+LDC; then advance CO by 8 complex elements (128 bytes). */
.macro SAVE2x8
	add	T1, CO ,LDC
	SAVE8  vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,vs40,vs41,vs42,vs43,vs44,vs45,vs46,vs47,CO,0
	SAVE8  vs48,vs49,vs50,vs51,vs52,vs53,vs54,vs55,vs56,vs57,vs58,vs59,vs60,vs61,vs62,vs63,T1,0
	addi	CO, CO, 128
.endm
 | |
/**********************************************************************************************
* Macros for N=2 and M=4
**********************************************************************************************/

/* Clear the 16 accumulators (vs32-vs47) used by the 2x4 kernel. */
.macro Zero2x4
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
	xxlxor	vs34,	vs34,	vs34
	xxlxor	vs35,	vs35,	vs35
	xxlxor	vs36,	vs36,	vs36
	xxlxor	vs37,	vs37,	vs37
	xxlxor	vs38,	vs38,	vs38
	xxlxor	vs39,	vs39,	vs39
	xxlxor	vs40,	vs40,	vs40
	xxlxor	vs41,	vs41,	vs41
	xxlxor	vs42,	vs42,	vs42
	xxlxor	vs43,	vs43,	vs43
	xxlxor	vs44,	vs44,	vs44
	xxlxor	vs45,	vs45,	vs45
	xxlxor	vs46,	vs46,	vs46
	xxlxor	vs47,	vs47,	vs47
.endm
 | |
| 
 | |
| 
 | |
/* Load one K-iteration of operands for the 2x4 kernel at offset 0. */
.macro LOAD2x4
	LOAD2x4O 0,0
.endm

/* Load 2 B elements (vs16,vs18 + swapped copies vs17,vs19) and
   4 A elements (vs0-vs3). */
.macro LOAD2x4O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real,imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
.endm
 | |
| 
 | |
| 
 | |
/* Finish one K-iteration and advance AO/BO (4 A elements = 64 bytes,
   2 B elements = 32 bytes). */
.macro END2x4_NORMAL
	END2x4 AO,BO,64,32
.endm

/* Finish one K-iteration without advancing the pointers. */
.macro END2x4_WITHOUT_ADD
	END2x4 AO,BO,0,0
.endm
 | |
| 
 | |
| 
 | |
/* Accumulate one K-iteration for the 2x4 tile.  vs32-vs39: B column 0
   products (normal/swapped); vs40-vs47: B column 1. */
.macro END2x4	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs40,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs41,	vs0,	vs19
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs42,	vs1,	vs18
	xvmaddadp	vs35,	vs1,	vs17
	xvmaddadp	vs43,	vs1,	vs19
	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs44,	vs2,	vs18
	xvmaddadp	vs37,	vs2,	vs17
	xvmaddadp	vs45,	vs2,	vs19
	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs46,	vs3,	vs18
	xvmaddadp	vs39,	vs3,	vs17
	xvmaddadp	vs47,	vs3,	vs19
.endm
 | |
| 
 | |
| 
 | |
/* Load two K-iterations of operands for the 2x4 kernel at offset 0. */
.macro LOAD2x4_2
    LOAD2x4_2O 0,0
.endm

/* Double-unrolled load: 4 B vectors (vs16-vs22; swapped copies for
   vs20/vs22 are made inside the kernel) and 8 A vectors (vs0-vs3 for
   step 0, vs8-vs11 for step 1). */
.macro LOAD2x4_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real,imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	lxv	vs20,	(\OffsetB+32)(BO)	// load real,imag from B
	lxv	vs22,	(\OffsetB+48)(BO)	// load real,imag from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs9,	(80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs10,	(96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs11,	(112+\OffsetA)(AO)	// load real,imag from A
.endm
 | |
| 
 | |
| 
 | |
/* Finish a double-unrolled pair for the 2x4 tile (no prefetch,
   pointers advanced). */
.macro END2x4_2
  /*for load2 offset will be 128 and 64*/
   KERNEL2x4_2	AO,BO,	128,64,0 ,1,1
.endm

/* Last double-unrolled step: no operand prefetch (Complete=1). */
.macro KERNEL2x4_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x4_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* Regular double-unrolled loop step with prefetch (Complete=0). */
.macro KERNEL2x4_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x4_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm
 | |
| 
 | |
| 
 | |
/* Double-unrolled 2x4 FMA step: two K-iterations (vs0-vs3 x vs16-vs19,
   then vs8-vs11 x vs20-vs23), prefetching the next pair's operands when
   Complete==0; IsLast==1 advances AREG/BREG at the end. */
.macro KERNEL2x4_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs40,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs41,	vs0,	vs19
  /* build swapped copies of the step-1 B vectors */
  xxswapd	vs21, vs20
  xxswapd	vs23, vs22
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs42,	vs1,	vs18
	xvmaddadp	vs35,	vs1,	vs17
	xvmaddadp	vs43,	vs1,	vs19
.if \Complete==0
	lxv	vs0,	DISP8(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs1,	DISP8(\Index,16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs44,	vs2,	vs18
	xvmaddadp	vs37,	vs2,	vs17
	xvmaddadp	vs45,	vs2,	vs19
	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs46,	vs3,	vs18
	xvmaddadp	vs39,	vs3,	vs17
	xvmaddadp	vs47,	vs3,	vs19
.if \Complete==0
	lxv	vs2,	DISP8(\Index,32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs3,	DISP8(\Index,48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \Complete==0
	lxv	vs16,	DISP4(\Index, 0+\OffsetB)(\BREG)	// load real,imag from B
	lxv	vs18,	DISP4(\Index, 16+\OffsetB)(\BREG)	// load real,imag from B
.endif
	/* second K-iteration: vs8-vs11 x vs20-vs23 */
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs40,	vs8,	vs22
	xvmaddadp	vs33,	vs8,	vs21
	xvmaddadp	vs41,	vs8,	vs23
.if \Complete==0
  xxswapd	vs17, vs16
  xxswapd	vs19, vs18
.endif
	xvmaddadp	vs34,	vs9,	vs20
	xvmaddadp	vs42,	vs9,	vs22
	xvmaddadp	vs35,	vs9,	vs21
	xvmaddadp	vs43,	vs9,	vs23
.if \Complete==0
	lxv	vs8,	DISP8(\Index,64+0+ \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs9,	DISP8(\Index,64+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs10,	vs20
	xvmaddadp	vs44,	vs10,	vs22
	xvmaddadp	vs37,	vs10,	vs21
	xvmaddadp	vs45,	vs10,	vs23
	xvmaddadp	vs38,	vs11,	vs20
	xvmaddadp	vs46,	vs11,	vs22
	xvmaddadp	vs39,	vs11,	vs21
	xvmaddadp	vs47,	vs11,	vs23
.if \Complete==0
	lxv	vs10,	DISP8(\Index,64+32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs11,	DISP8(\Index,64+48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \Complete==0
	lxv	vs20,	DISP4(\Index, 32+\OffsetB)(\BREG)	// load real,imag from B
	lxv	vs22,	DISP4(\Index, 48+\OffsetB)(\BREG)	// load real,imag from B
.endif
.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP8(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP4(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP8(\Index,128)
	addi	\BREG, \BREG,  DISP4(\Index,64)
.endif
.endif
.endm
 | |
|  
 | |
| 
 | |
| 
 | |
/* Single (non-unrolled) 2x4 step: load, accumulate, advance pointers. */
.macro KERNEL2x4
  LOAD2x4
  END2x4  AO, BO, 64,32
.endm

/* Scale by alpha, merge with C, and store the 2x4 tile: row 0 at CO,
   row 1 at CO+LDC; then advance CO by 4 complex elements (64 bytes). */
.macro SAVE2x4
	add	T1, CO ,LDC
	SAVE4  vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,CO,0
	SAVE4  vs40,vs41,vs42,vs43,vs44,vs45,vs46,vs47,T1,0
	addi	CO, CO, 64
.endm
 | |
/**********************************************************************************************
* Macros for N=2 and M=2
**********************************************************************************************/

/* Clear the 8 accumulators (vs32-vs39) used by the 2x2 kernel. */
.macro Zero2x2
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
	xxlxor	vs34,	vs34,	vs34
	xxlxor	vs35,	vs35,	vs35
	xxlxor	vs36,	vs36,	vs36
	xxlxor	vs37,	vs37,	vs37
	xxlxor	vs38,	vs38,	vs38
	xxlxor	vs39,	vs39,	vs39
.endm
 | |
| 
 | |
| 
 | |
/* Load one K-iteration of operands for the 2x2 kernel at offset 0. */
.macro LOAD2x2
	LOAD2x2O 0,0
.endm

/* Load 2 B elements (vs16,vs18 + swapped copies vs17,vs19) and
   2 A elements (vs0,vs1). */
.macro LOAD2x2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real,imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
.endm
 | |
| 
 | |
| 
 | |
/* Finish one K-iteration and advance AO/BO (2 A elements = 32 bytes,
   2 B elements = 32 bytes). */
.macro END2x2_NORMAL
	END2x2 AO,BO,32,32
.endm

/* Finish one K-iteration without advancing the pointers. */
.macro END2x2_WITHOUT_ADD
	END2x2 AO,BO,0,0
.endm
 | |
| 
 | |
| 
 | |
/* Accumulate one K-iteration for the 2x2 tile.  vs32-vs35: B column 0
   products (normal/swapped); vs36-vs39: B column 1. */
.macro END2x2	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs36,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs37,	vs0,	vs19
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs38,	vs1,	vs18
	xvmaddadp	vs35,	vs1,	vs17
	xvmaddadp	vs39,	vs1,	vs19
.endm
 | |
| 
 | |
| 
 | |
/* Load two K-iterations of operands for the 2x2 kernel at offset 0. */
.macro LOAD2x2_2
    LOAD2x2_2O 0,0
.endm

/* Double-unrolled load: 4 B vectors (vs16-vs22; swapped copies for
   vs20/vs22 are made inside the kernel) and 4 A vectors (vs0,vs1 for
   step 0, vs8,vs9 for step 1). */
.macro LOAD2x2_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real,imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	lxv	vs20,	(\OffsetB+32)(BO)	// load real,imag from B
	lxv	vs22,	(\OffsetB+48)(BO)	// load real,imag from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs9,	(48+\OffsetA)(AO)	// load real,imag from A
.endm
 | |
| 
 | |
| 
 | |
/* Finish a double-unrolled pair for the 2x2 tile (no prefetch,
   pointers advanced). */
.macro END2x2_2
  /*for load2 offset will be 64 and 64*/
   KERNEL2x2_2	AO,BO,	64,64,0 ,1,1
.endm

/* Last double-unrolled step: no operand prefetch (Complete=1). */
.macro KERNEL2x2_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x2_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* Regular double-unrolled loop step with prefetch (Complete=0). */
.macro KERNEL2x2_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x2_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm
 | |
| 
 | |
| 
 | |
/* Double-unrolled 2x2 FMA step: two K-iterations (vs0,vs1 x vs16-vs19,
   then vs8,vs9 x vs20-vs23), prefetching the next pair's operands when
   Complete==0; IsLast==1 advances AREG/BREG at the end. */
.macro KERNEL2x2_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs36,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs37,	vs0,	vs19
  /* build swapped copies of the step-1 B vectors */
  xxswapd	vs21, vs20
  xxswapd	vs23, vs22
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs38,	vs1,	vs18
	xvmaddadp	vs35,	vs1,	vs17
	xvmaddadp	vs39,	vs1,	vs19
.if \Complete==0
	lxv	vs0,	DISP4(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs1,	DISP4(\Index,16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \Complete==0
	lxv	vs16,	DISP4(\Index, 0+\OffsetB)(\BREG)	// load real,imag from B
	lxv	vs18,	DISP4(\Index, 16+\OffsetB)(\BREG)	// load real,imag from B
.endif
	/* second K-iteration: vs8,vs9 x vs20-vs23 */
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs36,	vs8,	vs22
	xvmaddadp	vs33,	vs8,	vs21
	xvmaddadp	vs37,	vs8,	vs23
.if \Complete==0
  xxswapd	vs17, vs16
  xxswapd	vs19, vs18
.endif
	xvmaddadp	vs34,	vs9,	vs20
	xvmaddadp	vs38,	vs9,	vs22
	xvmaddadp	vs35,	vs9,	vs21
	xvmaddadp	vs39,	vs9,	vs23
.if \Complete==0
	lxv	vs20,	DISP4(\Index, 32+\OffsetB)(\BREG)	// load real,imag from B
	lxv	vs22,	DISP4(\Index, 48+\OffsetB)(\BREG)	// load real,imag from B
.endif
.if \Complete==0
	lxv	vs8,	DISP4(\Index,32+0+ \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs9,	DISP4(\Index,32+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP4(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP4(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP4(\Index,64)
	addi	\BREG, \BREG,  DISP4(\Index,64)
.endif
.endif
.endm
 | |
|  
 | |
| 
 | |
| 
 | |
| .macro KERNEL2x2
 | |
|   LOAD2x2
 | |
|   END2x2  AO, BO, 32,32
 | |
| .endm
 | |
| 
 | |
| 
 | |
| 
 | |
| .macro SAVE2x2 
 | |
| 	add	T1, CO ,LDC 
 | |
| 	SAVE2  vs32,vs33,vs34,vs35,CO,0
 | |
| 	SAVE2  vs36,vs37,vs38,vs39,T1,0 
 | |
| 	addi	CO, CO, 32 
 | |
| .endm
 | |
| /**********************************************************************************************
 | |
| *
 | |
| 
 | |
| .macros for N=2 and M=1
 | |
| **********************************************************************************************/
 | |
| 
 | |
| 
 | |
| 
 | |
/* Zero2x1: clear the four accumulators of the N=2,M=1 micro tile
   (one complex element per C column; normal + swapped-operand pair each). */
.macro Zero2x1
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
	xxlxor	vs34,	vs34,	vs34
	xxlxor	vs35,	vs35,	vs35
.endm

/* LOAD2x1: load the first N=2,M=1 operands at offset 0. */
.macro LOAD2x1
	LOAD2x1O 0,0
.endm

/* LOAD2x1O: load one complex element from each of the two B columns
   (vs16, vs18) plus one element of A (vs0); vs17/vs19 are doubleword-swapped
   copies of B used for the imaginary-part products. */
.macro LOAD2x1O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
.endm

/* END2x1_NORMAL: finish a K-step, advancing AO by 16 and BO by 32 bytes. */
.macro END2x1_NORMAL
	END2x1 AO,BO,16,32
.endm

/* END2x1_WITHOUT_ADD: finish a K-step without advancing the pointers. */
.macro END2x1_WITHOUT_ADD
	END2x1 AO,BO,0,0
.endm

/* END2x1: optionally advance A/B pointers, then accumulate one K-step
   into vs32..vs35. */
.macro END2x1	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs34,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs35,	vs0,	vs19
.endm

/* LOAD2x1_2: load operands for a K-unrolled-by-2 step at offset 0. */
.macro LOAD2x1_2
    LOAD2x1_2O 0,0
.endm

/* LOAD2x1_2O: load two K-steps of B (vs16/vs18 and vs20/vs22) and two
   elements of A (vs0, vs8).  Only the first B pair is swapped here;
   vs21/vs23 are produced inside KERNEL2x1_2. */
.macro LOAD2x1_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	lxv	vs18,	(\OffsetB+16)(BO)	// load real,imag from B
	lxv	vs20,	(\OffsetB+32)(BO)	// load real,imag	from B
	lxv	vs22,	(\OffsetB+48)(BO)	// load real,imag  from B
	xxswapd	vs17, vs16
	xxswapd	vs19, vs18
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(16+\OffsetA)(AO)	// load real,imag from A
.endm

/* END2x1_2: drain the software pipeline after LOAD2x1_2 (Complete=1). */
.macro END2x1_2
  /*for load2 offset will be 32 and 64*/
   KERNEL2x1_2	AO,BO,	32,64,0 ,1,1
.endm

/* KERNEL2x1_E2: final unrolled-by-2 N=2,M=1 iteration (no preload). */
.macro KERNEL2x1_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x1_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* KERNEL2x1_L2: steady-state unrolled-by-2 N=2,M=1 iteration (preloads). */
.macro KERNEL2x1_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL2x1_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

/* KERNEL2x1_2: one K-unrolled-by-2 step of the N=2,M=1 complex kernel.
   A advances 2 elements (DISP2), B advances 4 elements (DISP4) per step.
   Complete==0 reloads the next iteration's operands; IsLast==1 advances
   the pointers. */
.macro KERNEL2x1_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
  xxswapd	vs21, vs20		// swapped B copies for sub-step 2
  xxswapd	vs23, vs22
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs34,	vs0,	vs18
	xvmaddadp	vs33,	vs0,	vs17
	xvmaddadp	vs35,	vs0,	vs19
.if \Complete==0
	lxv	vs0,	DISP2(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \Complete==0
	lxv	vs16,	DISP4(\Index, 0+\OffsetB)(\BREG)	// load real imag from B
	lxv	vs18,	DISP4(\Index, 16+\OffsetB)(\BREG)	// load real,imag from B
.endif
.if \Complete==0
  xxswapd	vs17, vs16
  xxswapd	vs19, vs18
.endif
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs34,	vs8,	vs22
	xvmaddadp	vs33,	vs8,	vs21
	xvmaddadp	vs35,	vs8,	vs23
.if \Complete==0
	lxv	vs8,	DISP2(\Index,16+0+ \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \Complete==0
 	lxv	vs20,	DISP4(\Index, 32+\OffsetB)(\BREG)	// load real,imag	from B
	lxv	vs22,	DISP4(\Index, 48+\OffsetB)(\BREG)	// load real,imag  from B
.endif
.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP2(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP4(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP2(\Index,32)
	addi	\BREG, \BREG,  DISP4(\Index,64)
.endif
.endif
.endm

/* KERNEL2x1: single N=2,M=1 K-step: load then multiply-add,
   advancing AO by 16 and BO by 32 bytes. */
.macro KERNEL2x1
  LOAD2x1
  END2x1  AO, BO, 16,32
.endm

/* SAVE2x1: write one complex element to each C column (CO and CO+LDC),
   then advance CO by one element (16 bytes). */
.macro SAVE2x1
	add	T1, CO ,LDC
	SAVE1  vs32,vs33,CO,0
	SAVE1  vs34,vs35,T1,0
	addi	CO, CO, 16
.endm
 | |
| 
 | |
| /**********************************************************************************************
 | |
| *
 | |
| 
 | |
| .macros for N=1 and M=8
 | |
| **********************************************************************************************/
 | |
| 
 | |
| 
 | |
/* Zero1x8: clear the accumulators of the N=1,M=8 micro tile (vs32..vs47:
   8 complex elements, normal + swapped-operand accumulator each).
   NOTE(review): vs48 is also cleared although no visible 1x8 macro below
   uses it as an accumulator — kept as in the original. */
.macro Zero1x8
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
	xxlxor	vs34,	vs34,	vs34
	xxlxor	vs35,	vs35,	vs35
	xxlxor	vs36,	vs36,	vs36
	xxlxor	vs37,	vs37,	vs37
	xxlxor	vs38,	vs38,	vs38
	xxlxor	vs39,	vs39,	vs39
	xxlxor	vs40,	vs40,	vs40
	xxlxor	vs41,	vs41,	vs41
	xxlxor	vs42,	vs42,	vs42
	xxlxor	vs43,	vs43,	vs43
	xxlxor	vs44,	vs44,	vs44
	xxlxor	vs45,	vs45,	vs45
	xxlxor	vs46,	vs46,	vs46
	xxlxor	vs47,	vs47,	vs47
	xxlxor	vs48,	vs48,	vs48
.endm

/* LOAD1x8: load the first N=1,M=8 operands at offset 0. */
.macro LOAD1x8
	LOAD1x8O 0,0
.endm

/* LOAD1x8O: load one B element (vs16, plus swapped copy vs17) and
   eight A elements (vs0..vs7, 16 bytes each). */
.macro LOAD1x8O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	xxswapd	vs17, vs16
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs4,	(64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs5,	(80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs6,	(96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs7,	(112+\OffsetA)(AO)	// load real,imag from A
.endm

/* END1x8_NORMAL: finish a K-step, advancing AO by 128 and BO by 16 bytes. */
.macro END1x8_NORMAL
	END1x8 AO,BO,128,16
.endm

/* END1x8_WITHOUT_ADD: finish a K-step without advancing the pointers. */
.macro END1x8_WITHOUT_ADD
	END1x8 AO,BO,0,0
.endm

/* END1x8: optionally advance A/B pointers, then accumulate one K-step
   of the 1x8 tile into vs32..vs47. */
.macro END1x8	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17

	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs35,	vs1,	vs17

	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs37,	vs2,	vs17

	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs39,	vs3,	vs17

	xvmaddadp	vs40,	vs4,	vs16
	xvmaddadp	vs41,	vs4,	vs17

	xvmaddadp	vs42,	vs5,	vs16
	xvmaddadp	vs43,	vs5,	vs17

	xvmaddadp	vs44,	vs6,	vs16
	xvmaddadp	vs45,	vs6,	vs17

	xvmaddadp	vs46,	vs7,	vs16
	xvmaddadp	vs47,	vs7,	vs17
.endm

/* LOAD1x8_2: load operands for a K-unrolled-by-2 step at offset 0. */
.macro LOAD1x8_2
    LOAD1x8_2O 0,0
.endm

/* LOAD1x8_2O: load two K-steps: B in vs16/vs20 (vs17 swapped here,
   vs21 swapped inside the kernel) and A in vs0..vs7 / vs8..vs15. */
.macro LOAD1x8_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	lxv	vs20,	(\OffsetB+16)(BO)	// load real,imag	from B
	xxswapd	vs17, vs16

	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs4,	(64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs5,	(80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs6,	(96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs7,	(112+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(128+0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs9,	(128+16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs10,   (128+32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs11,   (128+48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs12,   (128+64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs13,   (128+80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs14,   (128+96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs15,   (128+112+\OffsetA)(AO)	// load real,imag from A
.endm

/* END1x8_2: drain the software pipeline after LOAD1x8_2 (Complete=1). */
.macro END1x8_2
  /*for load2 offset will be 256 and 32*/
   KERNEL1x8_2	AO,BO,	256,32,0 ,1,1
.endm

/* KERNEL1x8_E2: final unrolled-by-2 N=1,M=8 iteration (no preload). */
.macro KERNEL1x8_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x8_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* KERNEL1x8_L2: steady-state unrolled-by-2 N=1,M=8 iteration (preloads). */
.macro KERNEL1x8_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x8_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

/* KERNEL1x8_2: one K-unrolled-by-2 step of the N=1,M=8 complex kernel.
   A advances 16 elements (DISP16, 256 bytes), B advances 2 (DISP2, 32
   bytes) per step.  Loads for the next iteration are interleaved with the
   FMAs when Complete==0; IsLast==1 advances the pointers at the end. */
.macro KERNEL1x8_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17
  xxswapd	vs21, vs20		// swapped B copy for sub-step 2
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs35,	vs1,	vs17
.if \Complete==0
	lxv	vs0,	DISP16(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs1,	DISP16(\Index,16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs37,	vs2,	vs17

	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs39,	vs3,	vs17
.if \Complete==0
	lxv	vs2,	DISP16(\Index,32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs3,	DISP16(\Index,48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs40,	vs4,	vs16
	xvmaddadp	vs41,	vs4,	vs17

	xvmaddadp	vs42,	vs5,	vs16
	xvmaddadp	vs43,	vs5,	vs17
.if \Complete==0
	lxv	vs4,	DISP16(\Index,64+ \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs5,	DISP16(\Index,64+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs44,	vs6,	vs16
	xvmaddadp	vs45,	vs6,	vs17

	xvmaddadp	vs46,	vs7,	vs16
	xvmaddadp	vs47,	vs7,	vs17
.if \Complete==0
	lxv	vs16,	DISP2(\Index, 0+\OffsetB)(\BREG)	// load real imag from B
.endif
.if \Complete==0
  xxswapd	vs17, vs16
.endif
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs33,	vs8,	vs21
.if \Complete==0
	lxv	vs6,	DISP16(\Index,64+32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs7,	DISP16(\Index,64+48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs34,	vs9,	vs20
	xvmaddadp	vs35,	vs9,	vs21
.if \Complete==0
	lxv	vs8,	DISP16(\Index,128+ + \OffsetA)(\AREG)	// load real,imag from A ("128+ +" is a harmless unary plus)
	lxv	vs9,	DISP16(\Index,128+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs10,	vs20
	xvmaddadp	vs37,	vs10,	vs21
	xvmaddadp	vs38,	vs11,	vs20
	xvmaddadp	vs39,	vs11,	vs21
.if \Complete==0
	lxv	vs10,	DISP16(\Index,128+32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs11,	DISP16(\Index,128+48 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs40,	vs12,	vs20
	xvmaddadp	vs41,	vs12,	vs21
	xvmaddadp	vs42,	vs13,	vs20
	xvmaddadp	vs43,	vs13,	vs21
.if \Complete==0
	lxv	vs12,	DISP16(\Index, 192 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs13,	DISP16(\Index,192 +16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs44,	vs14,	vs20
	xvmaddadp	vs45,	vs14,	vs21
	xvmaddadp	vs46,	vs15,	vs20
	xvmaddadp	vs47,	vs15,	vs21
.if \Complete==0
	lxv	vs14,	DISP16(\Index,192 +32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs15,	DISP16(\Index,192 +48 + \OffsetA)(\AREG)	// load real,imag from A
 	lxv	vs20,	DISP2(\Index, 16+\OffsetB)(\BREG)	// load real,imag	from B
.endif
.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP16(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP2(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP16(\Index,256)
	addi	\BREG, \BREG,  DISP2(\Index,32)
.endif
.endif
.endm

/* KERNEL1x8: single N=1,M=8 K-step: load then multiply-add,
   advancing AO by 128 and BO by 16 bytes. */
.macro KERNEL1x8
  LOAD1x8
  END1x8  AO, BO, 128,16
.endm

/* SAVE1x8: write the 8-element column to C and advance CO by 128 bytes. */
.macro SAVE1x8
	SAVE8  vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,vs40,vs41,vs42,vs43,vs44,vs45,vs46,vs47,CO,0
	addi	CO, CO, 128
.endm
 | |
/**********************************************************************************************
*

.macros for N=1 and M=4
**********************************************************************************************/
 | |
| 
 | |
| 
 | |
/* Zero1x4: clear the accumulators of the N=1,M=4 micro tile (vs32..vs39). */
.macro Zero1x4
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
	xxlxor	vs34,	vs34,	vs34
	xxlxor	vs35,	vs35,	vs35
	xxlxor	vs36,	vs36,	vs36
	xxlxor	vs37,	vs37,	vs37
	xxlxor	vs38,	vs38,	vs38
	xxlxor	vs39,	vs39,	vs39
.endm

/* LOAD1x4: load the first N=1,M=4 operands at offset 0. */
.macro LOAD1x4
	LOAD1x4O 0,0
.endm

/* LOAD1x4O: load one B element (vs16 + swapped copy vs17) and four A
   elements (vs0..vs3). */
.macro LOAD1x4O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	xxswapd	vs17, vs16

	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
.endm

/* END1x4_NORMAL: finish a K-step, advancing AO by 64 and BO by 16 bytes. */
.macro END1x4_NORMAL
	END1x4 AO,BO,64,16
.endm

/* END1x4_WITHOUT_ADD: finish a K-step without advancing the pointers. */
.macro END1x4_WITHOUT_ADD
	END1x4 AO,BO,0,0
.endm

/* END1x4: optionally advance A/B pointers, then accumulate one K-step
   of the 1x4 tile into vs32..vs39. */
.macro END1x4	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17

	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs35,	vs1,	vs17

	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs37,	vs2,	vs17

	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs39,	vs3,	vs17
.endm

/* LOAD1x4_2: load operands for a K-unrolled-by-2 step at offset 0. */
.macro LOAD1x4_2
    LOAD1x4_2O 0,0
.endm

/* LOAD1x4_2O: load two K-steps: B in vs16/vs20 (vs17 swapped here,
   vs21 swapped inside the kernel) and A in vs0..vs3 / vs8..vs11. */
.macro LOAD1x4_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	lxv	vs20,	(\OffsetB+16)(BO)	// load real,imag	from B
	xxswapd	vs17, vs16

	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs2,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs3,	(48+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(64+\OffsetA)(AO)	// load real,imag from A
	lxv	vs9,	(80+\OffsetA)(AO)	// load real,imag from A
	lxv	vs10,	(96+\OffsetA)(AO)	// load real,imag from A
	lxv	vs11,	(112+\OffsetA)(AO)	// load real,imag from A
.endm

/* END1x4_2: drain the software pipeline after LOAD1x4_2 (Complete=1). */
.macro END1x4_2
  /*for load2 offset will be 128 and 32*/
   KERNEL1x4_2	AO,BO,	128,32,0 ,1,1
.endm

/* KERNEL1x4_E2: final unrolled-by-2 N=1,M=4 iteration (no preload). */
.macro KERNEL1x4_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x4_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* KERNEL1x4_L2: steady-state unrolled-by-2 N=1,M=4 iteration (preloads). */
.macro KERNEL1x4_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x4_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

/* KERNEL1x4_2: one K-unrolled-by-2 step of the N=1,M=4 complex kernel.
   A advances 8 elements (DISP8, 128 bytes), B advances 2 (DISP2, 32
   bytes) per step.  Complete==0 reloads the next iteration's operands;
   IsLast==1 advances the pointers. */
.macro KERNEL1x4_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17
  xxswapd	vs21, vs20		// swapped B copy for sub-step 2
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs35,	vs1,	vs17
.if \Complete==0
	lxv	vs0,	DISP8(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs1,	DISP8(\Index,16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs2,	vs16
	xvmaddadp	vs37,	vs2,	vs17

	xvmaddadp	vs38,	vs3,	vs16
	xvmaddadp	vs39,	vs3,	vs17
.if \Complete==0
	lxv	vs2,	DISP8(\Index,32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs3,	DISP8(\Index,48 + \OffsetA)(\AREG)	// load real,imag from A
.endif

.if \Complete==0
	lxv	vs16,	DISP2(\Index, 0+\OffsetB)(\BREG)	// load real imag from B
.endif
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs33,	vs8,	vs21
.if \Complete==0
  xxswapd	vs17, vs16
.endif
	xvmaddadp	vs34,	vs9,	vs20
	xvmaddadp	vs35,	vs9,	vs21
.if \Complete==0
	lxv	vs8,	DISP8(\Index,64+0+ \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs9,	DISP8(\Index,64+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
	xvmaddadp	vs36,	vs10,	vs20
	xvmaddadp	vs37,	vs10,	vs21
	xvmaddadp	vs38,	vs11,	vs20
	xvmaddadp	vs39,	vs11,	vs21
.if \Complete==0
	lxv	vs10,	DISP8(\Index,64+32 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs11,	DISP8(\Index,64+48 + \OffsetA)(\AREG)	// load real,imag from A
.endif

.if \Complete==0
 	lxv	vs20,	DISP2(\Index, 16+\OffsetB)(\BREG)	// load real,imag	from B
.endif
.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP8(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP2(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP8(\Index,128)
	addi	\BREG, \BREG,  DISP2(\Index,32)
.endif
.endif
.endm

/* KERNEL1x4: single N=1,M=4 K-step: load then multiply-add,
   advancing AO by 64 and BO by 16 bytes. */
.macro KERNEL1x4
  LOAD1x4
  END1x4  AO, BO, 64,16
.endm

/* SAVE1x4: write the 4-element column to C and advance CO by 64 bytes. */
.macro SAVE1x4
	SAVE4  vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,CO,0
	addi	CO, CO, 64
.endm
 | |
/**********************************************************************************************
*

.macros for N=1 and M=2
**********************************************************************************************/
 | |
| 
 | |
| 
 | |
/* Zero1x2: clear the accumulators of the N=1,M=2 micro tile (vs32..vs35). */
.macro Zero1x2
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
	xxlxor	vs34,	vs34,	vs34
	xxlxor	vs35,	vs35,	vs35
.endm

/* LOAD1x2: load the first N=1,M=2 operands at offset 0. */
.macro LOAD1x2
	LOAD1x2O 0,0
.endm

/* LOAD1x2O: load one B element (vs16 + swapped copy vs17) and two A
   elements (vs0, vs1). */
.macro LOAD1x2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	xxswapd	vs17, vs16

	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
.endm

/* END1x2_NORMAL: finish a K-step, advancing AO by 32 and BO by 16 bytes. */
.macro END1x2_NORMAL
	END1x2 AO,BO,32,16
.endm

/* END1x2_WITHOUT_ADD: finish a K-step without advancing the pointers. */
.macro END1x2_WITHOUT_ADD
	END1x2 AO,BO,0,0
.endm

/* END1x2: optionally advance A/B pointers, then accumulate one K-step
   of the 1x2 tile into vs32..vs35. */
.macro END1x2	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17

	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs35,	vs1,	vs17
.endm

/* LOAD1x2_2: load operands for a K-unrolled-by-2 step at offset 0. */
.macro LOAD1x2_2
    LOAD1x2_2O 0,0
.endm

/* LOAD1x2_2O: load two K-steps: B in vs16/vs20 (vs17 swapped here,
   vs21 swapped inside the kernel) and A in vs0/vs1 and vs8/vs9. */
.macro LOAD1x2_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	lxv	vs20,	(\OffsetB+16)(BO)	// load real,imag	from B
	xxswapd	vs17, vs16

	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs1,	(16+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(32+\OffsetA)(AO)	// load real,imag from A
	lxv	vs9,	(48+\OffsetA)(AO)	// load real,imag from A
.endm

/* END1x2_2: drain the software pipeline after LOAD1x2_2 (Complete=1). */
.macro END1x2_2
  /*for load2 offset will be 64 and 32*/
   KERNEL1x2_2	AO,BO,	64,32,0 ,1,1
.endm

/* KERNEL1x2_E2: final unrolled-by-2 N=1,M=2 iteration (no preload). */
.macro KERNEL1x2_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x2_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* KERNEL1x2_L2: steady-state unrolled-by-2 N=1,M=2 iteration (preloads). */
.macro KERNEL1x2_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x2_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

/* KERNEL1x2_2: one K-unrolled-by-2 step of the N=1,M=2 complex kernel.
   A advances 4 elements (DISP4, 64 bytes), B advances 2 (DISP2, 32
   bytes) per step.  Complete==0 reloads the next iteration's operands;
   IsLast==1 advances the pointers. */
.macro KERNEL1x2_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17
  xxswapd	vs21, vs20		// swapped B copy for sub-step 2
	xvmaddadp	vs34,	vs1,	vs16
	xvmaddadp	vs35,	vs1,	vs17
.if \Complete==0
	lxv	vs0,	DISP4(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs1,	DISP4(\Index,16 + \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \Complete==0
	lxv	vs16,	DISP2(\Index, 0+\OffsetB)(\BREG)	// load real imag from B
.endif
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs33,	vs8,	vs21
.if \Complete==0
  xxswapd	vs17, vs16
.endif
	xvmaddadp	vs34,	vs9,	vs20
	xvmaddadp	vs35,	vs9,	vs21
.if \Complete==0
 	lxv	vs20,	DISP2(\Index, 16+\OffsetB)(\BREG)	// load real,imag	from B
.endif
.if \Complete==0
	lxv	vs8,	DISP4(\Index,32+0+ \OffsetA)(\AREG)	// load real,imag from A
	lxv	vs9,	DISP4(\Index,32+16 + \OffsetA)(\AREG)	// load real,imag from A
.endif

.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP4(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP2(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP4(\Index,64)
	addi	\BREG, \BREG,  DISP2(\Index,32)
.endif
.endif
.endm

/* KERNEL1x2: single N=1,M=2 K-step: load then multiply-add,
   advancing AO by 32 and BO by 16 bytes. */
.macro KERNEL1x2
  LOAD1x2
  END1x2  AO, BO, 32,16
.endm

/* SAVE1x2: write the 2-element column to C and advance CO by 32 bytes. */
.macro SAVE1x2
	SAVE2  vs32,vs33,vs34,vs35,CO,0
	addi	CO, CO, 32
.endm
 | |
/**********************************************************************************************
*

.macros for N=1 and M=1
**********************************************************************************************/
 | |
| 
 | |
| 
 | |
| 
 | |
/* Zero1x1: clear the accumulators of the N=1,M=1 micro tile (vs32, vs33). */
.macro Zero1x1
	xxlxor	vs32,	vs32,	vs32
	xxlxor	vs33,	vs33,	vs33
.endm

/* LOAD1x1: load the first N=1,M=1 operands at offset 0. */
.macro LOAD1x1
	LOAD1x1O 0,0
.endm

/* LOAD1x1O: load one B element (vs16 + swapped copy vs17) and one A
   element (vs0). */
.macro LOAD1x1O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	xxswapd	vs17, vs16
.endm

/* END1x1_NORMAL: finish a K-step, advancing AO and BO by 16 bytes each. */
.macro END1x1_NORMAL
	END1x1 AO,BO,16,16
.endm

/* END1x1_WITHOUT_ADD: finish a K-step without advancing the pointers. */
.macro END1x1_WITHOUT_ADD
	END1x1 AO,BO,0,0
.endm

/* END1x1: optionally advance A/B pointers, then accumulate one K-step
   into vs32/vs33. */
.macro END1x1	AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
	addi	\BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
	addi	\AREG, \AREG, \OffsetA
.endif
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17
.endm

/* LOAD1x1_2: load operands for a K-unrolled-by-2 step at offset 0. */
.macro LOAD1x1_2
    LOAD1x1_2O 0,0
.endm

/* LOAD1x1_2O: load two K-steps: B in vs16/vs20 (vs17 swapped here,
   vs21 swapped inside the kernel) and A in vs0/vs8. */
.macro LOAD1x1_2O  OffsetA,OffsetB
	lxv	vs16,(\OffsetB+	0)(BO)	// load real imag from B
	lxv	vs20,	(\OffsetB+16)(BO)	// load real,imag	from B
	xxswapd	vs17, vs16

	lxv	vs0,	(0+\OffsetA)(AO)	// load real,imag from A
	lxv	vs8,	(16+\OffsetA)(AO)	// load real,imag from A
.endm

/* END1x1_2: drain the software pipeline after LOAD1x1_2 (Complete=1). */
.macro END1x1_2
  /*for load2 offset will be 32 and 32*/
   KERNEL1x1_2	AO,BO,	32,32,0 ,1,1
.endm

/* KERNEL1x1_E2: final unrolled-by-2 N=1,M=1 iteration (no preload). */
.macro KERNEL1x1_E2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x1_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

/* KERNEL1x1_L2: steady-state unrolled-by-2 N=1,M=1 iteration (preloads). */
.macro KERNEL1x1_L2	OffsetA,OffsetB, Index,IsLast
  KERNEL1x1_2	AO,BO,	\OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

/* KERNEL1x1_2: one K-unrolled-by-2 step of the N=1,M=1 complex kernel.
   A and B each advance 2 elements (DISP2, 32 bytes) per step.
   Complete==0 reloads the next iteration's operands; IsLast==1 advances
   the pointers. */
.macro KERNEL1x1_2	AREG,BREG,	OffsetA,OffsetB, Index,IsLast ,Complete
  xxswapd	vs21, vs20		// swapped B copy for sub-step 2
	xvmaddadp	vs32,	vs0,	vs16
	xvmaddadp	vs33,	vs0,	vs17
.if \Complete==0
	lxv	vs0,	DISP2(\Index, 0 + \OffsetA)(\AREG)	// load real,imag from A
.endif
.if \Complete==0
	lxv	vs16,	DISP2(\Index, 0+\OffsetB)(\BREG)	// load real imag from B
.endif
.if \Complete==0
  xxswapd	vs17, vs16
.endif
	xvmaddadp	vs32,	vs8,	vs20
	xvmaddadp	vs33,	vs8,	vs21
.if \Complete==0
	lxv	vs8,	DISP2(\Index,16+0+ \OffsetA)(\AREG)	// load real,imag from A
.endif

.if \Complete==0
 	lxv	vs20,	DISP2(\Index, 16+\OffsetB)(\BREG)	// load real,imag	from B
.endif
.if \IsLast==1
.if \Complete==1
	addi	\AREG, \AREG,  DISP2(\Index,\OffsetA)
	addi	\BREG, \BREG,  DISP2(\Index,\OffsetB)
.else
	addi	\AREG, \AREG, DISP2(\Index,32)
	addi	\BREG, \BREG,  DISP2(\Index,32)
.endif
.endif
.endm

/* KERNEL1x1: single N=1,M=1 K-step: load then multiply-add,
   advancing AO and BO by 16 bytes each. */
.macro KERNEL1x1
  LOAD1x1
  END1x1  AO, BO, 16,16
.endm

/* SAVE1x1: write one complex element to C and advance CO by 16 bytes. */
.macro SAVE1x1
	SAVE1  vs32,vs33,CO,0
	addi	CO, CO, 16
.endm
 | |
| 
 | |
/****************************TRMM POINTER REFRESH macros*************************/
 | |
| 
 | |
| 
 | |
/* SHIFT_REG: REG1 = REG2 * (SHIFT_VAL * 16), i.e. convert an element
   count to a byte offset for unit_size 16 (one double-complex value),
   implemented as a left shift by log2(SHIFT_VAL*16).
   NOTE(review): there is no .else — a SHIFT_VAL outside {1,2,4,8,16}
   leaves REG1 unmodified; all visible call sites use listed values. */
.macro SHIFT_REG  REG1,REG2,SHIFT_VAL
		.if \SHIFT_VAL==16
			slwi		\REG1,	\REG2,	8		// * 256
		.elseif \SHIFT_VAL==8
			slwi		\REG1,	\REG2,	7		// * 128
		.elseif \SHIFT_VAL==4
			slwi		\REG1,	\REG2,	6		// * 64
		.elseif \SHIFT_VAL==2
			slwi		\REG1,	\REG2,	5		// * 32
		.elseif \SHIFT_VAL==1
			slwi		\REG1,	\REG2,	4		// * 16
		.endif
.endm
 | |
| /*
 | |
| //#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | |
| // 		ptrbb = bb;
 | |
| // #else
 | |
| // 		ptrba += off*16;
 | |
| // 		ptrbb = bb + off*2;
 | |
| // #endif
 | |
| */
 | |
| 
 | |
| 
 | |
/* REFRESH_POINTERS: TRMM tile entry — position PTR_A/PTR_B for the current
   micro tile.  C_A/C_B are the values-per-K-step counts of A and B;
   OFF_VAL is the TRMM diagonal offset.  In the non-trivial branch T2/T4
   are clobbered (scratch for SHIFT_REG). */
.macro REFRESH_POINTERS  PTR_A,PTR_B,OFF_VAL,B_VAL,C_A,C_B
    #if (defined(LEFT) &&  defined(TRANSA)) ||  (!defined(LEFT) && !defined(TRANSA))
        /* ptrbb = bb;*/
        mr \PTR_B,\B_VAL     /* refresh BPOINT */
    #else
		    /*
        // ptrba  =ptrba+ off*C_A;
        // ptrbb = bb + off*C_B;
				*/
		SHIFT_REG T4,\OFF_VAL,\C_B		/* Number of values in B shifted  */
		SHIFT_REG T2,\OFF_VAL,\C_A		/* Number of values in A shifted  */
		add		\PTR_B,	\B_VAL ,	T4				/* Add values to BO */
		add		\PTR_A,	\PTR_A,	T2				/* Add values to AO  */
    #endif
.endm
 | |
| 
 | |
| /*
 | |
| // #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | |
| // 		temp = bk-off;
 | |
| // #elif defined(LEFT)
 | |
| // 		temp = off+16;	// number of values in A
 | |
| // #else
 | |
| // 		temp = off+2;	// number of values in B
 | |
| // #endif
 | |
| */
 | |
| 
 | |
| 
 | |
/* REFRESH_TEMP_BK: compute TEMP_BK, the K trip count for the current
   TRMM micro tile, from BK_VAL/OFF_VAL and the tile dimensions
   INCR_A/INCR_B, selected by the LEFT/TRANSA preprocessor configuration. */
.macro REFRESH_TEMP_BK TEMP_BK,BK_VAL,OFF_VAL,INCR_A,INCR_B
    #if (defined(LEFT) && !defined(TRANSA)) ||  (!defined(LEFT) && defined(TRANSA))
                            /* temp = bk-off;*/
           sub \TEMP_BK,\BK_VAL,\OFF_VAL
    #elif defined(LEFT)
                            /* temp = off+INCR_A;	// number of values in A */
           addi \TEMP_BK, \OFF_VAL, \INCR_A
    #else
                            /* temp = off+INCR_B	// number of values in B*/
           addi \TEMP_BK,\OFF_VAL, \INCR_B
    #endif
.endm
 | |
| /*
 | |
| // #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | |
| // 		temp = bk - off;
 | |
| // #ifdef LEFT
 | |
| // 		temp -= 16; // number of values in A
 | |
| // #else
 | |
| // 		temp -= 2; // number of values in B
 | |
| // #endif
 | |
| // 		ptrba += temp*16;
 | |
| // 		ptrbb += temp*2;
 | |
| // #endif
 | |
| // #ifdef LEFT
 | |
| // 		off += 16; // number of values in A
 | |
| // #endif
 | |
| */
 | |
|  
 | |
| 
 | |
| 
 | |
/* REFRESH_AFTER_SAVE: after storing a TRMM micro tile, advance
   PTR_A/PTR_B past the untouched triangular remainder and (for LEFT)
   bump OFF_VAL by the tile height C_A.  T2/T4 are clobbered as scratch
   for SHIFT_REG. */
.macro REFRESH_AFTER_SAVE TEMP_BK,BK_VAL,OFF_VAL,PTR_B,PTR_A,C_A,C_B
    #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
                    /*temp = bk - off;*/
                sub \TEMP_BK,\BK_VAL,\OFF_VAL
    #ifdef LEFT
                    /*temp -= C_A; // number of values in A*/
                addi \TEMP_BK,\TEMP_BK,-\C_A
    #else
                    /*temp -= C_B; // number of values in B*/
                addi \TEMP_BK,\TEMP_BK,-\C_B
    #endif
                    /*ptrba += temp*C_A;
                    ptrbb += temp*C_B;*/
                SHIFT_REG T4,\TEMP_BK,\C_A
								SHIFT_REG T2,\TEMP_BK,\C_B
                add \PTR_A, \PTR_A,T4/*ptrba+temp*C_A*/
								add \PTR_B, \PTR_B,T2
    #endif
    #ifdef LEFT
                    /*off += C_A; // number of values in A*/
                 addi \OFF_VAL,\OFF_VAL,\C_A
    #endif
.endm