7121 lines
		
	
	
		
			93 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			7121 lines
		
	
	
		
			93 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /***************************************************************************
 | |
| Copyright (c) 2013-2016, The OpenBLAS Project
 | |
| All rights reserved.
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions are
 | |
| met:
 | |
| 1. Redistributions of source code must retain the above copyright
 | |
| notice, this list of conditions and the following disclaimer.
 | |
| 2. Redistributions in binary form must reproduce the above copyright
 | |
| notice, this list of conditions and the following disclaimer in
 | |
| the documentation and/or other materials provided with the
 | |
| distribution.
 | |
| 3. Neither the name of the OpenBLAS project nor the names of
 | |
| its contributors may be used to endorse or promote products
 | |
| derived from this software without specific prior written permission.
 | |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | |
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | |
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | |
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | |
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | |
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | |
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************************************************
 | |
| * 2016/04/02 Werner Saar (wernsaar@googlemail.com)
 | |
| * 	 BLASTEST 		: OK
 | |
| * 	 CTEST			: OK
 | |
| * 	 TEST			: OK
 | |
| * 	 LAPACK-TEST		: OK
 | |
| **************************************************************************************/
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=8 and M=16
 | |
| **********************************************************************************************/
 | |
| 
 | |
/*
 * LOAD8x16_1 - preload one k-step of operands for the N=8, M=16 kernels.
 *
 * Reads 64 bytes at AO into vs0-vs3 and 32 bytes at BO into vs28/vs29,
 * replicates each of the eight BO words across a whole vector (xxspltw)
 * into vs8-vs15, then advances AO by 64 and BO by 32.  No arithmetic is
 * done and no accumulator (vs32-vs63) is touched.
 *
 * Writes: vs0-vs3, vs8-vs15, vs28, vs29, AO, BO.
 * NOTE(review): o0/o16/o32/o48, AO and BO are defined outside this chunk;
 * presumably the oNN names are index registers holding those byte
 * offsets - confirm against the including kernel file.
 * The body is shared by the m4 define (AIX) and the .macro form, so it has
 * to remain valid inside an m4 quoted string.
 */
| #if defined(_AIX)
 | |
| define(`LOAD8x16_1', `
 | |
| #else
 | |
| .macro LOAD8x16_1
 | |
| #endif
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x16_I1 - initialising step of the two-register-set 8x16 loop.
 *
 * Load phase: reads the next 64 bytes at AO into vs4-vs7 and the next 32
 * bytes at BO into vs28/vs29, splats those eight words into vs16-vs23,
 * then advances AO by 64 and BO by 32.
 * Compute phase: uses the operands already held in vs0-vs3 / vs8-vs15 (the
 * registers LOAD8x16_1 fills) and WRITES the products with xvmulsp, so any
 * previous contents of vs32-vs63 are discarded:
 *     vs(32 + 4*j + i) = vs(i) * vs(8 + j),   i = 0..3, j = 0..7
 *
 * Writes: vs4-vs7, vs16-vs23, vs28, vs29, vs32-vs63, AO, BO.
 * NOTE(review): o0/o16/o32/o48, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x16_I1', `
 | |
| #else
 | |
| .macro KERNEL8x16_I1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 	lxvw4x		vs6,	o32,	AO
 | |
| 	lxvw4x		vs7,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs20,	vs29,	0
 | |
| 	xxspltw		vs21,	vs29,	1
 | |
| 	xxspltw		vs22,	vs29,	2
 | |
| 	xxspltw		vs23,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 	xvmulsp		vs34,	vs2,	vs8
 | |
| 	xvmulsp		vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs9
 | |
| 	xvmulsp		vs37,	vs1,	vs9
 | |
| 	xvmulsp		vs38,	vs2,	vs9
 | |
| 	xvmulsp		vs39,	vs3,	vs9
 | |
| 
 | |
| 	xvmulsp		vs40,	vs0,	vs10
 | |
| 	xvmulsp		vs41,	vs1,	vs10
 | |
| 	xvmulsp		vs42,	vs2,	vs10
 | |
| 	xvmulsp		vs43,	vs3,	vs10
 | |
| 
 | |
| 	xvmulsp		vs44,	vs0,	vs11
 | |
| 	xvmulsp		vs45,	vs1,	vs11
 | |
| 	xvmulsp		vs46,	vs2,	vs11
 | |
| 	xvmulsp		vs47,	vs3,	vs11
 | |
| 
 | |
| 	xvmulsp		vs48,	vs0,	vs12
 | |
| 	xvmulsp		vs49,	vs1,	vs12
 | |
| 	xvmulsp		vs50,	vs2,	vs12
 | |
| 	xvmulsp		vs51,	vs3,	vs12
 | |
| 
 | |
| 	xvmulsp		vs52,	vs0,	vs13
 | |
| 	xvmulsp		vs53,	vs1,	vs13
 | |
| 	xvmulsp		vs54,	vs2,	vs13
 | |
| 	xvmulsp		vs55,	vs3,	vs13
 | |
| 
 | |
| 	xvmulsp		vs56,	vs0,	vs14
 | |
| 	xvmulsp		vs57,	vs1,	vs14
 | |
| 	xvmulsp		vs58,	vs2,	vs14
 | |
| 	xvmulsp		vs59,	vs3,	vs14
 | |
| 
 | |
| 	xvmulsp		vs60,	vs0,	vs15
 | |
| 	xvmulsp		vs61,	vs1,	vs15
 | |
| 	xvmulsp		vs62,	vs2,	vs15
 | |
| 	xvmulsp		vs63,	vs3,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x16_1 - accumulate with register set 0 while loading set 1.
 *
 * Load phase: reads the next 64 bytes at AO into vs4-vs7 and the next 32
 * bytes at BO into vs28/vs29, splats those eight words into vs16-vs23,
 * then advances AO by 64 and BO by 32.
 * Compute phase: multiply-adds the operands already in vs0-vs3 / vs8-vs15
 * into the accumulators (xvmaddasp adds the product to its target):
 *     vs(32 + 4*j + i) += vs(i) * vs(8 + j),   i = 0..3, j = 0..7
 *
 * Identical to KERNEL8x16_I1 except that the accumulators are added to
 * instead of overwritten.
 * Writes: vs4-vs7, vs16-vs23, vs28, vs29, vs32-vs63, AO, BO.
 * NOTE(review): o0/o16/o32/o48, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x16_1', `
 | |
| #else
 | |
| .macro KERNEL8x16_1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 	lxvw4x		vs6,	o32,	AO
 | |
| 	lxvw4x		vs7,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs20,	vs29,	0
 | |
| 	xxspltw		vs21,	vs29,	1
 | |
| 	xxspltw		vs22,	vs29,	2
 | |
| 	xxspltw		vs23,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 	xvmaddasp	vs34,	vs2,	vs8
 | |
| 	xvmaddasp	vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs9
 | |
| 	xvmaddasp	vs37,	vs1,	vs9
 | |
| 	xvmaddasp	vs38,	vs2,	vs9
 | |
| 	xvmaddasp	vs39,	vs3,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs0,	vs10
 | |
| 	xvmaddasp	vs41,	vs1,	vs10
 | |
| 	xvmaddasp	vs42,	vs2,	vs10
 | |
| 	xvmaddasp	vs43,	vs3,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs0,	vs11
 | |
| 	xvmaddasp	vs45,	vs1,	vs11
 | |
| 	xvmaddasp	vs46,	vs2,	vs11
 | |
| 	xvmaddasp	vs47,	vs3,	vs11
 | |
| 
 | |
| 	xvmaddasp	vs48,	vs0,	vs12
 | |
| 	xvmaddasp	vs49,	vs1,	vs12
 | |
| 	xvmaddasp	vs50,	vs2,	vs12
 | |
| 	xvmaddasp	vs51,	vs3,	vs12
 | |
| 
 | |
| 	xvmaddasp	vs52,	vs0,	vs13
 | |
| 	xvmaddasp	vs53,	vs1,	vs13
 | |
| 	xvmaddasp	vs54,	vs2,	vs13
 | |
| 	xvmaddasp	vs55,	vs3,	vs13
 | |
| 
 | |
| 	xvmaddasp	vs56,	vs0,	vs14
 | |
| 	xvmaddasp	vs57,	vs1,	vs14
 | |
| 	xvmaddasp	vs58,	vs2,	vs14
 | |
| 	xvmaddasp	vs59,	vs3,	vs14
 | |
| 
 | |
| 	xvmaddasp	vs60,	vs0,	vs15
 | |
| 	xvmaddasp	vs61,	vs1,	vs15
 | |
| 	xvmaddasp	vs62,	vs2,	vs15
 | |
| 	xvmaddasp	vs63,	vs3,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x16_2 - accumulate with register set 1 while loading set 0.
 *
 * Mirror image of KERNEL8x16_1.
 * Load phase: reads the next 64 bytes at AO into vs0-vs3 and the next 32
 * bytes at BO into vs28/vs29, splats those eight words into vs8-vs15,
 * then advances AO by 64 and BO by 32.
 * Compute phase: multiply-adds the operands already in vs4-vs7 / vs16-vs23:
 *     vs(32 + 4*j + i) += vs(4 + i) * vs(16 + j),   i = 0..3, j = 0..7
 *
 * Writes: vs0-vs3, vs8-vs15, vs28, vs29, vs32-vs63, AO, BO.
 * NOTE(review): o0/o16/o32/o48, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x16_2', `
 | |
| #else
 | |
| .macro KERNEL8x16_2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs17
 | |
| 	xvmaddasp	vs37,	vs5,	vs17
 | |
| 	xvmaddasp	vs38,	vs6,	vs17
 | |
| 	xvmaddasp	vs39,	vs7,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs4,	vs18
 | |
| 	xvmaddasp	vs41,	vs5,	vs18
 | |
| 	xvmaddasp	vs42,	vs6,	vs18
 | |
| 	xvmaddasp	vs43,	vs7,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs4,	vs19
 | |
| 	xvmaddasp	vs45,	vs5,	vs19
 | |
| 	xvmaddasp	vs46,	vs6,	vs19
 | |
| 	xvmaddasp	vs47,	vs7,	vs19
 | |
| 
 | |
| 	xvmaddasp	vs48,	vs4,	vs20
 | |
| 	xvmaddasp	vs49,	vs5,	vs20
 | |
| 	xvmaddasp	vs50,	vs6,	vs20
 | |
| 	xvmaddasp	vs51,	vs7,	vs20
 | |
| 
 | |
| 	xvmaddasp	vs52,	vs4,	vs21
 | |
| 	xvmaddasp	vs53,	vs5,	vs21
 | |
| 	xvmaddasp	vs54,	vs6,	vs21
 | |
| 	xvmaddasp	vs55,	vs7,	vs21
 | |
| 
 | |
| 	xvmaddasp	vs56,	vs4,	vs22
 | |
| 	xvmaddasp	vs57,	vs5,	vs22
 | |
| 	xvmaddasp	vs58,	vs6,	vs22
 | |
| 	xvmaddasp	vs59,	vs7,	vs22
 | |
| 
 | |
| 	xvmaddasp	vs60,	vs4,	vs23
 | |
| 	xvmaddasp	vs61,	vs5,	vs23
 | |
| 	xvmaddasp	vs62,	vs6,	vs23
 | |
| 	xvmaddasp	vs63,	vs7,	vs23
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x16_E2 - closing step for register set 1 of the 8x16 loop.
 *
 * Performs only the compute phase of KERNEL8x16_2: nothing is loaded and
 * AO/BO are not advanced.  The operands already in vs4-vs7 / vs16-vs23 are
 * multiply-added into the accumulators:
 *     vs(32 + 4*j + i) += vs(4 + i) * vs(16 + j),   i = 0..3, j = 0..7
 *
 * Writes: vs32-vs63 only.
 * NOTE(review): presumably used as the last step after a KERNEL8x16_1 so
 * that no data past the end of the panels is fetched - call sites are not
 * visible in this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x16_E2', `
 | |
| #else
 | |
| .macro KERNEL8x16_E2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs17
 | |
| 	xvmaddasp	vs37,	vs5,	vs17
 | |
| 	xvmaddasp	vs38,	vs6,	vs17
 | |
| 	xvmaddasp	vs39,	vs7,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs4,	vs18
 | |
| 	xvmaddasp	vs41,	vs5,	vs18
 | |
| 	xvmaddasp	vs42,	vs6,	vs18
 | |
| 	xvmaddasp	vs43,	vs7,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs4,	vs19
 | |
| 	xvmaddasp	vs45,	vs5,	vs19
 | |
| 	xvmaddasp	vs46,	vs6,	vs19
 | |
| 	xvmaddasp	vs47,	vs7,	vs19
 | |
| 
 | |
| 	xvmaddasp	vs48,	vs4,	vs20
 | |
| 	xvmaddasp	vs49,	vs5,	vs20
 | |
| 	xvmaddasp	vs50,	vs6,	vs20
 | |
| 	xvmaddasp	vs51,	vs7,	vs20
 | |
| 
 | |
| 	xvmaddasp	vs52,	vs4,	vs21
 | |
| 	xvmaddasp	vs53,	vs5,	vs21
 | |
| 	xvmaddasp	vs54,	vs6,	vs21
 | |
| 	xvmaddasp	vs55,	vs7,	vs21
 | |
| 
 | |
| 	xvmaddasp	vs56,	vs4,	vs22
 | |
| 	xvmaddasp	vs57,	vs5,	vs22
 | |
| 	xvmaddasp	vs58,	vs6,	vs22
 | |
| 	xvmaddasp	vs59,	vs7,	vs22
 | |
| 
 | |
| 	xvmaddasp	vs60,	vs4,	vs23
 | |
| 	xvmaddasp	vs61,	vs5,	vs23
 | |
| 	xvmaddasp	vs62,	vs6,	vs23
 | |
| 	xvmaddasp	vs63,	vs7,	vs23
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x16_SUBI1 - stand-alone single k-step, initialising form.
 *
 * Loads 64 bytes at AO into vs0-vs3 and 32 bytes at BO into vs28/vs29,
 * splats the eight BO words into vs8-vs15, advances AO by 64 and BO by 32,
 * and then multiplies the operands it has just loaded.  xvmulsp WRITES the
 * products, so previous accumulator contents are discarded:
 *     vs(32 + 4*j + i) = vs(i) * vs(8 + j),   i = 0..3, j = 0..7
 *
 * Unlike the _I1/_1/_2 macros, load and compute use the same register set,
 * so nothing needs to be preloaded beforehand.
 * Writes: vs0-vs3, vs8-vs15, vs28, vs29, vs32-vs63, AO, BO.
 * NOTE(review): o0/o16/o32/o48, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x16_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL8x16_SUBI1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 	xvmulsp		vs34,	vs2,	vs8
 | |
| 	xvmulsp		vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs9
 | |
| 	xvmulsp		vs37,	vs1,	vs9
 | |
| 	xvmulsp		vs38,	vs2,	vs9
 | |
| 	xvmulsp		vs39,	vs3,	vs9
 | |
| 
 | |
| 	xvmulsp		vs40,	vs0,	vs10
 | |
| 	xvmulsp		vs41,	vs1,	vs10
 | |
| 	xvmulsp		vs42,	vs2,	vs10
 | |
| 	xvmulsp		vs43,	vs3,	vs10
 | |
| 
 | |
| 	xvmulsp		vs44,	vs0,	vs11
 | |
| 	xvmulsp		vs45,	vs1,	vs11
 | |
| 	xvmulsp		vs46,	vs2,	vs11
 | |
| 	xvmulsp		vs47,	vs3,	vs11
 | |
| 
 | |
| 	xvmulsp		vs48,	vs0,	vs12
 | |
| 	xvmulsp		vs49,	vs1,	vs12
 | |
| 	xvmulsp		vs50,	vs2,	vs12
 | |
| 	xvmulsp		vs51,	vs3,	vs12
 | |
| 
 | |
| 	xvmulsp		vs52,	vs0,	vs13
 | |
| 	xvmulsp		vs53,	vs1,	vs13
 | |
| 	xvmulsp		vs54,	vs2,	vs13
 | |
| 	xvmulsp		vs55,	vs3,	vs13
 | |
| 
 | |
| 	xvmulsp		vs56,	vs0,	vs14
 | |
| 	xvmulsp		vs57,	vs1,	vs14
 | |
| 	xvmulsp		vs58,	vs2,	vs14
 | |
| 	xvmulsp		vs59,	vs3,	vs14
 | |
| 
 | |
| 	xvmulsp		vs60,	vs0,	vs15
 | |
| 	xvmulsp		vs61,	vs1,	vs15
 | |
| 	xvmulsp		vs62,	vs2,	vs15
 | |
| 	xvmulsp		vs63,	vs3,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x16_SUB1 - stand-alone single k-step, accumulating form.
 *
 * Loads 64 bytes at AO into vs0-vs3 and 32 bytes at BO into vs28/vs29,
 * splats the eight BO words into vs8-vs15, advances AO by 64 and BO by 32,
 * and multiply-adds the operands it has just loaded into the accumulators:
 *     vs(32 + 4*j + i) += vs(i) * vs(8 + j),   i = 0..3, j = 0..7
 *
 * Identical to KERNEL8x16_SUBI1 except that xvmaddasp is used, so the
 * existing accumulator contents are kept and added to.
 * Writes: vs0-vs3, vs8-vs15, vs28, vs29, vs32-vs63, AO, BO.
 * NOTE(review): o0/o16/o32/o48, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x16_SUB1', `
 | |
| #else
 | |
| .macro KERNEL8x16_SUB1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 	xvmaddasp	vs34,	vs2,	vs8
 | |
| 	xvmaddasp	vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs9
 | |
| 	xvmaddasp	vs37,	vs1,	vs9
 | |
| 	xvmaddasp	vs38,	vs2,	vs9
 | |
| 	xvmaddasp	vs39,	vs3,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs0,	vs10
 | |
| 	xvmaddasp	vs41,	vs1,	vs10
 | |
| 	xvmaddasp	vs42,	vs2,	vs10
 | |
| 	xvmaddasp	vs43,	vs3,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs0,	vs11
 | |
| 	xvmaddasp	vs45,	vs1,	vs11
 | |
| 	xvmaddasp	vs46,	vs2,	vs11
 | |
| 	xvmaddasp	vs47,	vs3,	vs11
 | |
| 
 | |
| 	xvmaddasp	vs48,	vs0,	vs12
 | |
| 	xvmaddasp	vs49,	vs1,	vs12
 | |
| 	xvmaddasp	vs50,	vs2,	vs12
 | |
| 	xvmaddasp	vs51,	vs3,	vs12
 | |
| 
 | |
| 	xvmaddasp	vs52,	vs0,	vs13
 | |
| 	xvmaddasp	vs53,	vs1,	vs13
 | |
| 	xvmaddasp	vs54,	vs2,	vs13
 | |
| 	xvmaddasp	vs55,	vs3,	vs13
 | |
| 
 | |
| 	xvmaddasp	vs56,	vs0,	vs14
 | |
| 	xvmaddasp	vs57,	vs1,	vs14
 | |
| 	xvmaddasp	vs58,	vs2,	vs14
 | |
| 	xvmaddasp	vs59,	vs3,	vs14
 | |
| 
 | |
| 	xvmaddasp	vs60,	vs0,	vs15
 | |
| 	xvmaddasp	vs61,	vs1,	vs15
 | |
| 	xvmaddasp	vs62,	vs2,	vs15
 | |
| 	xvmaddasp	vs63,	vs3,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * SAVE8x16 - scale the 8x16 accumulator tile by alpha and write it to C.
 *
 * T1 starts as a copy of CO and is advanced by LDC after each of the eight
 * 64-byte groups.  For group j = 0..7, with i = 0..3:
 *   TRMMKERNEL not defined: vs(i) is loaded from T1, then
 *                               vs(i) += vs(32 + 4*j + i) * alpha_vr
 *   TRMMKERNEL defined:     nothing is read from memory,
 *                               vs(i)  = vs(32 + 4*j + i) * alpha_vr
 * and vs0-vs3 are stored back to the same 64 bytes at T1.
 * After the last group CO is advanced by 64.
 *
 * Writes: vs0-vs3, T1, CO, and 8 x 64 bytes of memory.  The accumulators
 * vs32-vs63 are read only.  The add to T1 after the eighth store produces
 * a value that is not used again inside this macro.
 * NOTE(review): T1, CO, LDC, alpha_vr and o0/o16/o32/o48 are defined
 * outside this chunk; LDC is presumably a stride already scaled to bytes
 * and alpha_vr a vector with alpha in every word - confirm.
 */
| #if defined(_AIX)
 | |
| define(`SAVE8x16', `
 | |
| #else
 | |
| .macro SAVE8x16
 | |
| #endif
 | |
| 
 | |
| 	mr		T1,	CO
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs34,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs34,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs36,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs37,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs38,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs39,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs36,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs37,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs38,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs39,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs40,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs41,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs42,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs43,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs40,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs41,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs42,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs43,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs44,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs45,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs46,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs47,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs44,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs45,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs46,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs47,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs48,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs49,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs50,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs51,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs48,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs49,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs50,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs51,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs52,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs53,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs54,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs55,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs52,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs53,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs54,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs55,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs56,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs57,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs58,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs59,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs56,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs57,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs58,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs59,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs60,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs61,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs62,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs63,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs60,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs61,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs62,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs63,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 	addi		CO,	CO,	64
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=8 and M=8
 | |
| **********************************************************************************************/
 | |
| 
 | |
/*
 * LOAD8x8_1 - preload one k-step of operands for the N=8, M=8 kernels.
 *
 * Reads 32 bytes at AO into vs0/vs1 and 32 bytes at BO into vs28/vs29,
 * replicates each of the eight BO words across a whole vector (xxspltw)
 * into vs8-vs15, then advances AO by 32 and BO by 32.  No arithmetic is
 * done and no accumulator is touched.
 *
 * Writes: vs0, vs1, vs8-vs15, vs28, vs29, AO, BO.
 * NOTE(review): o0/o16, AO and BO are defined outside this chunk;
 * presumably o0/o16 are index registers holding those byte offsets.
 */
| #if defined(_AIX)
 | |
| define(`LOAD8x8_1', `
 | |
| #else
 | |
| .macro LOAD8x8_1
 | |
| #endif
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x8_I1 - initialising step of the two-register-set 8x8 loop.
 *
 * Load phase: reads the next 32 bytes at AO into vs4/vs5 and the next 32
 * bytes at BO into vs28/vs29, splats those eight words into vs16-vs23,
 * then advances AO by 32 and BO by 32.
 * Compute phase: uses the operands already held in vs0/vs1 and vs8-vs15
 * (the registers LOAD8x8_1 fills) and WRITES the products with xvmulsp,
 * discarding previous accumulator contents:
 *     vs(32 + 2*j + i) = vs(i) * vs(8 + j),   i = 0..1, j = 0..7
 *
 * Writes: vs4, vs5, vs16-vs23, vs28, vs29, vs32-vs47, AO, BO.
 * NOTE(review): o0/o16, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x8_I1', `
 | |
| #else
 | |
| .macro KERNEL8x8_I1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs20,	vs29,	0
 | |
| 	xxspltw		vs21,	vs29,	1
 | |
| 	xxspltw		vs22,	vs29,	2
 | |
| 	xxspltw		vs23,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs9
 | |
| 	xvmulsp		vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs10
 | |
| 	xvmulsp		vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmulsp		vs38,	vs0,	vs11
 | |
| 	xvmulsp		vs39,	vs1,	vs11
 | |
| 
 | |
| 	xvmulsp		vs40,	vs0,	vs12
 | |
| 	xvmulsp		vs41,	vs1,	vs12
 | |
| 
 | |
| 	xvmulsp		vs42,	vs0,	vs13
 | |
| 	xvmulsp		vs43,	vs1,	vs13
 | |
| 
 | |
| 	xvmulsp		vs44,	vs0,	vs14
 | |
| 	xvmulsp		vs45,	vs1,	vs14
 | |
| 
 | |
| 	xvmulsp		vs46,	vs0,	vs15
 | |
| 	xvmulsp		vs47,	vs1,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x8_1 - accumulate with register set 0 while loading set 1.
 *
 * Load phase: reads the next 32 bytes at AO into vs4/vs5 and the next 32
 * bytes at BO into vs28/vs29, splats those eight words into vs16-vs23,
 * then advances AO by 32 and BO by 32.
 * Compute phase: multiply-adds the operands already in vs0/vs1 and
 * vs8-vs15 into the accumulators:
 *     vs(32 + 2*j + i) += vs(i) * vs(8 + j),   i = 0..1, j = 0..7
 *
 * Identical to KERNEL8x8_I1 except that the accumulators are added to
 * instead of overwritten.
 * Writes: vs4, vs5, vs16-vs23, vs28, vs29, vs32-vs47, AO, BO.
 * NOTE(review): o0/o16, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x8_1', `
 | |
| #else
 | |
| .macro KERNEL8x8_1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs20,	vs29,	0
 | |
| 	xxspltw		vs21,	vs29,	1
 | |
| 	xxspltw		vs22,	vs29,	2
 | |
| 	xxspltw		vs23,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs9
 | |
| 	xvmaddasp	vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs10
 | |
| 	xvmaddasp	vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs0,	vs11
 | |
| 	xvmaddasp	vs39,	vs1,	vs11
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs0,	vs12
 | |
| 	xvmaddasp	vs41,	vs1,	vs12
 | |
| 
 | |
| 	xvmaddasp	vs42,	vs0,	vs13
 | |
| 	xvmaddasp	vs43,	vs1,	vs13
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs0,	vs14
 | |
| 	xvmaddasp	vs45,	vs1,	vs14
 | |
| 
 | |
| 	xvmaddasp	vs46,	vs0,	vs15
 | |
| 	xvmaddasp	vs47,	vs1,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x8_2 - accumulate with register set 1 while loading set 0.
 *
 * Mirror image of KERNEL8x8_1.
 * Load phase: reads the next 32 bytes at AO into vs0/vs1 and the next 32
 * bytes at BO into vs28/vs29, splats those eight words into vs8-vs15,
 * then advances AO by 32 and BO by 32.
 * Compute phase: multiply-adds the operands already in vs4/vs5 and
 * vs16-vs23:
 *     vs(32 + 2*j + i) += vs(4 + i) * vs(16 + j),   i = 0..1, j = 0..7
 *
 * Writes: vs0, vs1, vs8-vs15, vs28, vs29, vs32-vs47, AO, BO.
 * NOTE(review): o0/o16, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x8_2', `
 | |
| #else
 | |
| .macro KERNEL8x8_2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs17
 | |
| 	xvmaddasp	vs35,	vs5,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs18
 | |
| 	xvmaddasp	vs37,	vs5,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs4,	vs19
 | |
| 	xvmaddasp	vs39,	vs5,	vs19
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs4,	vs20
 | |
| 	xvmaddasp	vs41,	vs5,	vs20
 | |
| 
 | |
| 	xvmaddasp	vs42,	vs4,	vs21
 | |
| 	xvmaddasp	vs43,	vs5,	vs21
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs4,	vs22
 | |
| 	xvmaddasp	vs45,	vs5,	vs22
 | |
| 
 | |
| 	xvmaddasp	vs46,	vs4,	vs23
 | |
| 	xvmaddasp	vs47,	vs5,	vs23
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x8_E2 - closing step for register set 1 of the 8x8 loop.
 *
 * Performs only the compute phase of KERNEL8x8_2: nothing is loaded and
 * AO/BO are not advanced.  The operands already in vs4/vs5 and vs16-vs23
 * are multiply-added into the accumulators:
 *     vs(32 + 2*j + i) += vs(4 + i) * vs(16 + j),   i = 0..1, j = 0..7
 *
 * Writes: vs32-vs47 only.
 * NOTE(review): presumably the final step after a KERNEL8x8_1 - call
 * sites are not visible in this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x8_E2', `
 | |
| #else
 | |
| .macro KERNEL8x8_E2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs17
 | |
| 	xvmaddasp	vs35,	vs5,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs18
 | |
| 	xvmaddasp	vs37,	vs5,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs4,	vs19
 | |
| 	xvmaddasp	vs39,	vs5,	vs19
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs4,	vs20
 | |
| 	xvmaddasp	vs41,	vs5,	vs20
 | |
| 
 | |
| 	xvmaddasp	vs42,	vs4,	vs21
 | |
| 	xvmaddasp	vs43,	vs5,	vs21
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs4,	vs22
 | |
| 	xvmaddasp	vs45,	vs5,	vs22
 | |
| 
 | |
| 	xvmaddasp	vs46,	vs4,	vs23
 | |
| 	xvmaddasp	vs47,	vs5,	vs23
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x8_SUBI1 - stand-alone single k-step for 8x8, initialising form.
 *
 * Loads 32 bytes at AO into vs0/vs1 and 32 bytes at BO into vs28/vs29,
 * splats the eight BO words into vs8-vs15, advances AO by 32 and BO by 32,
 * and then multiplies the operands it has just loaded.  xvmulsp WRITES the
 * products, so previous accumulator contents are discarded:
 *     vs(32 + 2*j + i) = vs(i) * vs(8 + j),   i = 0..1, j = 0..7
 *
 * Load and compute use the same register set, so no preload is required.
 * Writes: vs0, vs1, vs8-vs15, vs28, vs29, vs32-vs47, AO, BO.
 * NOTE(review): o0/o16, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x8_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL8x8_SUBI1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs9
 | |
| 	xvmulsp		vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs10
 | |
| 	xvmulsp		vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmulsp		vs38,	vs0,	vs11
 | |
| 	xvmulsp		vs39,	vs1,	vs11
 | |
| 
 | |
| 	xvmulsp		vs40,	vs0,	vs12
 | |
| 	xvmulsp		vs41,	vs1,	vs12
 | |
| 
 | |
| 	xvmulsp		vs42,	vs0,	vs13
 | |
| 	xvmulsp		vs43,	vs1,	vs13
 | |
| 
 | |
| 	xvmulsp		vs44,	vs0,	vs14
 | |
| 	xvmulsp		vs45,	vs1,	vs14
 | |
| 
 | |
| 	xvmulsp		vs46,	vs0,	vs15
 | |
| 	xvmulsp		vs47,	vs1,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * KERNEL8x8_SUB1 - stand-alone single k-step for 8x8, accumulating form.
 *
 * Loads 32 bytes at AO into vs0/vs1 and 32 bytes at BO into vs28/vs29,
 * splats the eight BO words into vs8-vs15, advances AO by 32 and BO by 32,
 * and multiply-adds the operands it has just loaded into the accumulators:
 *     vs(32 + 2*j + i) += vs(i) * vs(8 + j),   i = 0..1, j = 0..7
 *
 * Identical to KERNEL8x8_SUBI1 except that xvmaddasp is used, so the
 * existing accumulator contents are kept and added to.
 * Writes: vs0, vs1, vs8-vs15, vs28, vs29, vs32-vs47, AO, BO.
 * NOTE(review): o0/o16, AO and BO are defined outside this chunk.
 */
| #if defined(_AIX)
 | |
| define(`KERNEL8x8_SUB1', `
 | |
| #else
 | |
| .macro KERNEL8x8_SUB1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	lxvw4x		vs29,	o16,	BO
 | |
| 
 | |
| 	xxspltw		vs12,	vs29,	0
 | |
| 	xxspltw		vs13,	vs29,	1
 | |
| 	xxspltw		vs14,	vs29,	2
 | |
| 	xxspltw		vs15,	vs29,	3
 | |
| 
 | |
| 	addi		BO,	BO,	32
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs9
 | |
| 	xvmaddasp	vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs10
 | |
| 	xvmaddasp	vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs0,	vs11
 | |
| 	xvmaddasp	vs39,	vs1,	vs11
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs0,	vs12
 | |
| 	xvmaddasp	vs41,	vs1,	vs12
 | |
| 
 | |
| 	xvmaddasp	vs42,	vs0,	vs13
 | |
| 	xvmaddasp	vs43,	vs1,	vs13
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs0,	vs14
 | |
| 	xvmaddasp	vs45,	vs1,	vs14
 | |
| 
 | |
| 	xvmaddasp	vs46,	vs0,	vs15
 | |
| 	xvmaddasp	vs47,	vs1,	vs15
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/*
 * SAVE8x8 - scale the 8x8 accumulator tile by alpha and write it to C.
 *
 * T1 starts as a copy of CO and is advanced by LDC after each of the eight
 * 32-byte groups.  For group j = 0..7, with i = 0..1:
 *   TRMMKERNEL not defined: vs(i) is loaded from T1, then
 *                               vs(i) += vs(32 + 2*j + i) * alpha_vr
 *   TRMMKERNEL defined:     nothing is read from memory,
 *                               vs(i)  = vs(32 + 2*j + i) * alpha_vr
 * and vs0/vs1 are stored back to the same 32 bytes at T1.
 * After the last group CO is advanced by 32.
 *
 * Writes: vs0, vs1, T1, CO, and 8 x 32 bytes of memory.  The accumulators
 * vs32-vs47 are read only.  The add to T1 after the eighth store produces
 * a value that is not used again inside this macro.
 * NOTE(review): T1, CO, LDC, alpha_vr and o0/o16 are defined outside this
 * chunk; LDC is presumably a stride already scaled to bytes and alpha_vr a
 * vector with alpha in every word - confirm.
 */
| #if defined(_AIX)
 | |
| define(`SAVE8x8', `
 | |
| #else
 | |
| .macro SAVE8x8
 | |
| #endif
 | |
| 
 | |
| 	mr		T1,	CO
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs34,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs34,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs36,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs37,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs36,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs37,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs38,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs39,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs38,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs39,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs40,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs41,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs40,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs41,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs42,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs43,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs42,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs43,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs44,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs45,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs44,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs45,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs46,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs47,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs46,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs47,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 	addi		CO,	CO,	32
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=8 and M=4
 | |
| **********************************************************************************************/
 | |
| 
 | |
/*
 * LOAD8x4_1: prime the first operand set used by the 8x4 kernels.
 *   vs0      <- one vector (4 words) of A read at AO, then AO += 16
 *   vs8-vs15 <- the 8 words read at BO, each word broadcast to a full
 *               vector, then BO += 32
 * vs28 and vs29 are clobbered as staging registers.
 * o0 and o16 are index registers set up outside this chunk; presumably
 * they hold the byte offsets 0 and 16 (confirm at their definition).
 */
#if defined(_AIX)
define(`LOAD8x4_1', `
#else
.macro LOAD8x4_1
#endif

	/* A: 4 words */
	lxvw4x		vs0, o0, AO
	addi		AO, AO, 16

	/* B words 0-3, one broadcast register per word */
	lxvw4x		vs28, o0, BO
	xxspltw		vs8, vs28, 0
	xxspltw		vs9, vs28, 1
	xxspltw		vs10, vs28, 2
	xxspltw		vs11, vs28, 3

	/* B words 4-7 */
	lxvw4x		vs29, o16, BO
	xxspltw		vs12, vs29, 0
	xxspltw		vs13, vs29, 1
	xxspltw		vs14, vs29, 2
	xxspltw		vs15, vs29, 3

	addi		BO, BO, 32

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x4_I1: opening step of the software-pipelined 8x4 loop.
 * Loads the second operand set (A into vs4, broadcast B into vs16-vs23)
 * and initialises the accumulators vs32-vs39 with plain products of the
 * first operand set (vs0 times vs8-vs15), so the accumulators do not need
 * to be cleared beforehand.
 * vs0 and vs8-vs15 are only read here and must already hold data
 * (presumably loaded by LOAD8x4_1).
 * Advances AO by 16 bytes and BO by 32 bytes; clobbers vs28 and vs29.
 */
#if defined(_AIX)
define(`KERNEL8x4_I1', `
#else
.macro KERNEL8x4_I1
#endif

	/* fetch the second operand set */
	lxvw4x		vs4, o0, AO
	addi		AO, AO, 16

	lxvw4x		vs28, o0, BO
	xxspltw		vs16, vs28, 0
	xxspltw		vs17, vs28, 1
	xxspltw		vs18, vs28, 2
	xxspltw		vs19, vs28, 3

	lxvw4x		vs29, o16, BO
	xxspltw		vs20, vs29, 0
	xxspltw		vs21, vs29, 1
	xxspltw		vs22, vs29, 2
	xxspltw		vs23, vs29, 3

	addi		BO, BO, 32

	/* accumulators = A (vs0) * B word j (vs8+j), j = 0..7 */
	xvmulsp		vs32, vs0, vs8
	xvmulsp		vs33, vs0, vs9
	xvmulsp		vs34, vs0, vs10
	xvmulsp		vs35, vs0, vs11
	xvmulsp		vs36, vs0, vs12
	xvmulsp		vs37, vs0, vs13
	xvmulsp		vs38, vs0, vs14
	xvmulsp		vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x4_1: pipelined 8x4 step, phase 1.
 * Loads the second operand set (A into vs4, broadcast B into vs16-vs23)
 * while accumulating the products of the first operand set
 * (vs0 times vs8-vs15) into vs32-vs39.
 * Advances AO by 16 bytes and BO by 32 bytes; clobbers vs28 and vs29.
 */
#if defined(_AIX)
define(`KERNEL8x4_1', `
#else
.macro KERNEL8x4_1
#endif

	/* fetch the second operand set */
	lxvw4x		vs4, o0, AO
	addi		AO, AO, 16

	lxvw4x		vs28, o0, BO
	xxspltw		vs16, vs28, 0
	xxspltw		vs17, vs28, 1
	xxspltw		vs18, vs28, 2
	xxspltw		vs19, vs28, 3

	lxvw4x		vs29, o16, BO
	xxspltw		vs20, vs29, 0
	xxspltw		vs21, vs29, 1
	xxspltw		vs22, vs29, 2
	xxspltw		vs23, vs29, 3

	addi		BO, BO, 32

	/* accumulators += A (vs0) * B word j (vs8+j), j = 0..7 */
	xvmaddasp	vs32, vs0, vs8
	xvmaddasp	vs33, vs0, vs9
	xvmaddasp	vs34, vs0, vs10
	xvmaddasp	vs35, vs0, vs11
	xvmaddasp	vs36, vs0, vs12
	xvmaddasp	vs37, vs0, vs13
	xvmaddasp	vs38, vs0, vs14
	xvmaddasp	vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x4_2: pipelined 8x4 step, phase 2.
 * Mirror image of KERNEL8x4_1: reloads the first operand set
 * (A into vs0, broadcast B into vs8-vs15) while accumulating the products
 * of the second operand set (vs4 times vs16-vs23) into vs32-vs39.
 * Advances AO by 16 bytes and BO by 32 bytes; clobbers vs28 and vs29.
 */
#if defined(_AIX)
define(`KERNEL8x4_2', `
#else
.macro KERNEL8x4_2
#endif

	/* fetch the first operand set */
	lxvw4x		vs0, o0, AO
	addi		AO, AO, 16

	lxvw4x		vs28, o0, BO
	xxspltw		vs8, vs28, 0
	xxspltw		vs9, vs28, 1
	xxspltw		vs10, vs28, 2
	xxspltw		vs11, vs28, 3

	lxvw4x		vs29, o16, BO
	xxspltw		vs12, vs29, 0
	xxspltw		vs13, vs29, 1
	xxspltw		vs14, vs29, 2
	xxspltw		vs15, vs29, 3

	addi		BO, BO, 32

	/* accumulators += A (vs4) * B word j (vs16+j), j = 0..7 */
	xvmaddasp	vs32, vs4, vs16
	xvmaddasp	vs33, vs4, vs17
	xvmaddasp	vs34, vs4, vs18
	xvmaddasp	vs35, vs4, vs19
	xvmaddasp	vs36, vs4, vs20
	xvmaddasp	vs37, vs4, vs21
	xvmaddasp	vs38, vs4, vs22
	xvmaddasp	vs39, vs4, vs23

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x4_E2: closing step of the pipelined 8x4 loop.
 * Performs no loads and moves no pointers; it only accumulates the
 * products of the second operand set (vs4 times vs16-vs23) into
 * vs32-vs39.
 */
#if defined(_AIX)
define(`KERNEL8x4_E2', `
#else
.macro KERNEL8x4_E2
#endif

	/* accumulators += A (vs4) * B word j (vs16+j), j = 0..7 */
	xvmaddasp	vs32, vs4, vs16
	xvmaddasp	vs33, vs4, vs17
	xvmaddasp	vs34, vs4, vs18
	xvmaddasp	vs35, vs4, vs19
	xvmaddasp	vs36, vs4, vs20
	xvmaddasp	vs37, vs4, vs21
	xvmaddasp	vs38, vs4, vs22
	xvmaddasp	vs39, vs4, vs23

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x4_SUBI1: self-contained initialising 8x4 step.
 * Loads A into vs0 and broadcast B into vs8-vs15, then overwrites the
 * accumulators vs32-vs39 with the products (multiply only, no add).
 * Advances AO by 16 bytes and BO by 32 bytes; clobbers vs28 and vs29.
 */
#if defined(_AIX)
define(`KERNEL8x4_SUBI1', `
#else
.macro KERNEL8x4_SUBI1
#endif

	lxvw4x		vs0, o0, AO
	addi		AO, AO, 16

	lxvw4x		vs28, o0, BO
	xxspltw		vs8, vs28, 0
	xxspltw		vs9, vs28, 1
	xxspltw		vs10, vs28, 2
	xxspltw		vs11, vs28, 3

	lxvw4x		vs29, o16, BO
	xxspltw		vs12, vs29, 0
	xxspltw		vs13, vs29, 1
	xxspltw		vs14, vs29, 2
	xxspltw		vs15, vs29, 3

	addi		BO, BO, 32

	/* accumulators = A (vs0) * B word j (vs8+j), j = 0..7 */
	xvmulsp		vs32, vs0, vs8
	xvmulsp		vs33, vs0, vs9
	xvmulsp		vs34, vs0, vs10
	xvmulsp		vs35, vs0, vs11
	xvmulsp		vs36, vs0, vs12
	xvmulsp		vs37, vs0, vs13
	xvmulsp		vs38, vs0, vs14
	xvmulsp		vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x4_SUB1: self-contained accumulating 8x4 step.
 * Loads A into vs0 and broadcast B into vs8-vs15, then adds the products
 * to the accumulators vs32-vs39.
 * Advances AO by 16 bytes and BO by 32 bytes; clobbers vs28 and vs29.
 */
#if defined(_AIX)
define(`KERNEL8x4_SUB1', `
#else
.macro KERNEL8x4_SUB1
#endif

	lxvw4x		vs0, o0, AO
	addi		AO, AO, 16

	lxvw4x		vs28, o0, BO
	xxspltw		vs8, vs28, 0
	xxspltw		vs9, vs28, 1
	xxspltw		vs10, vs28, 2
	xxspltw		vs11, vs28, 3

	lxvw4x		vs29, o16, BO
	xxspltw		vs12, vs29, 0
	xxspltw		vs13, vs29, 1
	xxspltw		vs14, vs29, 2
	xxspltw		vs15, vs29, 3

	addi		BO, BO, 32

	/* accumulators += A (vs0) * B word j (vs8+j), j = 0..7 */
	xvmaddasp	vs32, vs0, vs8
	xvmaddasp	vs33, vs0, vs9
	xvmaddasp	vs34, vs0, vs10
	xvmaddasp	vs35, vs0, vs11
	xvmaddasp	vs36, vs0, vs12
	xvmaddasp	vs37, vs0, vs13
	xvmaddasp	vs38, vs0, vs14
	xvmaddasp	vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * SAVE8x4: write the eight accumulators vs32-vs39 back to C.
 * T1 starts at CO and is stepped by LDC after every store (LDC is added
 * to the pointer as-is, so it acts as a byte stride here).
 * For each of the 8 steps, one 4-word vector at T1 is produced:
 *   TRMMKERNEL defined:     C = acc * alpha_vr
 *   TRMMKERNEL not defined: C = C + acc * alpha_vr  (C is loaded first)
 * Finally CO is advanced by 16 bytes.
 * Clobbers vs0 and T1.
 */
#if defined(_AIX)
define(`SAVE8x4', `
#else
.macro SAVE8x4
#endif

	mr		T1, CO

	/* step 0: vs32 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs32, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs32, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* step 1: vs33 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs33, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs33, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* step 2: vs34 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs34, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs34, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* step 3: vs35 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs35, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs35, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* step 4: vs36 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs36, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs36, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* step 5: vs37 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs37, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs37, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* step 6: vs38 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs38, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs38, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* step 7: vs39 */
#ifdef TRMMKERNEL
	xvmulsp		vs0, vs39, alpha_vr
#else
	lxvw4x		vs0, o0, T1
	xvmaddasp	vs0, vs39, alpha_vr
#endif
	stxvw4x		vs0, o0, T1
	add		T1, T1, LDC

	/* move on to the next 4-word block of C */
	addi		CO, CO, 16

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=8 and M=2
 | |
| **********************************************************************************************/
 | |
| 
 | |
/*
 * LOAD8x2_1: prime the first operand set used by the 8x2 kernels.
 *   vs0, vs1 <- two single-precision scalars of A read at AO, AO += 8
 *   vs8-vs15 <- eight single-precision scalars of B read at BO, BO += 32
 * T1 is clobbered as a temporary cursor over B.
 * o0, o4, o8 and o12 are index registers set up outside this chunk;
 * presumably they hold the byte offsets 0, 4, 8 and 12 (confirm at their
 * definition).
 */
#if defined(_AIX)
define(`LOAD8x2_1', `
#else
.macro LOAD8x2_1
#endif

	/* A: 2 scalars */
	lxsspx		vs0, o0, AO
	lxsspx		vs1, o4, AO
	addi		AO, AO, 8

	/* B scalars 0-3 */
	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	/* B scalars 4-7 */
	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x2_I1: opening step of the software-pipelined 8x2 loop.
 * Loads the second operand set (A into vs4/vs5, B into vs16-vs23) and
 * initialises the accumulators vs32-vs47 with plain products of the
 * first operand set (vs0/vs1 times vs8-vs15).
 * Accumulator layout: vs(32+2j) = vs0 * B[j], vs(33+2j) = vs1 * B[j].
 * vs0, vs1 and vs8-vs15 are only read here and must already hold data
 * (presumably loaded by LOAD8x2_1).
 * The arithmetic uses the scalar double-precision forms (xsmuldp).
 * Advances AO by 8 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x2_I1', `
#else
.macro KERNEL8x2_I1
#endif

	/* fetch the second operand set */
	lxsspx		vs4, o0, AO
	lxsspx		vs5, o4, AO
	addi		AO, AO, 8

	mr		T1, BO
	lxsspx		vs16, o0, T1
	lxsspx		vs17, o4, T1
	lxsspx		vs18, o8, T1
	lxsspx		vs19, o12, T1

	addi		T1, T1, 16
	lxsspx		vs20, o0, T1
	lxsspx		vs21, o4, T1
	lxsspx		vs22, o8, T1
	lxsspx		vs23, o12, T1

	addi		BO, BO, 32

	/* accumulators = first operand set products */
	xsmuldp		vs32, vs0, vs8
	xsmuldp		vs33, vs1, vs8

	xsmuldp		vs34, vs0, vs9
	xsmuldp		vs35, vs1, vs9

	xsmuldp		vs36, vs0, vs10
	xsmuldp		vs37, vs1, vs10

	xsmuldp		vs38, vs0, vs11
	xsmuldp		vs39, vs1, vs11

	xsmuldp		vs40, vs0, vs12
	xsmuldp		vs41, vs1, vs12

	xsmuldp		vs42, vs0, vs13
	xsmuldp		vs43, vs1, vs13

	xsmuldp		vs44, vs0, vs14
	xsmuldp		vs45, vs1, vs14

	xsmuldp		vs46, vs0, vs15
	xsmuldp		vs47, vs1, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x2_1: pipelined 8x2 step, phase 1.
 * Loads the second operand set (A into vs4/vs5, B into vs16-vs23) while
 * accumulating the products of the first operand set
 * (vs0/vs1 times vs8-vs15) into vs32-vs47.
 * Accumulator layout: vs(32+2j) += vs0 * B[j], vs(33+2j) += vs1 * B[j].
 * Advances AO by 8 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x2_1', `
#else
.macro KERNEL8x2_1
#endif

	/* fetch the second operand set */
	lxsspx		vs4, o0, AO
	lxsspx		vs5, o4, AO
	addi		AO, AO, 8

	mr		T1, BO
	lxsspx		vs16, o0, T1
	lxsspx		vs17, o4, T1
	lxsspx		vs18, o8, T1
	lxsspx		vs19, o12, T1

	addi		T1, T1, 16
	lxsspx		vs20, o0, T1
	lxsspx		vs21, o4, T1
	lxsspx		vs22, o8, T1
	lxsspx		vs23, o12, T1

	addi		BO, BO, 32

	/* accumulate first operand set products */
	xsmaddadp	vs32, vs0, vs8
	xsmaddadp	vs33, vs1, vs8

	xsmaddadp	vs34, vs0, vs9
	xsmaddadp	vs35, vs1, vs9

	xsmaddadp	vs36, vs0, vs10
	xsmaddadp	vs37, vs1, vs10

	xsmaddadp	vs38, vs0, vs11
	xsmaddadp	vs39, vs1, vs11

	xsmaddadp	vs40, vs0, vs12
	xsmaddadp	vs41, vs1, vs12

	xsmaddadp	vs42, vs0, vs13
	xsmaddadp	vs43, vs1, vs13

	xsmaddadp	vs44, vs0, vs14
	xsmaddadp	vs45, vs1, vs14

	xsmaddadp	vs46, vs0, vs15
	xsmaddadp	vs47, vs1, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x2_2: pipelined 8x2 step, phase 2.
 * Mirror image of KERNEL8x2_1: reloads the first operand set
 * (A into vs0/vs1, B into vs8-vs15) while accumulating the products of
 * the second operand set (vs4/vs5 times vs16-vs23) into vs32-vs47.
 * Advances AO by 8 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x2_2', `
#else
.macro KERNEL8x2_2
#endif

	/* fetch the first operand set */
	lxsspx		vs0, o0, AO
	lxsspx		vs1, o4, AO
	addi		AO, AO, 8

	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

	/* accumulate second operand set products */
	xsmaddadp	vs32, vs4, vs16
	xsmaddadp	vs33, vs5, vs16

	xsmaddadp	vs34, vs4, vs17
	xsmaddadp	vs35, vs5, vs17

	xsmaddadp	vs36, vs4, vs18
	xsmaddadp	vs37, vs5, vs18

	xsmaddadp	vs38, vs4, vs19
	xsmaddadp	vs39, vs5, vs19

	xsmaddadp	vs40, vs4, vs20
	xsmaddadp	vs41, vs5, vs20

	xsmaddadp	vs42, vs4, vs21
	xsmaddadp	vs43, vs5, vs21

	xsmaddadp	vs44, vs4, vs22
	xsmaddadp	vs45, vs5, vs22

	xsmaddadp	vs46, vs4, vs23
	xsmaddadp	vs47, vs5, vs23

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x2_E2: closing step of the pipelined 8x2 loop.
 * Performs no loads and moves no pointers; it only accumulates the
 * products of the second operand set (vs4/vs5 times vs16-vs23) into
 * vs32-vs47.
 */
#if defined(_AIX)
define(`KERNEL8x2_E2', `
#else
.macro KERNEL8x2_E2
#endif

	xsmaddadp	vs32, vs4, vs16
	xsmaddadp	vs33, vs5, vs16

	xsmaddadp	vs34, vs4, vs17
	xsmaddadp	vs35, vs5, vs17

	xsmaddadp	vs36, vs4, vs18
	xsmaddadp	vs37, vs5, vs18

	xsmaddadp	vs38, vs4, vs19
	xsmaddadp	vs39, vs5, vs19

	xsmaddadp	vs40, vs4, vs20
	xsmaddadp	vs41, vs5, vs20

	xsmaddadp	vs42, vs4, vs21
	xsmaddadp	vs43, vs5, vs21

	xsmaddadp	vs44, vs4, vs22
	xsmaddadp	vs45, vs5, vs22

	xsmaddadp	vs46, vs4, vs23
	xsmaddadp	vs47, vs5, vs23

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x2_SUBI1: self-contained initialising 8x2 step.
 * Loads A into vs0/vs1 and B into vs8-vs15, then overwrites the
 * accumulators vs32-vs47 with the products (multiply only, no add).
 * Accumulator layout: vs(32+2j) = vs0 * B[j], vs(33+2j) = vs1 * B[j].
 * Advances AO by 8 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x2_SUBI1', `
#else
.macro KERNEL8x2_SUBI1
#endif

	lxsspx		vs0, o0, AO
	lxsspx		vs1, o4, AO
	addi		AO, AO, 8

	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

	xsmuldp		vs32, vs0, vs8
	xsmuldp		vs33, vs1, vs8

	xsmuldp		vs34, vs0, vs9
	xsmuldp		vs35, vs1, vs9

	xsmuldp		vs36, vs0, vs10
	xsmuldp		vs37, vs1, vs10

	xsmuldp		vs38, vs0, vs11
	xsmuldp		vs39, vs1, vs11

	xsmuldp		vs40, vs0, vs12
	xsmuldp		vs41, vs1, vs12

	xsmuldp		vs42, vs0, vs13
	xsmuldp		vs43, vs1, vs13

	xsmuldp		vs44, vs0, vs14
	xsmuldp		vs45, vs1, vs14

	xsmuldp		vs46, vs0, vs15
	xsmuldp		vs47, vs1, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x2_SUB1: self-contained accumulating 8x2 step.
 * Loads A into vs0/vs1 and B into vs8-vs15, then adds the products to
 * the accumulators vs32-vs47.
 * Accumulator layout: vs(32+2j) += vs0 * B[j], vs(33+2j) += vs1 * B[j].
 * Advances AO by 8 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x2_SUB1', `
#else
.macro KERNEL8x2_SUB1
#endif

	lxsspx		vs0, o0, AO
	lxsspx		vs1, o4, AO
	addi		AO, AO, 8

	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

	xsmaddadp	vs32, vs0, vs8
	xsmaddadp	vs33, vs1, vs8

	xsmaddadp	vs34, vs0, vs9
	xsmaddadp	vs35, vs1, vs9

	xsmaddadp	vs36, vs0, vs10
	xsmaddadp	vs37, vs1, vs10

	xsmaddadp	vs38, vs0, vs11
	xsmaddadp	vs39, vs1, vs11

	xsmaddadp	vs40, vs0, vs12
	xsmaddadp	vs41, vs1, vs12

	xsmaddadp	vs42, vs0, vs13
	xsmaddadp	vs43, vs1, vs13

	xsmaddadp	vs44, vs0, vs14
	xsmaddadp	vs45, vs1, vs14

	xsmaddadp	vs46, vs0, vs15
	xsmaddadp	vs47, vs1, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * SAVE8x2: write the sixteen scalar accumulators vs32-vs47 back to C.
 * T1 starts at CO and is stepped by LDC after every pair of stores (LDC
 * is added to the pointer as-is, so it acts as a byte stride here).
 * Each of the 8 steps handles one accumulator pair and two
 * single-precision scalars at T1 (index registers o0 and o4):
 *   TRMMKERNEL defined:     C = acc * alpha_r
 *   TRMMKERNEL not defined: C = C + acc * alpha_r  (C is loaded first)
 * Finally CO is advanced by 8 bytes.
 * Clobbers vs0, vs1 and T1.
 */
#if defined(_AIX)
define(`SAVE8x2', `
#else
.macro SAVE8x2
#endif

	mr		T1, CO

	/* step 0: vs32, vs33 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs32, alpha_r
	xsmuldp		vs1, vs33, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs32, alpha_r
	xsmaddadp	vs1, vs33, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* step 1: vs34, vs35 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs34, alpha_r
	xsmuldp		vs1, vs35, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs34, alpha_r
	xsmaddadp	vs1, vs35, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* step 2: vs36, vs37 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs36, alpha_r
	xsmuldp		vs1, vs37, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs36, alpha_r
	xsmaddadp	vs1, vs37, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* step 3: vs38, vs39 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs38, alpha_r
	xsmuldp		vs1, vs39, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs38, alpha_r
	xsmaddadp	vs1, vs39, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* step 4: vs40, vs41 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs40, alpha_r
	xsmuldp		vs1, vs41, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs40, alpha_r
	xsmaddadp	vs1, vs41, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* step 5: vs42, vs43 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs42, alpha_r
	xsmuldp		vs1, vs43, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs42, alpha_r
	xsmaddadp	vs1, vs43, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* step 6: vs44, vs45 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs44, alpha_r
	xsmuldp		vs1, vs45, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs44, alpha_r
	xsmaddadp	vs1, vs45, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* step 7: vs46, vs47 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs46, alpha_r
	xsmuldp		vs1, vs47, alpha_r
#else
	lxsspx		vs0, o0, T1
	lxsspx		vs1, o4, T1
	xsmaddadp	vs0, vs46, alpha_r
	xsmaddadp	vs1, vs47, alpha_r
#endif
	stxsspx		vs0, o0, T1
	stxsspx		vs1, o4, T1
	add		T1, T1, LDC

	/* move on to the next 2-scalar block of C */
	addi		CO, CO, 8

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=8 and M=1
 | |
| **********************************************************************************************/
 | |
| 
 | |
/*
 * LOAD8x1_1: prime the first operand set used by the 8x1 kernels.
 *   vs0      <- one single-precision scalar of A read at AO, AO += 4
 *   vs8-vs15 <- eight single-precision scalars of B read at BO, BO += 32
 * T1 is clobbered as a temporary cursor over B.
 * o0, o4, o8 and o12 are index registers set up outside this chunk;
 * presumably they hold the byte offsets 0, 4, 8 and 12 (confirm at their
 * definition).
 */
#if defined(_AIX)
define(`LOAD8x1_1', `
#else
.macro LOAD8x1_1
#endif

	/* A: 1 scalar */
	lxsspx		vs0, o0, AO
	addi		AO, AO, 4

	/* B scalars 0-3 */
	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	/* B scalars 4-7 */
	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x1_I1: opening step of the software-pipelined 8x1 loop.
 * Loads the second operand set (A into vs4, B into vs16-vs23) and
 * initialises the accumulators vs32-vs39 with plain products of the
 * first operand set (vs0 times vs8-vs15).
 * vs0 and vs8-vs15 are only read here and must already hold data
 * (presumably loaded by LOAD8x1_1).
 * Advances AO by 4 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x1_I1', `
#else
.macro KERNEL8x1_I1
#endif

	/* fetch the second operand set */
	lxsspx		vs4, o0, AO
	addi		AO, AO, 4

	mr		T1, BO
	lxsspx		vs16, o0, T1
	lxsspx		vs17, o4, T1
	lxsspx		vs18, o8, T1
	lxsspx		vs19, o12, T1

	addi		T1, T1, 16
	lxsspx		vs20, o0, T1
	lxsspx		vs21, o4, T1
	lxsspx		vs22, o8, T1
	lxsspx		vs23, o12, T1

	addi		BO, BO, 32

	/* accumulators = vs0 * B[j] (vs8+j), j = 0..7 */
	xsmuldp		vs32, vs0, vs8
	xsmuldp		vs33, vs0, vs9
	xsmuldp		vs34, vs0, vs10
	xsmuldp		vs35, vs0, vs11
	xsmuldp		vs36, vs0, vs12
	xsmuldp		vs37, vs0, vs13
	xsmuldp		vs38, vs0, vs14
	xsmuldp		vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x1_1: pipelined 8x1 step, phase 1.
 * Loads the second operand set (A into vs4, B into vs16-vs23) while
 * accumulating the products of the first operand set
 * (vs0 times vs8-vs15) into vs32-vs39.
 * Advances AO by 4 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x1_1', `
#else
.macro KERNEL8x1_1
#endif

	/* fetch the second operand set */
	lxsspx		vs4, o0, AO
	addi		AO, AO, 4

	mr		T1, BO
	lxsspx		vs16, o0, T1
	lxsspx		vs17, o4, T1
	lxsspx		vs18, o8, T1
	lxsspx		vs19, o12, T1

	addi		T1, T1, 16
	lxsspx		vs20, o0, T1
	lxsspx		vs21, o4, T1
	lxsspx		vs22, o8, T1
	lxsspx		vs23, o12, T1

	addi		BO, BO, 32

	/* accumulators += vs0 * B[j] (vs8+j), j = 0..7 */
	xsmaddadp	vs32, vs0, vs8
	xsmaddadp	vs33, vs0, vs9
	xsmaddadp	vs34, vs0, vs10
	xsmaddadp	vs35, vs0, vs11
	xsmaddadp	vs36, vs0, vs12
	xsmaddadp	vs37, vs0, vs13
	xsmaddadp	vs38, vs0, vs14
	xsmaddadp	vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x1_2: pipelined 8x1 step, phase 2.
 * Mirror image of KERNEL8x1_1: reloads the first operand set
 * (A into vs0, B into vs8-vs15) while accumulating the products of the
 * second operand set (vs4 times vs16-vs23) into vs32-vs39.
 * Advances AO by 4 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x1_2', `
#else
.macro KERNEL8x1_2
#endif

	/* fetch the first operand set */
	lxsspx		vs0, o0, AO
	addi		AO, AO, 4

	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

	/* accumulators += vs4 * B[j] (vs16+j), j = 0..7 */
	xsmaddadp	vs32, vs4, vs16
	xsmaddadp	vs33, vs4, vs17
	xsmaddadp	vs34, vs4, vs18
	xsmaddadp	vs35, vs4, vs19
	xsmaddadp	vs36, vs4, vs20
	xsmaddadp	vs37, vs4, vs21
	xsmaddadp	vs38, vs4, vs22
	xsmaddadp	vs39, vs4, vs23

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x1_E2: closing step of the pipelined 8x1 loop.
 * Performs no loads and moves no pointers; it only accumulates the
 * products of the second operand set (vs4 times vs16-vs23) into
 * vs32-vs39.
 */
#if defined(_AIX)
define(`KERNEL8x1_E2', `
#else
.macro KERNEL8x1_E2
#endif

	/* accumulators += vs4 * B[j] (vs16+j), j = 0..7 */
	xsmaddadp	vs32, vs4, vs16
	xsmaddadp	vs33, vs4, vs17
	xsmaddadp	vs34, vs4, vs18
	xsmaddadp	vs35, vs4, vs19
	xsmaddadp	vs36, vs4, vs20
	xsmaddadp	vs37, vs4, vs21
	xsmaddadp	vs38, vs4, vs22
	xsmaddadp	vs39, vs4, vs23

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x1_SUBI1: self-contained initialising 8x1 step.
 * Loads A into vs0 and B into vs8-vs15, then overwrites the accumulators
 * vs32-vs39 with the products (multiply only, no add).
 * Advances AO by 4 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x1_SUBI1', `
#else
.macro KERNEL8x1_SUBI1
#endif

	lxsspx		vs0, o0, AO
	addi		AO, AO, 4

	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

	/* accumulators = vs0 * B[j] (vs8+j), j = 0..7 */
	xsmuldp		vs32, vs0, vs8
	xsmuldp		vs33, vs0, vs9
	xsmuldp		vs34, vs0, vs10
	xsmuldp		vs35, vs0, vs11
	xsmuldp		vs36, vs0, vs12
	xsmuldp		vs37, vs0, vs13
	xsmuldp		vs38, vs0, vs14
	xsmuldp		vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL8x1_SUB1: self-contained accumulating 8x1 step.
 * Loads A into vs0 and B into vs8-vs15, then adds the products to the
 * accumulators vs32-vs39.
 * Advances AO by 4 bytes and BO by 32 bytes; clobbers T1.
 */
#if defined(_AIX)
define(`KERNEL8x1_SUB1', `
#else
.macro KERNEL8x1_SUB1
#endif

	lxsspx		vs0, o0, AO
	addi		AO, AO, 4

	mr		T1, BO
	lxsspx		vs8, o0, T1
	lxsspx		vs9, o4, T1
	lxsspx		vs10, o8, T1
	lxsspx		vs11, o12, T1

	addi		T1, T1, 16
	lxsspx		vs12, o0, T1
	lxsspx		vs13, o4, T1
	lxsspx		vs14, o8, T1
	lxsspx		vs15, o12, T1

	addi		BO, BO, 32

	/* accumulators += vs0 * B[j] (vs8+j), j = 0..7 */
	xsmaddadp	vs32, vs0, vs8
	xsmaddadp	vs33, vs0, vs9
	xsmaddadp	vs34, vs0, vs10
	xsmaddadp	vs35, vs0, vs11
	xsmaddadp	vs36, vs0, vs12
	xsmaddadp	vs37, vs0, vs13
	xsmaddadp	vs38, vs0, vs14
	xsmaddadp	vs39, vs0, vs15

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * SAVE8x1: write the eight scalar accumulators vs32-vs39 back to C.
 * T1 starts at CO and is stepped by LDC after every store (LDC is added
 * to the pointer as-is, so it acts as a byte stride here).
 * Each of the 8 steps handles one single-precision scalar at T1:
 *   TRMMKERNEL defined:     C = acc * alpha_r
 *   TRMMKERNEL not defined: C = C + acc * alpha_r  (C is loaded first)
 * Finally CO is advanced by 4 bytes.
 * Clobbers vs0 and T1.
 */
#if defined(_AIX)
define(`SAVE8x1', `
#else
.macro SAVE8x1
#endif

	mr		T1, CO

	/* step 0: vs32 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs32, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs32, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* step 1: vs33 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs33, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs33, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* step 2: vs34 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs34, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs34, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* step 3: vs35 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs35, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs35, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* step 4: vs36 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs36, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs36, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* step 5: vs37 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs37, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs37, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* step 6: vs38 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs38, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs38, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* step 7: vs39 */
#ifdef TRMMKERNEL
	xsmuldp		vs0, vs39, alpha_r
#else
	lxsspx		vs0, o0, T1
	xsmaddadp	vs0, vs39, alpha_r
#endif
	stxsspx		vs0, o0, T1
	add		T1, T1, LDC

	/* move on to the next scalar of C */
	addi		CO, CO, 4

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=4 and M=16
 | |
| **********************************************************************************************/
 | |
| 
 | |
/*
 * LOAD4x16_1: prime the first operand set used by the 4x16 kernels.
 *   vs0-vs3  <- four vectors (16 words) of A read at AO, then AO += 64
 *   vs8-vs11 <- the 4 words read at BO, each word broadcast to a full
 *               vector, then BO += 16
 * vs28 is clobbered as a staging register.
 * o0, o16, o32 and o48 are index registers set up outside this chunk;
 * presumably they hold the byte offsets 0, 16, 32 and 48 (confirm at
 * their definition).
 */
#if defined(_AIX)
define(`LOAD4x16_1', `
#else
.macro LOAD4x16_1
#endif

	/* A: 4 vectors of 4 words */
	lxvw4x		vs0, o0, AO
	lxvw4x		vs1, o16, AO
	lxvw4x		vs2, o32, AO
	lxvw4x		vs3, o48, AO
	addi		AO, AO, 64

	/* B words 0-3, one broadcast register per word */
	lxvw4x		vs28, o0, BO
	xxspltw		vs8, vs28, 0
	xxspltw		vs9, vs28, 1
	xxspltw		vs10, vs28, 2
	xxspltw		vs11, vs28, 3

	addi		BO, BO, 16

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL4x16_I1: opening step of the software-pipelined 4x16 loop.
 * Loads the second operand set (A into vs4-vs7, broadcast B into
 * vs16-vs19) and initialises the accumulators vs32-vs47 with plain
 * products of the first operand set (vs0-vs3 times vs8-vs11).
 * Accumulator layout: vs(32+4j+i) = A vector i (vs0+i) * B word j (vs8+j).
 * vs0-vs3 and vs8-vs11 are only read here and must already hold data
 * (presumably loaded by LOAD4x16_1).
 * Advances AO by 64 bytes and BO by 16 bytes; clobbers vs28.
 */
#if defined(_AIX)
define(`KERNEL4x16_I1', `
#else
.macro KERNEL4x16_I1
#endif

	/* fetch the second operand set */
	lxvw4x		vs4, o0, AO
	lxvw4x		vs5, o16, AO
	lxvw4x		vs6, o32, AO
	lxvw4x		vs7, o48, AO
	addi		AO, AO, 64

	lxvw4x		vs28, o0, BO
	xxspltw		vs16, vs28, 0
	xxspltw		vs17, vs28, 1
	xxspltw		vs18, vs28, 2
	xxspltw		vs19, vs28, 3

	addi		BO, BO, 16

	/* B word 0 */
	xvmulsp		vs32, vs0, vs8
	xvmulsp		vs33, vs1, vs8
	xvmulsp		vs34, vs2, vs8
	xvmulsp		vs35, vs3, vs8

	/* B word 1 */
	xvmulsp		vs36, vs0, vs9
	xvmulsp		vs37, vs1, vs9
	xvmulsp		vs38, vs2, vs9
	xvmulsp		vs39, vs3, vs9

	/* B word 2 */
	xvmulsp		vs40, vs0, vs10
	xvmulsp		vs41, vs1, vs10
	xvmulsp		vs42, vs2, vs10
	xvmulsp		vs43, vs3, vs10

	/* B word 3 */
	xvmulsp		vs44, vs0, vs11
	xvmulsp		vs45, vs1, vs11
	xvmulsp		vs46, vs2, vs11
	xvmulsp		vs47, vs3, vs11

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/*
 * KERNEL4x16_1: pipelined 4x16 step, phase 1.
 * Loads the second operand set (A into vs4-vs7, broadcast B into
 * vs16-vs19) while accumulating the products of the first operand set
 * (vs0-vs3 times vs8-vs11) into vs32-vs47.
 * Accumulator layout: vs(32+4j+i) += A vector i (vs0+i) * B word j (vs8+j).
 * Advances AO by 64 bytes and BO by 16 bytes; clobbers vs28.
 */
#if defined(_AIX)
define(`KERNEL4x16_1', `
#else
.macro KERNEL4x16_1
#endif

	/* fetch the second operand set */
	lxvw4x		vs4, o0, AO
	lxvw4x		vs5, o16, AO
	lxvw4x		vs6, o32, AO
	lxvw4x		vs7, o48, AO
	addi		AO, AO, 64

	lxvw4x		vs28, o0, BO
	xxspltw		vs16, vs28, 0
	xxspltw		vs17, vs28, 1
	xxspltw		vs18, vs28, 2
	xxspltw		vs19, vs28, 3

	addi		BO, BO, 16

	/* B word 0 */
	xvmaddasp	vs32, vs0, vs8
	xvmaddasp	vs33, vs1, vs8
	xvmaddasp	vs34, vs2, vs8
	xvmaddasp	vs35, vs3, vs8

	/* B word 1 */
	xvmaddasp	vs36, vs0, vs9
	xvmaddasp	vs37, vs1, vs9
	xvmaddasp	vs38, vs2, vs9
	xvmaddasp	vs39, vs3, vs9

	/* B word 2 */
	xvmaddasp	vs40, vs0, vs10
	xvmaddasp	vs41, vs1, vs10
	xvmaddasp	vs42, vs2, vs10
	xvmaddasp	vs43, vs3, vs10

	/* B word 3 */
	xvmaddasp	vs44, vs0, vs11
	xvmaddasp	vs45, vs1, vs11
	xvmaddasp	vs46, vs2, vs11
	xvmaddasp	vs47, vs3, vs11

#if defined(_AIX)
')
#else
.endm
#endif
 | |
| 
 | |
/* KERNEL4x16_2: accumulate step using register set vs4-vs7 / vs16-vs19.
 * Loads the following operand set into vs0-vs3 (from AO) and vs8-vs11
 * (splats of the vector at BO), advancing AO by 64 and BO by 16, and
 * performs vs(32+4j+i) += vs(4+i) * vs(16+j) for i,j in 0..3.
 * vs4-vs7 and vs16-vs19 must already hold operands on entry. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x16_2', `
 | |
| #else
 | |
| .macro KERNEL4x16_2
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 16 accumulators */
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs17
 | |
| 	xvmaddasp	vs37,	vs5,	vs17
 | |
| 	xvmaddasp	vs38,	vs6,	vs17
 | |
| 	xvmaddasp	vs39,	vs7,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs4,	vs18
 | |
| 	xvmaddasp	vs41,	vs5,	vs18
 | |
| 	xvmaddasp	vs42,	vs6,	vs18
 | |
| 	xvmaddasp	vs43,	vs7,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs4,	vs19
 | |
| 	xvmaddasp	vs45,	vs5,	vs19
 | |
| 	xvmaddasp	vs46,	vs6,	vs19
 | |
| 	xvmaddasp	vs47,	vs7,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x16_E2: final accumulate step for register set vs4-vs7 /
 * vs16-vs19. Performs vs(32+4j+i) += vs(4+i) * vs(16+j) for i,j in 0..3
 * without loading anything and without touching AO or BO. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x16_E2', `
 | |
| #else
 | |
| .macro KERNEL4x16_E2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs17
 | |
| 	xvmaddasp	vs37,	vs5,	vs17
 | |
| 	xvmaddasp	vs38,	vs6,	vs17
 | |
| 	xvmaddasp	vs39,	vs7,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs4,	vs18
 | |
| 	xvmaddasp	vs41,	vs5,	vs18
 | |
| 	xvmaddasp	vs42,	vs6,	vs18
 | |
| 	xvmaddasp	vs43,	vs7,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs4,	vs19
 | |
| 	xvmaddasp	vs45,	vs5,	vs19
 | |
| 	xvmaddasp	vs46,	vs6,	vs19
 | |
| 	xvmaddasp	vs47,	vs7,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x16_SUBI1: self-contained initial step for the 4x16 tile.
 * Loads vs0-vs3 from AO and vs8-vs11 (splats of the vector at BO),
 * advances AO by 64 and BO by 16, and writes
 * vs(32+4j+i) = vs(i) * vs(8+j) for i,j in 0..3.
 * Unlike the I1 variant it multiplies the operands it has just loaded
 * and does not touch vs4-vs7 or vs16-vs19. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x16_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL4x16_SUBI1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* plain multiply: the accumulators are overwritten, not added to */
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 	xvmulsp		vs34,	vs2,	vs8
 | |
| 	xvmulsp		vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs9
 | |
| 	xvmulsp		vs37,	vs1,	vs9
 | |
| 	xvmulsp		vs38,	vs2,	vs9
 | |
| 	xvmulsp		vs39,	vs3,	vs9
 | |
| 
 | |
| 	xvmulsp		vs40,	vs0,	vs10
 | |
| 	xvmulsp		vs41,	vs1,	vs10
 | |
| 	xvmulsp		vs42,	vs2,	vs10
 | |
| 	xvmulsp		vs43,	vs3,	vs10
 | |
| 
 | |
| 	xvmulsp		vs44,	vs0,	vs11
 | |
| 	xvmulsp		vs45,	vs1,	vs11
 | |
| 	xvmulsp		vs46,	vs2,	vs11
 | |
| 	xvmulsp		vs47,	vs3,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x16_SUB1: self-contained accumulate step for the 4x16 tile.
 * Loads vs0-vs3 from AO and vs8-vs11 (splats of the vector at BO),
 * advances AO by 64 and BO by 16, and performs
 * vs(32+4j+i) += vs(i) * vs(8+j) for i,j in 0..3.
 * vs32-vs47 must already hold valid partial sums on entry. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x16_SUB1', `
 | |
| #else
 | |
| .macro KERNEL4x16_SUB1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 16 accumulators */
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 	xvmaddasp	vs34,	vs2,	vs8
 | |
| 	xvmaddasp	vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs9
 | |
| 	xvmaddasp	vs37,	vs1,	vs9
 | |
| 	xvmaddasp	vs38,	vs2,	vs9
 | |
| 	xvmaddasp	vs39,	vs3,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs40,	vs0,	vs10
 | |
| 	xvmaddasp	vs41,	vs1,	vs10
 | |
| 	xvmaddasp	vs42,	vs2,	vs10
 | |
| 	xvmaddasp	vs43,	vs3,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs44,	vs0,	vs11
 | |
| 	xvmaddasp	vs45,	vs1,	vs11
 | |
| 	xvmaddasp	vs46,	vs2,	vs11
 | |
| 	xvmaddasp	vs47,	vs3,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* SAVE4x16: scale the 4x16 accumulators by alpha_vr and write them out.
 * T1 starts at CO and is advanced by LDC after each of four passes.
 * Pass j (0..3) handles accumulators vs(32+4j)..vs(35+4j):
 *   without TRMMKERNEL: load four vectors at T1, add acc * alpha_vr,
 *                       store them back;
 *   with TRMMKERNEL:    store acc * alpha_vr, memory is not read.
 * Finally CO is advanced by 64 bytes. Clobbers vs0-vs3 and T1.
 * NOTE(review): CO, LDC, T1 and alpha_vr are defined outside this
 * section; LDC is presumably a byte stride and alpha_vr presumably
 * holds alpha in every word - confirm against the kernel prologue. */
| #if defined(_AIX)
 | |
| define(`SAVE4x16', `
 | |
| #else
 | |
| .macro SAVE4x16
 | |
| #endif
 | |
| 
 | |
| 	mr		T1,	CO
 | |
| 
 | |
/* pass 0: accumulators vs32-vs35 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs34,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs34,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 1: accumulators vs36-vs39 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs36,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs37,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs38,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs39,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs36,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs37,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs38,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs39,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 2: accumulators vs40-vs43 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs40,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs41,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs42,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs43,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs40,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs41,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs42,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs43,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 3: accumulators vs44-vs47 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs44,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs45,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs46,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs47,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs44,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs45,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs46,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs47,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
/* step CO past the 64 bytes written per pass */
| 	addi		CO,	CO,	64
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=4 and M=8
 | |
| **********************************************************************************************/
 | |
| 
 | |
/* LOAD4x8_1: operand load only, no arithmetic.
 * Loads two vectors from AO (index operands o0, o16) into vs0-vs1 and
 * one vector from BO into vs28, copies words 0-3 of vs28 across
 * vs8-vs11 (one word per register), then advances AO by 32 and BO by
 * 16 bytes.
 * NOTE(review): o0/o16, AO and BO are defined outside this section. */
| #if defined(_AIX)
 | |
| define(`LOAD4x8_1', `
 | |
| #else
 | |
| .macro LOAD4x8_1
 | |
| #endif
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x8_I1: initial multiply step for the 4x8 tile.
 * Loads the following operand set into vs4-vs5 (from AO) and vs16-vs19
 * (splats of the vector at BO), advancing AO by 32 and BO by 16, and
 * writes vs(32+2j+i) = vs(i) * vs(8+j) for i in 0..1, j in 0..3.
 * vs0-vs1 and vs8-vs11 must already hold operands on entry. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x8_I1', `
 | |
| #else
 | |
| .macro KERNEL4x8_I1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* initialise the 8 accumulators from the operands already in registers */
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs9
 | |
| 	xvmulsp		vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs10
 | |
| 	xvmulsp		vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmulsp		vs38,	vs0,	vs11
 | |
| 	xvmulsp		vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x8_1: accumulate step using register set vs0-vs1 / vs8-vs11.
 * Loads the following operand set into vs4-vs5 (from AO) and vs16-vs19
 * (splats of the vector at BO), advancing AO by 32 and BO by 16, and
 * performs vs(32+2j+i) += vs(i) * vs(8+j) for i in 0..1, j in 0..3. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x8_1', `
 | |
| #else
 | |
| .macro KERNEL4x8_1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 8 accumulators */
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs9
 | |
| 	xvmaddasp	vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs10
 | |
| 	xvmaddasp	vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs0,	vs11
 | |
| 	xvmaddasp	vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x8_2: accumulate step using register set vs4-vs5 / vs16-vs19.
 * Loads the following operand set into vs0-vs1 (from AO) and vs8-vs11
 * (splats of the vector at BO), advancing AO by 32 and BO by 16, and
 * performs vs(32+2j+i) += vs(4+i) * vs(16+j) for i in 0..1, j in 0..3. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x8_2', `
 | |
| #else
 | |
| .macro KERNEL4x8_2
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 8 accumulators */
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs17
 | |
| 	xvmaddasp	vs35,	vs5,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs18
 | |
| 	xvmaddasp	vs37,	vs5,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs4,	vs19
 | |
| 	xvmaddasp	vs39,	vs5,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x8_E2: final accumulate step for register set vs4-vs5 /
 * vs16-vs19. Performs vs(32+2j+i) += vs(4+i) * vs(16+j) for i in 0..1,
 * j in 0..3 without loading anything and without touching AO or BO. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x8_E2', `
 | |
| #else
 | |
| .macro KERNEL4x8_E2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs17
 | |
| 	xvmaddasp	vs35,	vs5,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs18
 | |
| 	xvmaddasp	vs37,	vs5,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs4,	vs19
 | |
| 	xvmaddasp	vs39,	vs5,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x8_SUBI1: self-contained initial step for the 4x8 tile.
 * Loads vs0-vs1 from AO and vs8-vs11 (splats of the vector at BO),
 * advances AO by 32 and BO by 16, and writes
 * vs(32+2j+i) = vs(i) * vs(8+j) for i in 0..1, j in 0..3.
 * It multiplies the operands it has just loaded and does not touch
 * vs4-vs5 or vs16-vs19. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x8_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL4x8_SUBI1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* plain multiply: the accumulators are overwritten, not added to */
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs9
 | |
| 	xvmulsp		vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs10
 | |
| 	xvmulsp		vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmulsp		vs38,	vs0,	vs11
 | |
| 	xvmulsp		vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x8_SUB1: self-contained accumulate step for the 4x8 tile.
 * Loads vs0-vs1 from AO and vs8-vs11 (splats of the vector at BO),
 * advances AO by 32 and BO by 16, and performs
 * vs(32+2j+i) += vs(i) * vs(8+j) for i in 0..1, j in 0..3.
 * vs32-vs39 must already hold valid partial sums on entry. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x8_SUB1', `
 | |
| #else
 | |
| .macro KERNEL4x8_SUB1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 8 accumulators */
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs9
 | |
| 	xvmaddasp	vs35,	vs1,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs10
 | |
| 	xvmaddasp	vs37,	vs1,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs38,	vs0,	vs11
 | |
| 	xvmaddasp	vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* SAVE4x8: scale the 4x8 accumulators by alpha_vr and write them out.
 * T1 starts at CO and is advanced by LDC after each of four passes.
 * Pass j (0..3) handles accumulators vs(32+2j) and vs(33+2j):
 *   without TRMMKERNEL: load two vectors at T1, add acc * alpha_vr,
 *                       store them back;
 *   with TRMMKERNEL:    store acc * alpha_vr, memory is not read.
 * Finally CO is advanced by 32 bytes. Clobbers vs0-vs1 and T1.
 * NOTE(review): CO, LDC, T1 and alpha_vr are defined outside this
 * section; LDC is presumably a byte stride - confirm. */
| #if defined(_AIX)
 | |
| define(`SAVE4x8', `
 | |
| #else
 | |
| .macro SAVE4x8
 | |
| #endif
 | |
| 
 | |
| 	mr		T1,	CO
 | |
| 
 | |
/* pass 0: accumulators vs32-vs33 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 1: accumulators vs34-vs35 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs34,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs34,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 2: accumulators vs36-vs37 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs36,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs37,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs36,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs37,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 3: accumulators vs38-vs39 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs38,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs39,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs38,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs39,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
/* step CO past the 32 bytes written per pass */
| 	addi		CO,	CO,	32
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=4 and M=4
 | |
| **********************************************************************************************/
 | |
| 
 | |
/* LOAD4x4_1: operand load only, no arithmetic.
 * Loads one vector from AO into vs0 and one vector from BO into vs28,
 * copies words 0-3 of vs28 across vs8-vs11 (one word per register),
 * then advances AO and BO by 16 bytes each.
 * NOTE(review): o0, AO and BO are defined outside this section. */
| #if defined(_AIX)
 | |
| define(`LOAD4x4_1', `
 | |
| #else
 | |
| .macro LOAD4x4_1
 | |
| #endif
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x4_I1: initial multiply step for the 4x4 tile.
 * Loads the following operand set into vs4 (from AO) and vs16-vs19
 * (splats of the vector at BO), advancing AO and BO by 16 each, and
 * writes vs(32+j) = vs0 * vs(8+j) for j in 0..3.
 * vs0 and vs8-vs11 must already hold operands on entry. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x4_I1', `
 | |
| #else
 | |
| .macro KERNEL4x4_I1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxvw4x		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* initialise the 4 accumulators from the operands already in registers */
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmulsp		vs33,	vs0,	vs9
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs10
 | |
| 
 | |
| 	xvmulsp		vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x4_1: accumulate step using register set vs0 / vs8-vs11.
 * Loads the following operand set into vs4 (from AO) and vs16-vs19
 * (splats of the vector at BO), advancing AO and BO by 16 each, and
 * performs vs(32+j) += vs0 * vs(8+j) for j in 0..3. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x4_1', `
 | |
| #else
 | |
| .macro KERNEL4x4_1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxvw4x		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 	xxspltw		vs18,	vs28,	2
 | |
| 	xxspltw		vs19,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 4 accumulators */
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs0,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x4_2: accumulate step using register set vs4 / vs16-vs19.
 * Loads the following operand set into vs0 (from AO) and vs8-vs11
 * (splats of the vector at BO), advancing AO and BO by 16 each, and
 * performs vs(32+j) += vs4 * vs(16+j) for j in 0..3. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x4_2', `
 | |
| #else
 | |
| .macro KERNEL4x4_2
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 4 accumulators */
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs4,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs35,	vs4,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x4_E2: final accumulate step for register set vs4 / vs16-vs19.
 * Performs vs(32+j) += vs4 * vs(16+j) for j in 0..3 without loading
 * anything and without touching AO or BO. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x4_E2', `
 | |
| #else
 | |
| .macro KERNEL4x4_E2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs4,	vs17
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs18
 | |
| 
 | |
| 	xvmaddasp	vs35,	vs4,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x4_SUBI1: self-contained initial step for the 4x4 tile.
 * Loads vs0 from AO and vs8-vs11 (splats of the vector at BO), advances
 * AO and BO by 16 each, and writes vs(32+j) = vs0 * vs(8+j) for j in
 * 0..3. It multiplies the operands it has just loaded and does not
 * touch vs4 or vs16-vs19. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x4_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL4x4_SUBI1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* plain multiply: the accumulators are overwritten, not added to */
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmulsp		vs33,	vs0,	vs9
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs10
 | |
| 
 | |
| 	xvmulsp		vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x4_SUB1: self-contained accumulate step for the 4x4 tile.
 * Loads vs0 from AO and vs8-vs11 (splats of the vector at BO), advances
 * AO and BO by 16 each, and performs vs(32+j) += vs0 * vs(8+j) for j in
 * 0..3. vs32-vs35 must already hold valid partial sums on entry. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x4_SUB1', `
 | |
| #else
 | |
| .macro KERNEL4x4_SUB1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 	xxspltw		vs10,	vs28,	2
 | |
| 	xxspltw		vs11,	vs28,	3
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 4 accumulators */
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs0,	vs9
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs10
 | |
| 
 | |
| 	xvmaddasp	vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* SAVE4x4: scale the 4x4 accumulators by alpha_vr and write them out.
 * T1 starts at CO and is advanced by LDC after each of four passes.
 * Pass j (0..3) handles accumulator vs(32+j):
 *   without TRMMKERNEL: load one vector at T1, add acc * alpha_vr,
 *                       store it back;
 *   with TRMMKERNEL:    store acc * alpha_vr, memory is not read.
 * Finally CO is advanced by 16 bytes. Clobbers vs0 and T1.
 * NOTE(review): CO, LDC, T1 and alpha_vr are defined outside this
 * section; LDC is presumably a byte stride - confirm. */
| #if defined(_AIX)
 | |
| define(`SAVE4x4', `
 | |
| #else
 | |
| .macro SAVE4x4
 | |
| #endif
 | |
| 
 | |
| 	mr		T1,	CO
 | |
| 
 | |
/* pass 0: accumulator vs32 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 1: accumulator vs33 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs33,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs33,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 2: accumulator vs34 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs34,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs34,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 3: accumulator vs35 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
/* step CO past the 16 bytes written per pass */
| 	addi		CO,	CO,	16
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=4 and M=2
 | |
| **********************************************************************************************/
 | |
| 
 | |
/* LOAD4x2_1: scalar operand load only, no arithmetic.
 * Loads two single-precision scalars from AO (index operands o0, o4)
 * into vs0-vs1 and four from BO (via T1, index operands o0..o12) into
 * vs8-vs11, then advances AO by 8 and BO by 16 bytes. Clobbers T1.
 * lxsspx leaves each value in double-precision register format, which
 * is why the kernels in this family use the scalar dp instructions.
 * NOTE(review): o0/o4/o8/o12, AO, BO and T1 are defined outside this
 * section. */
| #if defined(_AIX)
 | |
| define(`LOAD4x2_1', `
 | |
| #else
 | |
| .macro LOAD4x2_1
 | |
| #endif
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x2_I1: initial scalar multiply step for the 4x2 tile.
 * Loads the following operand set into vs4-vs5 (from AO) and vs16-vs19
 * (from BO via T1), advancing AO by 8 and BO by 16, and writes
 * vs(32+2j+i) = vs(i) * vs(8+j) for i in 0..1, j in 0..3.
 * vs0-vs1 and vs8-vs11 must already hold operands on entry.
 * Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x2_I1', `
 | |
| #else
 | |
| .macro KERNEL4x2_I1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxsspx		vs4,	o0,	AO
 | |
| 	lxsspx		vs5,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 	lxsspx		vs18,	o8,	T1
 | |
| 	lxsspx		vs19,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* initialise the 8 scalar accumulators */
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 	xsmuldp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmuldp		vs34,	vs0,	vs9
 | |
| 	xsmuldp		vs35,	vs1,	vs9
 | |
| 
 | |
| 	xsmuldp		vs36,	vs0,	vs10
 | |
| 	xsmuldp		vs37,	vs1,	vs10
 | |
| 
 | |
| 	xsmuldp		vs38,	vs0,	vs11
 | |
| 	xsmuldp		vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x2_1: scalar accumulate step using vs0-vs1 / vs8-vs11.
 * Loads the following operand set into vs4-vs5 (from AO) and vs16-vs19
 * (from BO via T1), advancing AO by 8 and BO by 16, and performs
 * vs(32+2j+i) += vs(i) * vs(8+j) for i in 0..1, j in 0..3.
 * Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x2_1', `
 | |
| #else
 | |
| .macro KERNEL4x2_1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxsspx		vs4,	o0,	AO
 | |
| 	lxsspx		vs5,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 	lxsspx		vs18,	o8,	T1
 | |
| 	lxsspx		vs19,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 8 scalar accumulators */
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 	xsmaddadp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs0,	vs9
 | |
| 	xsmaddadp	vs35,	vs1,	vs9
 | |
| 
 | |
| 	xsmaddadp	vs36,	vs0,	vs10
 | |
| 	xsmaddadp	vs37,	vs1,	vs10
 | |
| 
 | |
| 	xsmaddadp	vs38,	vs0,	vs11
 | |
| 	xsmaddadp	vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x2_2: scalar accumulate step using vs4-vs5 / vs16-vs19.
 * Loads the following operand set into vs0-vs1 (from AO) and vs8-vs11
 * (from BO via T1), advancing AO by 8 and BO by 16, and performs
 * vs(32+2j+i) += vs(4+i) * vs(16+j) for i in 0..1, j in 0..3.
 * Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x2_2', `
 | |
| #else
 | |
| .macro KERNEL4x2_2
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 8 scalar accumulators */
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 	xsmaddadp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs4,	vs17
 | |
| 	xsmaddadp	vs35,	vs5,	vs17
 | |
| 
 | |
| 	xsmaddadp	vs36,	vs4,	vs18
 | |
| 	xsmaddadp	vs37,	vs5,	vs18
 | |
| 
 | |
| 	xsmaddadp	vs38,	vs4,	vs19
 | |
| 	xsmaddadp	vs39,	vs5,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x2_E2: final scalar accumulate step for vs4-vs5 / vs16-vs19.
 * Performs vs(32+2j+i) += vs(4+i) * vs(16+j) for i in 0..1, j in 0..3
 * without loading anything and without touching AO, BO or T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x2_E2', `
 | |
| #else
 | |
| .macro KERNEL4x2_E2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 	xsmaddadp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs4,	vs17
 | |
| 	xsmaddadp	vs35,	vs5,	vs17
 | |
| 
 | |
| 	xsmaddadp	vs36,	vs4,	vs18
 | |
| 	xsmaddadp	vs37,	vs5,	vs18
 | |
| 
 | |
| 	xsmaddadp	vs38,	vs4,	vs19
 | |
| 	xsmaddadp	vs39,	vs5,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x2_SUBI1: self-contained initial scalar step for the 4x2 tile.
 * Loads vs0-vs1 from AO and vs8-vs11 from BO (via T1), advances AO by 8
 * and BO by 16, and writes vs(32+2j+i) = vs(i) * vs(8+j) for i in 0..1,
 * j in 0..3. It multiplies the operands it has just loaded.
 * Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x2_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL4x2_SUBI1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* plain multiply: the accumulators are overwritten, not added to */
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 	xsmuldp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmuldp		vs34,	vs0,	vs9
 | |
| 	xsmuldp		vs35,	vs1,	vs9
 | |
| 
 | |
| 	xsmuldp		vs36,	vs0,	vs10
 | |
| 	xsmuldp		vs37,	vs1,	vs10
 | |
| 
 | |
| 	xsmuldp		vs38,	vs0,	vs11
 | |
| 	xsmuldp		vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x2_SUB1: self-contained scalar accumulate step for the 4x2
 * tile. Loads vs0-vs1 from AO and vs8-vs11 from BO (via T1), advances
 * AO by 8 and BO by 16, and performs vs(32+2j+i) += vs(i) * vs(8+j)
 * for i in 0..1, j in 0..3. vs32-vs39 must already hold valid partial
 * sums on entry. Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x2_SUB1', `
 | |
| #else
 | |
| .macro KERNEL4x2_SUB1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 8 scalar accumulators */
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 	xsmaddadp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs0,	vs9
 | |
| 	xsmaddadp	vs35,	vs1,	vs9
 | |
| 
 | |
| 	xsmaddadp	vs36,	vs0,	vs10
 | |
| 	xsmaddadp	vs37,	vs1,	vs10
 | |
| 
 | |
| 	xsmaddadp	vs38,	vs0,	vs11
 | |
| 	xsmaddadp	vs39,	vs1,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* SAVE4x2: scale the 4x2 scalar accumulators by alpha_r and write them
 * out as single-precision values.
 * T1 starts at CO and is advanced by LDC after each of four passes.
 * Pass j (0..3) handles accumulators vs(32+2j) and vs(33+2j):
 *   without TRMMKERNEL: load two scalars at T1, add acc * alpha_r,
 *                       store them back;
 *   with TRMMKERNEL:    store acc * alpha_r, memory is not read.
 * Finally CO is advanced by 8 bytes. Clobbers vs0-vs1 and T1.
 * NOTE(review): CO, LDC, T1 and alpha_r are defined outside this
 * section; LDC is presumably a byte stride - confirm. */
| #if defined(_AIX)
 | |
| define(`SAVE4x2', `
 | |
| #else
 | |
| .macro SAVE4x2
 | |
| #endif
 | |
| 
 | |
| 	mr		T1,	CO
 | |
| 
 | |
/* pass 0: accumulators vs32-vs33 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 	lxsspx		vs1,	o4,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs32,	alpha_r
 | |
| 	xsmuldp		vs1,	vs33,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs32,	alpha_r
 | |
| 	xsmaddadp	vs1,	vs33,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 	stxsspx		vs1,	o4,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 1: accumulators vs34-vs35 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 	lxsspx		vs1,	o4,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs34,	alpha_r
 | |
| 	xsmuldp		vs1,	vs35,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs34,	alpha_r
 | |
| 	xsmaddadp	vs1,	vs35,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 	stxsspx		vs1,	o4,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 2: accumulators vs36-vs37 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 	lxsspx		vs1,	o4,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs36,	alpha_r
 | |
| 	xsmuldp		vs1,	vs37,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs36,	alpha_r
 | |
| 	xsmaddadp	vs1,	vs37,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 	stxsspx		vs1,	o4,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
/* pass 3: accumulators vs38-vs39 */
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 	lxsspx		vs1,	o4,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs38,	alpha_r
 | |
| 	xsmuldp		vs1,	vs39,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs38,	alpha_r
 | |
| 	xsmaddadp	vs1,	vs39,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 	stxsspx		vs1,	o4,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
/* step CO past the 8 bytes written per pass */
| 	addi		CO,	CO,	8
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=4 and M=1
 | |
| **********************************************************************************************/
 | |
| 
 | |
/* LOAD4x1_1: scalar operand load only, no arithmetic.
 * Loads one single-precision scalar from AO into vs0 and four from BO
 * (via T1, index operands o0..o12) into vs8-vs11, then advances AO by 4
 * and BO by 16 bytes. Clobbers T1.
 * NOTE(review): o0/o4/o8/o12, AO, BO and T1 are defined outside this
 * section. */
| #if defined(_AIX)
 | |
| define(`LOAD4x1_1', `
 | |
| #else
 | |
| .macro LOAD4x1_1
 | |
| #endif
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x1_I1: initial scalar multiply step for the 4x1 tile.
 * Loads the following operand set into vs4 (from AO) and vs16-vs19
 * (from BO via T1), advancing AO by 4 and BO by 16, and writes
 * vs(32+j) = vs0 * vs(8+j) for j in 0..3.
 * vs0 and vs8-vs11 must already hold operands on entry. Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x1_I1', `
 | |
| #else
 | |
| .macro KERNEL4x1_I1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxsspx		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 	lxsspx		vs18,	o8,	T1
 | |
| 	lxsspx		vs19,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* initialise the 4 scalar accumulators */
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmuldp		vs33,	vs0,	vs9
 | |
| 
 | |
| 	xsmuldp		vs34,	vs0,	vs10
 | |
| 
 | |
| 	xsmuldp		vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x1_1: scalar accumulate step using vs0 / vs8-vs11.
 * Loads the following operand set into vs4 (from AO) and vs16-vs19
 * (from BO via T1), advancing AO by 4 and BO by 16, and performs
 * vs(32+j) += vs0 * vs(8+j) for j in 0..3. Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x1_1', `
 | |
| #else
 | |
| .macro KERNEL4x1_1
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxsspx		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 	lxsspx		vs18,	o8,	T1
 | |
| 	lxsspx		vs19,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 4 scalar accumulators */
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs0,	vs9
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs0,	vs10
 | |
| 
 | |
| 	xsmaddadp	vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x1_2: scalar accumulate step using vs4 / vs16-vs19.
 * Loads the following operand set into vs0 (from AO) and vs8-vs11
 * (from BO via T1), advancing AO by 4 and BO by 16, and performs
 * vs(32+j) += vs4 * vs(16+j) for j in 0..3. Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x1_2', `
 | |
| #else
 | |
| .macro KERNEL4x1_2
 | |
| #endif
 | |
| 
 | |
| 
 | |
/* fetch the operands consumed by the following step */
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* multiply-add into the 4 scalar accumulators */
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs4,	vs17
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs4,	vs18
 | |
| 
 | |
| 	xsmaddadp	vs35,	vs4,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x1_E2: final scalar accumulate step for vs4 / vs16-vs19.
 * Performs vs(32+j) += vs4 * vs(16+j) for j in 0..3 without loading
 * anything and without touching AO, BO or T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x1_E2', `
 | |
| #else
 | |
| .macro KERNEL4x1_E2
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs4,	vs17
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs4,	vs18
 | |
| 
 | |
| 	xsmaddadp	vs35,	vs4,	vs19
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
/* KERNEL4x1_SUBI1: self-contained initial scalar step for the 4x1 tile.
 * Loads vs0 from AO and vs8-vs11 from BO (via T1), advances AO by 4 and
 * BO by 16, and writes vs(32+j) = vs0 * vs(8+j) for j in 0..3.
 * It multiplies the operands it has just loaded. Clobbers T1. */
| #if defined(_AIX)
 | |
| define(`KERNEL4x1_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL4x1_SUBI1
 | |
| #endif
 | |
| 
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| 
 | |
/* plain multiply: the accumulators are overwritten, not added to */
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmuldp		vs33,	vs0,	vs9
 | |
| 
 | |
| 	xsmuldp		vs34,	vs0,	vs10
 | |
| 
 | |
| 	xsmuldp		vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL4x1_SUB1', `
 | |
| #else
 | |
| .macro KERNEL4x1_SUB1
 | |
| #endif
 | |
| /* KERNEL4x1_SUB1: self-contained step. Loads one float from AO and four via BO and */
 | |
| /* accumulates their products into vs32-vs35 within this same macro. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 	lxsspx		vs10,	o8,	T1
 | |
| 	lxsspx		vs11,	o12,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	16
 | |
| 
 | |
| /* vs32..vs35 += vs0 * vs8..vs11 */
 | |
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs0,	vs9
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs0,	vs10
 | |
| 
 | |
| 	xsmaddadp	vs35,	vs0,	vs11
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE4x1', `
 | |
| #else
 | |
| .macro SAVE4x1
 | |
| #endif
 | |
| /* SAVE4x1: write accumulators vs32-vs35 as four single floats, at CO and at three further */
 | |
| 	mr		T1,	CO
 | |
| /* addresses each LDC beyond the previous (LDC is added to the address unscaled). vs0 is scratch. */
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| /* TRMMKERNEL: vs0 = acc * alpha_r; otherwise vs0 = old C + acc * alpha_r (alpha_r is defined outside this macro) */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs32,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs32,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs33,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs33,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs34,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs34,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs35,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs35,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| /* NOTE(review): T1 is not read again inside this macro after the following add */
 | |
| 	add		T1,	T1,	LDC
 | |
| /* advance CO by one float (4 bytes) */
 | |
| 	addi		CO,	CO,	4
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=2 and M=16
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD2x16_1', `
 | |
| #else
 | |
| .macro LOAD2x16_1
 | |
| #endif
 | |
| /* LOAD2x16_1: preload only, no arithmetic. vs0-vs3 = four vectors from AO; vs8, vs9 = words 0, 1 at BO splatted. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* vs28 is a 16-byte load but only words 0 and 1 are used and BO advances 8. NOTE(review): confirm the over-read of B is safe */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x16_I1', `
 | |
| #else
 | |
| .macro KERNEL2x16_I1
 | |
| #endif
 | |
| /* KERNEL2x16_I1: pipeline start. Fetches the second operand set (vs4-vs7, vs16, vs17) and */
 | |
| /* initialises vs32-vs39 by plain multiply of the set already in vs0-vs3, vs8, vs9 (not loaded here). */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 	lxvw4x		vs6,	o32,	AO
 | |
| 	lxvw4x		vs7,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32-vs35 = vs0-vs3 * vs8, vs36-vs39 = vs0-vs3 * vs9 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 	xvmulsp		vs34,	vs2,	vs8
 | |
| 	xvmulsp		vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs9
 | |
| 	xvmulsp		vs37,	vs1,	vs9
 | |
| 	xvmulsp		vs38,	vs2,	vs9
 | |
| 	xvmulsp		vs39,	vs3,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x16_1', `
 | |
| #else
 | |
| .macro KERNEL2x16_1
 | |
| #endif
 | |
| /* KERNEL2x16_1: pipelined step. Fetches the next operands (vs4-vs7 from AO, vs16, vs17 from BO) */
 | |
| /* and accumulates the set already held in vs0-vs3, vs8, vs9 (not loaded here) into vs32-vs39. */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 	lxvw4x		vs6,	o32,	AO
 | |
| 	lxvw4x		vs7,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32-vs35 += vs0-vs3 * vs8, vs36-vs39 += vs0-vs3 * vs9 */
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 	xvmaddasp	vs34,	vs2,	vs8
 | |
| 	xvmaddasp	vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs9
 | |
| 	xvmaddasp	vs37,	vs1,	vs9
 | |
| 	xvmaddasp	vs38,	vs2,	vs9
 | |
| 	xvmaddasp	vs39,	vs3,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x16_2', `
 | |
| #else
 | |
| .macro KERNEL2x16_2
 | |
| #endif
 | |
| /* KERNEL2x16_2: pipelined step with roles swapped. Fetches the next operands (vs0-vs3, vs8, vs9) */
 | |
| /* and accumulates the set already held in vs4-vs7, vs16, vs17 (not loaded here) into vs32-vs39. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32-vs35 += vs4-vs7 * vs16, vs36-vs39 += vs4-vs7 * vs17 */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs17
 | |
| 	xvmaddasp	vs37,	vs5,	vs17
 | |
| 	xvmaddasp	vs38,	vs6,	vs17
 | |
| 	xvmaddasp	vs39,	vs7,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x16_E2', `
 | |
| #else
 | |
| .macro KERNEL2x16_E2
 | |
| #endif
 | |
| /* KERNEL2x16_E2: pipeline tail. Performs no loads and moves no pointers; it only */
 | |
| /* accumulates the operands already held in vs4-vs7, vs16, vs17 into vs32-vs39. */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs4,	vs17
 | |
| 	xvmaddasp	vs37,	vs5,	vs17
 | |
| 	xvmaddasp	vs38,	vs6,	vs17
 | |
| 	xvmaddasp	vs39,	vs7,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x16_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL2x16_SUBI1
 | |
| #endif
 | |
| /* KERNEL2x16_SUBI1: self-contained first step. Loads four vectors from AO and two B words, */
 | |
| /* then overwrites vs32-vs39 with the products, so the accumulators need no prior clearing. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32-vs35 = vs0-vs3 * vs8, vs36-vs39 = vs0-vs3 * vs9 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 	xvmulsp		vs34,	vs2,	vs8
 | |
| 	xvmulsp		vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmulsp		vs36,	vs0,	vs9
 | |
| 	xvmulsp		vs37,	vs1,	vs9
 | |
| 	xvmulsp		vs38,	vs2,	vs9
 | |
| 	xvmulsp		vs39,	vs3,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x16_SUB1', `
 | |
| #else
 | |
| .macro KERNEL2x16_SUB1
 | |
| #endif
 | |
| /* KERNEL2x16_SUB1: self-contained step. Loads four vectors from AO and two B words and */
 | |
| /* accumulates their products into vs32-vs39 within this same macro. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32-vs35 += vs0-vs3 * vs8, vs36-vs39 += vs0-vs3 * vs9 */
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 	xvmaddasp	vs34,	vs2,	vs8
 | |
| 	xvmaddasp	vs35,	vs3,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs36,	vs0,	vs9
 | |
| 	xvmaddasp	vs37,	vs1,	vs9
 | |
| 	xvmaddasp	vs38,	vs2,	vs9
 | |
| 	xvmaddasp	vs39,	vs3,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE2x16', `
 | |
| #else
 | |
| .macro SAVE2x16
 | |
| #endif
 | |
| /* SAVE2x16: write vs32-vs35 as four vectors at CO and vs36-vs39 as four vectors at CO + LDC. vs0-vs3 are scratch. */
 | |
| 	mr		T1,	CO
 | |
| /* LDC is added to the address unscaled. Without TRMMKERNEL the old C contents are loaded first. */
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| /* TRMMKERNEL: out = acc * alpha_vr; otherwise out = old C + acc * alpha_vr (alpha_vr is defined outside this macro) */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs34,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs34,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs36,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs37,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs38,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs39,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs36,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs37,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs38,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs39,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| /* NOTE(review): T1 is not read again inside this macro after the following add */
 | |
| 	add		T1,	T1,	LDC
 | |
| /* advance CO by 16 floats (64 bytes) */
 | |
| 	addi		CO,	CO,	64
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=2 and M=8
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD2x8_1', `
 | |
| #else
 | |
| .macro LOAD2x8_1
 | |
| #endif
 | |
| /* LOAD2x8_1: preload only, no arithmetic. vs0, vs1 = two vectors from AO; vs8, vs9 = words 0, 1 at BO splatted. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* vs28 is a 16-byte load but only words 0 and 1 are used and BO advances 8. NOTE(review): confirm the over-read of B is safe */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x8_I1', `
 | |
| #else
 | |
| .macro KERNEL2x8_I1
 | |
| #endif
 | |
| /* KERNEL2x8_I1: pipeline start. Fetches the second operand set (vs4, vs5, vs16, vs17) and */
 | |
| /* initialises vs32-vs35 by plain multiply of the set already in vs0, vs1, vs8, vs9 (not loaded here). */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 = vs0, vs1 * vs8; vs34, vs35 = vs0, vs1 * vs9 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs9
 | |
| 	xvmulsp		vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x8_1', `
 | |
| #else
 | |
| .macro KERNEL2x8_1
 | |
| #endif
 | |
| /* KERNEL2x8_1: pipelined step. Fetches the next operands (vs4, vs5 from AO, vs16, vs17 from BO) */
 | |
| /* and accumulates the set already held in vs0, vs1, vs8, vs9 (not loaded here) into vs32-vs35. */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 += vs0, vs1 * vs8; vs34, vs35 += vs0, vs1 * vs9 */
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs9
 | |
| 	xvmaddasp	vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x8_2', `
 | |
| #else
 | |
| .macro KERNEL2x8_2
 | |
| #endif
 | |
| /* KERNEL2x8_2: pipelined step with roles swapped. Fetches the next operands (vs0, vs1, vs8, vs9) */
 | |
| /* and accumulates the set already held in vs4, vs5, vs16, vs17 (not loaded here) into vs32-vs35. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 += vs4, vs5 * vs16; vs34, vs35 += vs4, vs5 * vs17 */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs17
 | |
| 	xvmaddasp	vs35,	vs5,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x8_E2', `
 | |
| #else
 | |
| .macro KERNEL2x8_E2
 | |
| #endif
 | |
| /* KERNEL2x8_E2: pipeline tail. Performs no loads and moves no pointers; it only */
 | |
| /* accumulates the operands already held in vs4, vs5, vs16, vs17 into vs32-vs35. */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs4,	vs17
 | |
| 	xvmaddasp	vs35,	vs5,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x8_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL2x8_SUBI1
 | |
| #endif
 | |
| /* KERNEL2x8_SUBI1: self-contained first step. Loads two vectors from AO and two B words, */
 | |
| /* then overwrites vs32-vs35 with the products, so the accumulators need no prior clearing. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 = vs0, vs1 * vs8; vs34, vs35 = vs0, vs1 * vs9 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmulsp		vs34,	vs0,	vs9
 | |
| 	xvmulsp		vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x8_SUB1', `
 | |
| #else
 | |
| .macro KERNEL2x8_SUB1
 | |
| #endif
 | |
| /* KERNEL2x8_SUB1: self-contained step. Loads two vectors from AO and two B words and */
 | |
| /* accumulates their products into vs32-vs35 within this same macro. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 += vs0, vs1 * vs8; vs34, vs35 += vs0, vs1 * vs9 */
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs34,	vs0,	vs9
 | |
| 	xvmaddasp	vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE2x8', `
 | |
| #else
 | |
| .macro SAVE2x8
 | |
| #endif
 | |
| /* SAVE2x8: write vs32, vs33 as two vectors at CO and vs34, vs35 as two vectors at CO + LDC. vs0, vs1 are scratch. */
 | |
| 	mr		T1,	CO
 | |
| /* LDC is added to the address unscaled. Without TRMMKERNEL the old C contents are loaded first. */
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| /* TRMMKERNEL: out = acc * alpha_vr; otherwise out = old C + acc * alpha_vr (alpha_vr is defined outside this macro) */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs34,	alpha_vr
 | |
| 	xvmulsp		vs1,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs34,	alpha_vr
 | |
| 	xvmaddasp	vs1,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| /* NOTE(review): T1 is not read again inside this macro after the following add */
 | |
| 	add		T1,	T1,	LDC
 | |
| /* advance CO by 8 floats (32 bytes) */
 | |
| 	addi		CO,	CO,	32
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=2 and M=4
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD2x4_1', `
 | |
| #else
 | |
| .macro LOAD2x4_1
 | |
| #endif
 | |
| /* LOAD2x4_1: preload only, no arithmetic. vs0 = one vector from AO; vs8, vs9 = words 0, 1 at BO splatted. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* vs28 is a 16-byte load but only words 0 and 1 are used and BO advances 8. NOTE(review): confirm the over-read of B is safe */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x4_I1', `
 | |
| #else
 | |
| .macro KERNEL2x4_I1
 | |
| #endif
 | |
| /* KERNEL2x4_I1: pipeline start. Fetches the second operand set (vs4, vs16, vs17) and */
 | |
| /* initialises vs32, vs33 by plain multiply of the set already in vs0, vs8, vs9 (not loaded here). */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 = vs0 * vs8, vs33 = vs0 * vs9 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmulsp		vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x4_1', `
 | |
| #else
 | |
| .macro KERNEL2x4_1
 | |
| #endif
 | |
| /* KERNEL2x4_1: pipelined step. Fetches the next operands (vs4 from AO, vs16, vs17 from BO) */
 | |
| /* and accumulates the set already held in vs0, vs8, vs9 (not loaded here) into vs32, vs33. */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 	xxspltw		vs17,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 += vs0 * vs8, vs33 += vs0 * vs9 */
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x4_2', `
 | |
| #else
 | |
| .macro KERNEL2x4_2
 | |
| #endif
 | |
| /* KERNEL2x4_2: pipelined step with roles swapped. Fetches the next operands (vs0, vs8, vs9) */
 | |
| /* and accumulates the set already held in vs4, vs16, vs17 (not loaded here) into vs32, vs33. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 += vs4 * vs16, vs33 += vs4 * vs17 */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs4,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x4_E2', `
 | |
| #else
 | |
| .macro KERNEL2x4_E2
 | |
| #endif
 | |
| /* KERNEL2x4_E2: pipeline tail. Performs no loads and moves no pointers; it only */
 | |
| /* accumulates the operands already held in vs4, vs16, vs17 into vs32, vs33. */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs4,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x4_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL2x4_SUBI1
 | |
| #endif
 | |
| /* KERNEL2x4_SUBI1: self-contained first step. Loads one vector from AO and two B words, */
 | |
| /* then overwrites vs32, vs33 with the products, so the accumulators need no prior clearing. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 = vs0 * vs8, vs33 = vs0 * vs9 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmulsp		vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x4_SUB1', `
 | |
| #else
 | |
| .macro KERNEL2x4_SUB1
 | |
| #endif
 | |
| /* KERNEL2x4_SUB1: self-contained step. Loads one vector from AO and two B words and */
 | |
| /* accumulates their products into vs32, vs33 within this same macro. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	16
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat words 0 and 1 of vs28 across all lanes; the load is 16 bytes while BO advances only 8 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 	xxspltw		vs9,	vs28,	1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 += vs0 * vs8, vs33 += vs0 * vs9 */
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xvmaddasp	vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE2x4', `
 | |
| #else
 | |
| .macro SAVE2x4
 | |
| #endif
 | |
| /* SAVE2x4: write vs32 as one vector at CO and vs33 as one vector at CO + LDC. vs0 is scratch. */
 | |
| 	mr		T1,	CO
 | |
| /* LDC is added to the address unscaled. Without TRMMKERNEL the old C contents are loaded first. */
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| /* TRMMKERNEL: out = acc * alpha_vr; otherwise out = old C + acc * alpha_vr (alpha_vr is defined outside this macro) */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs33,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs33,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1
 | |
| /* NOTE(review): T1 is not read again inside this macro after the following add */
 | |
| 	add		T1,	T1,	LDC
 | |
| /* advance CO by 4 floats (16 bytes) */
 | |
| 	addi		CO,	CO,	16
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=2 and M=2
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD2x2_1', `
 | |
| #else
 | |
| .macro LOAD2x2_1
 | |
| #endif
 | |
| /* LOAD2x2_1: preload only, no arithmetic. Scalar loads: vs0, vs1 from AO and vs8, vs9 via BO. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| /* T1 = BO: the two loads below address through T1; BO itself is advanced afterwards */
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x2_I1', `
 | |
| #else
 | |
| .macro KERNEL2x2_I1
 | |
| #endif
 | |
| /* KERNEL2x2_I1: pipeline start. Fetches the second operand set (vs4, vs5, vs16, vs17) and */
 | |
| /* initialises vs32-vs35 by plain multiply of the set already in vs0, vs1, vs8, vs9 (not loaded here). */
 | |
| 	lxsspx		vs4,	o0,	AO
 | |
| 	lxsspx		vs5,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 = vs0, vs1 * vs8; vs34, vs35 = vs0, vs1 * vs9 (scalar multiply, overwrites) */
 | |
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 	xsmuldp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmuldp		vs34,	vs0,	vs9
 | |
| 	xsmuldp		vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x2_1', `
 | |
| #else
 | |
| .macro KERNEL2x2_1
 | |
| #endif
 | |
| /* KERNEL2x2_1: pipelined step. Fetches the next operands (vs4, vs5 from AO, vs16, vs17 via BO) */
 | |
| /* and accumulates the set already held in vs0, vs1, vs8, vs9 (not loaded here) into vs32-vs35. */
 | |
| 	lxsspx		vs4,	o0,	AO
 | |
| 	lxsspx		vs5,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 += vs0, vs1 * vs8; vs34, vs35 += vs0, vs1 * vs9 */
 | |
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 	xsmaddadp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs0,	vs9
 | |
| 	xsmaddadp	vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x2_2', `
 | |
| #else
 | |
| .macro KERNEL2x2_2
 | |
| #endif
 | |
| /* KERNEL2x2_2: pipelined step with roles swapped. Fetches the next operands (vs0, vs1, vs8, vs9) */
 | |
| /* and accumulates the set already held in vs4, vs5, vs16, vs17 (not loaded here) into vs32-vs35. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 += vs4, vs5 * vs16; vs34, vs35 += vs4, vs5 * vs17 */
 | |
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 	xsmaddadp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs4,	vs17
 | |
| 	xsmaddadp	vs35,	vs5,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x2_E2', `
 | |
| #else
 | |
| .macro KERNEL2x2_E2
 | |
| #endif
 | |
| /* KERNEL2x2_E2: pipeline tail. Performs no loads and moves no pointers; it only */
 | |
| /* accumulates the operands already held in vs4, vs5, vs16, vs17 into vs32-vs35. */
 | |
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 	xsmaddadp	vs33,	vs5,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs4,	vs17
 | |
| 	xsmaddadp	vs35,	vs5,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x2_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL2x2_SUBI1
 | |
| #endif
 | |
| /* KERNEL2x2_SUBI1: self-contained first step. Loads two floats from AO and two via BO, */
 | |
| /* then overwrites vs32-vs35 with the products, so the accumulators need no prior clearing. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 = vs0, vs1 * vs8; vs34, vs35 = vs0, vs1 * vs9 (scalar multiply, overwrites) */
 | |
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 	xsmuldp		vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmuldp		vs34,	vs0,	vs9
 | |
| 	xsmuldp		vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x2_SUB1', `
 | |
| #else
 | |
| .macro KERNEL2x2_SUB1
 | |
| #endif
 | |
| /* KERNEL2x2_SUB1: self-contained step. Loads two floats from AO and two via BO and */
 | |
| /* accumulates their products into vs32-vs35 within this same macro. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32, vs33 += vs0, vs1 * vs8; vs34, vs35 += vs0, vs1 * vs9 */
 | |
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 	xsmaddadp	vs33,	vs1,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs34,	vs0,	vs9
 | |
| 	xsmaddadp	vs35,	vs1,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE2x2', `
 | |
| #else
 | |
| .macro SAVE2x2
 | |
| #endif
 | |
| /* SAVE2x2: write vs32, vs33 as two floats at CO and vs34, vs35 as two floats at CO + LDC. vs0, vs1 are scratch. */
 | |
| 	mr		T1,	CO
 | |
| /* LDC is added to the address unscaled. Without TRMMKERNEL the old C contents are loaded first. */
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 	lxsspx		vs1,	o4,	T1
 | |
| 
 | |
| #endif
 | |
| /* TRMMKERNEL: out = acc * alpha_r; otherwise out = old C + acc * alpha_r (alpha_r is defined outside this macro) */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs32,	alpha_r
 | |
| 	xsmuldp		vs1,	vs33,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs32,	alpha_r
 | |
| 	xsmaddadp	vs1,	vs33,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 	stxsspx		vs1,	o4,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 	lxsspx		vs1,	o4,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs34,	alpha_r
 | |
| 	xsmuldp		vs1,	vs35,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs34,	alpha_r
 | |
| 	xsmaddadp	vs1,	vs35,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 	stxsspx		vs1,	o4,	T1
 | |
| /* NOTE(review): T1 is not read again inside this macro after the following add */
 | |
| 	add		T1,	T1,	LDC
 | |
| /* advance CO by 2 floats (8 bytes) */
 | |
| 	addi		CO,	CO,	8
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=2 and M=1
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD2x1_1', `
 | |
| #else
 | |
| .macro LOAD2x1_1
 | |
| #endif
 | |
| /* LOAD2x1_1: preload only, no arithmetic. Scalar loads: vs0 from AO and vs8, vs9 via BO. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| /* T1 = BO: the two loads below address through T1; BO itself is advanced afterwards */
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x1_I1', `
 | |
| #else
 | |
| .macro KERNEL2x1_I1
 | |
| #endif
 | |
| /* KERNEL2x1_I1: pipeline start. Fetches the second operand set (vs4, vs16, vs17) and */
 | |
| /* initialises vs32, vs33 by plain multiply of the set already in vs0, vs8, vs9 (not loaded here). */
 | |
| 	lxsspx		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 = vs0 * vs8, vs33 = vs0 * vs9 (scalar multiply, overwrites) */
 | |
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmuldp		vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x1_1', `
 | |
| #else
 | |
| .macro KERNEL2x1_1
 | |
| #endif
 | |
| /* KERNEL2x1_1: pipelined step. Fetches the next operands (vs4 from AO, vs16, vs17 via BO) */
 | |
| /* and accumulates the set already held in vs0, vs8, vs9 (not loaded here) into vs32, vs33. */
 | |
| 	lxsspx		vs4,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1
 | |
| 	lxsspx		vs17,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 += vs0 * vs8, vs33 += vs0 * vs9 */
 | |
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x1_2', `
 | |
| #else
 | |
| .macro KERNEL2x1_2
 | |
| #endif
 | |
| /* KERNEL2x1_2: pipelined step with roles swapped. Fetches the next operands (vs0, vs8, vs9) */
 | |
| /* and accumulates the set already held in vs4, vs16, vs17 (not loaded here) into vs32, vs33. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 += vs4 * vs16, vs33 += vs4 * vs17 */
 | |
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs4,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x1_E2', `
 | |
| #else
 | |
| .macro KERNEL2x1_E2
 | |
| #endif
 | |
| /* KERNEL2x1_E2: pipeline tail. Performs no loads and moves no pointers; it only */
 | |
| /* accumulates the operands already held in vs4, vs16, vs17 into vs32, vs33. */
 | |
| 	xsmaddadp	vs32,	vs4,	vs16
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs4,	vs17
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x1_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL2x1_SUBI1
 | |
| #endif
 | |
| /* KERNEL2x1_SUBI1: self-contained first step. Loads one float from AO and two via BO, */
 | |
| /* then overwrites vs32, vs33 with the products, so the accumulators need no prior clearing. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 = vs0 * vs8, vs33 = vs0 * vs9 (scalar multiply, overwrites) */
 | |
| 	xsmuldp		vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmuldp		vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL2x1_SUB1', `
 | |
| #else
 | |
| .macro KERNEL2x1_SUB1
 | |
| #endif
 | |
| /* KERNEL2x1_SUB1: self-contained step. Loads one float from AO and two via BO and */
 | |
| /* accumulates their products into vs32, vs33 within this same macro. */
 | |
| 	lxsspx		vs0,	o0,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	4
 | |
| 
 | |
| 	mr		T1,	BO
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1
 | |
| 	lxsspx		vs9,	o4,	T1
 | |
| 
 | |
| 	addi		BO,	BO,	8
 | |
| 
 | |
| /* vs32 += vs0 * vs8, vs33 += vs0 * vs9 */
 | |
| 	xsmaddadp	vs32,	vs0,	vs8
 | |
| 
 | |
| 	xsmaddadp	vs33,	vs0,	vs9
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE2x1', `
 | |
| #else
 | |
| .macro SAVE2x1
 | |
| #endif
 | |
| /* SAVE2x1: write vs32 as one float at CO and vs33 as one float at CO + LDC. vs0 is scratch. */
 | |
| 	mr		T1,	CO
 | |
| /* LDC is added to the address unscaled. Without TRMMKERNEL the old C contents are loaded first. */
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| /* TRMMKERNEL: out = acc * alpha_r; otherwise out = old C + acc * alpha_r (alpha_r is defined outside this macro) */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs32,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs32,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC
 | |
| 
 | |
| 
 | |
| #ifndef TRMMKERNEL
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs33,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs33,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1
 | |
| /* NOTE(review): T1 is not read again inside this macro after the following add */
 | |
| 	add		T1,	T1,	LDC
 | |
| /* advance CO by one float (4 bytes) */
 | |
| 	addi		CO,	CO,	4
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=1 and M=16
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD1x16_1', `
 | |
| #else
 | |
| .macro LOAD1x16_1
 | |
| #endif
 | |
| /* LOAD1x16_1: preload only, no arithmetic. vs0-vs3 = four vectors from AO; vs8 = word 0 at BO splatted. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* vs28 is a 16-byte load but only word 0 is used and BO advances 4. NOTE(review): confirm the over-read of B is safe */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 
 | |
| 	addi		BO,	BO,	4
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x16_I1', `
 | |
| #else
 | |
| .macro KERNEL1x16_I1
 | |
| #endif
 | |
| /* KERNEL1x16_I1: pipeline start. Fetches the second operand set (vs4-vs7, vs16) and */
 | |
| /* initialises vs32-vs35 by plain multiply of the set already in vs0-vs3, vs8 (not loaded here). */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 	lxvw4x		vs6,	o32,	AO
 | |
| 	lxvw4x		vs7,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat word 0 of vs28 across all lanes; the load is 16 bytes while BO advances only 4 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 
 | |
| 	addi		BO,	BO,	4
 | |
| 
 | |
| /* vs32-vs35 = vs0-vs3 * vs8 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 	xvmulsp		vs34,	vs2,	vs8
 | |
| 	xvmulsp		vs35,	vs3,	vs8
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x16_1', `
 | |
| #else
 | |
| .macro KERNEL1x16_1
 | |
| #endif
 | |
| /* KERNEL1x16_1: pipelined step. Fetches the next operands (vs4-vs7 from AO, vs16 from BO) */
 | |
| /* and accumulates the set already held in vs0-vs3, vs8 (not loaded here) into vs32-vs35. */
 | |
| 	lxvw4x		vs4,	o0,	AO
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 	lxvw4x		vs6,	o32,	AO
 | |
| 	lxvw4x		vs7,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat word 0 of vs28 across all lanes; the load is 16 bytes while BO advances only 4 */
 | |
| 	xxspltw		vs16,	vs28,	0
 | |
| 
 | |
| 	addi		BO,	BO,	4
 | |
| 
 | |
| /* vs32-vs35 += vs0-vs3 * vs8 */
 | |
| 	xvmaddasp	vs32,	vs0,	vs8
 | |
| 	xvmaddasp	vs33,	vs1,	vs8
 | |
| 	xvmaddasp	vs34,	vs2,	vs8
 | |
| 	xvmaddasp	vs35,	vs3,	vs8
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x16_2', `
 | |
| #else
 | |
| .macro KERNEL1x16_2
 | |
| #endif
 | |
| /* KERNEL1x16_2: pipelined step with roles swapped. Fetches the next operands (vs0-vs3, vs8) */
 | |
| /* and accumulates the set already held in vs4-vs7, vs16 (not loaded here) into vs32-vs35. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat word 0 of vs28 across all lanes; the load is 16 bytes while BO advances only 4 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 
 | |
| 	addi		BO,	BO,	4
 | |
| 
 | |
| /* vs32-vs35 += vs4-vs7 * vs16 */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x16_E2', `
 | |
| #else
 | |
| .macro KERNEL1x16_E2
 | |
| #endif
 | |
| /* KERNEL1x16_E2: pipeline tail. Performs no loads and moves no pointers; it only */
 | |
| /* accumulates the operands already held in vs4-vs7 and vs16 into vs32-vs35. */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16
 | |
| 	xvmaddasp	vs33,	vs5,	vs16
 | |
| 	xvmaddasp	vs34,	vs6,	vs16
 | |
| 	xvmaddasp	vs35,	vs7,	vs16
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x16_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL1x16_SUBI1
 | |
| #endif
 | |
| /* KERNEL1x16_SUBI1: self-contained first step. Loads four vectors from AO and one B word, */
 | |
| /* then overwrites vs32-vs35 with the products, so the accumulators need no prior clearing. */
 | |
| 	lxvw4x		vs0,	o0,	AO
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO
 | |
| /* splat word 0 of vs28 across all lanes; the load is 16 bytes while BO advances only 4 */
 | |
| 	xxspltw		vs8,	vs28,	0
 | |
| 
 | |
| 	addi		BO,	BO,	4
 | |
| 
 | |
| /* vs32-vs35 = vs0-vs3 * vs8 (overwrites, no accumulate) */
 | |
| 	xvmulsp		vs32,	vs0,	vs8
 | |
| 	xvmulsp		vs33,	vs1,	vs8
 | |
| 	xvmulsp		vs34,	vs2,	vs8
 | |
| 	xvmulsp		vs35,	vs3,	vs8
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x16_SUB1', `
 | |
| #else
 | |
| .macro KERNEL1x16_SUB1
 | |
| #endif
 | |
| /* 1x16 tile, self-contained accumulate step: load 16 A floats and one B float, then vs32-vs35 += A * B. */
 | |
| /* o0/o16/o32/o48 are defined outside this chunk; presumably byte offsets 0/16/32/48 -- confirm. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* A: four floats per vector register */
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 	lxvw4x		vs2,	o32,	AO
 | |
| 	lxvw4x		vs3,	o48,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	64	/* AO += 64 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (per lane) */
 | |
| 	xvmaddasp	vs33,	vs1,	vs8	/* vs33 += vs1 * vs8 */
 | |
| 	xvmaddasp	vs34,	vs2,	vs8	/* vs34 += vs2 * vs8 */
 | |
| 	xvmaddasp	vs35,	vs3,	vs8	/* vs35 += vs3 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE1x16', `
 | |
| #else
 | |
| .macro SAVE1x16
 | |
| #endif
 | |
| /* Write back the 1x16 tile: scale accumulators vs32-vs35 by alpha_vr, combine with the 64 bytes at CO as selected by TRMMKERNEL, store, then advance CO by 64. Clobbers vs0-vs3 and T1. */
 | |
| 	mr		T1,	CO	/* T1 = working copy of the C pointer */
 | |
| 
 | |
| #ifndef TRMMKERNEL	/* without TRMMKERNEL the existing C values are read so they can be added to */
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 	lxvw4x		vs2,	o32,	T1
 | |
| 	lxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| #endif
 | |
| /* alpha_vr is defined outside this chunk; presumably alpha replicated across the four lanes -- confirm. */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr	/* vs0 = vs32 * alpha_vr; old C contents are not used */
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| 	xvmulsp		vs2,	vs34,	alpha_vr
 | |
| 	xvmulsp		vs3,	vs35,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr	/* vs0 = loaded C + vs32 * alpha_vr */
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| 	xvmaddasp	vs2,	vs34,	alpha_vr
 | |
| 	xvmaddasp	vs3,	vs35,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1	/* store the 16 results back to C */
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 	stxvw4x		vs2,	o32,	T1
 | |
| 	stxvw4x		vs3,	o48,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC	/* T1 += LDC; T1 is not read again inside this macro */
 | |
| 
 | |
| 	addi		CO,	CO,	64	/* CO += 64 bytes, past the values just written */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=1 and M=8
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD1x8_1', `
 | |
| #else
 | |
| .macro LOAD1x8_1
 | |
| #endif
 | |
| /* 1x8 tile, preload only: read 8 A floats into vs0-vs1 and one B float (splatted) into vs8, advance AO and BO. No arithmetic is done here. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* o0/o16 are defined outside this chunk; presumably byte offsets 0/16 -- confirm */
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32	/* AO += 32 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed. NOTE(review): BO advances by 4 only -- confirm the 12 extra bytes are always readable */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x8_I1', `
 | |
| #else
 | |
| .macro KERNEL1x8_I1
 | |
| #endif
 | |
| /* 1x8 tile, first pipelined step: fetch the next operands into vs4-vs5 and vs16, and set vs32-vs33 = vs0-vs1 * vs8 (plain multiply, accumulators are overwritten). */
 | |
| /* vs0-vs1 and vs8 are only read here; they must have been loaded beforehand (LOAD1x8_1 fills exactly these registers). */
 | |
| 	lxvw4x		vs4,	o0,	AO	/* next A values, four floats per register */
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32	/* AO += 32 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0	/* vs16 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 (per lane) */
 | |
| 	xvmulsp		vs33,	vs1,	vs8	/* vs33 = vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x8_1', `
 | |
| #else
 | |
| .macro KERNEL1x8_1
 | |
| #endif
 | |
| /* 1x8 tile, pipelined step: fetch the next operands into vs4-vs5 and vs16 while accumulating vs0-vs1 * vs8 into vs32-vs33. */
 | |
| /* vs0-vs1 and vs8 are only read here, so an earlier macro must have loaded them. */
 | |
| 	lxvw4x		vs4,	o0,	AO	/* next A values, four floats per register */
 | |
| 	lxvw4x		vs5,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32	/* AO += 32 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0	/* vs16 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (per lane) */
 | |
| 	xvmaddasp	vs33,	vs1,	vs8	/* vs33 += vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x8_2', `
 | |
| #else
 | |
| .macro KERNEL1x8_2
 | |
| #endif
 | |
| /* 1x8 tile, pipelined step with the register sets swapped: fetch the next operands into vs0-vs1 and vs8 while accumulating vs4-vs5 * vs16 into vs32-vs33. */
 | |
| /* vs4-vs5 and vs16 are only read here; KERNEL1x8_I1 and KERNEL1x8_1 are the macros in this family that load them. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* next A values, four floats per register */
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32	/* AO += 32 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (per lane) */
 | |
| 	xvmaddasp	vs33,	vs5,	vs16	/* vs33 += vs5 * vs16 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x8_E2', `
 | |
| #else
 | |
| .macro KERNEL1x8_E2
 | |
| #endif
 | |
| /* 1x8 tile, final pipelined step: accumulate vs4-vs5 * vs16 into vs32-vs33. */
 | |
| /* Performs no loads and leaves AO and BO untouched; vs4-vs5 and vs16 must already hold valid operands. */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (per lane) */
 | |
| 	xvmaddasp	vs33,	vs5,	vs16	/* vs33 += vs5 * vs16 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x8_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL1x8_SUBI1
 | |
| #endif
 | |
| /* 1x8 tile, self-contained first step: load 8 A floats and one B float, then set vs32-vs33 = A * B (plain multiply, previous accumulator contents are discarded). */
 | |
| /* o0/o16 are defined outside this chunk; presumably byte offsets 0/16 -- confirm. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* A: four floats per vector register */
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32	/* AO += 32 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 (per lane) */
 | |
| 	xvmulsp		vs33,	vs1,	vs8	/* vs33 = vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x8_SUB1', `
 | |
| #else
 | |
| .macro KERNEL1x8_SUB1
 | |
| #endif
 | |
| /* 1x8 tile, self-contained accumulate step: load 8 A floats and one B float, then vs32-vs33 += A * B. */
 | |
| /* o0/o16 are defined outside this chunk; presumably byte offsets 0/16 -- confirm. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* A: four floats per vector register */
 | |
| 	lxvw4x		vs1,	o16,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	32	/* AO += 32 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (per lane) */
 | |
| 	xvmaddasp	vs33,	vs1,	vs8	/* vs33 += vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE1x8', `
 | |
| #else
 | |
| .macro SAVE1x8
 | |
| #endif
 | |
| /* Write back the 1x8 tile: scale accumulators vs32-vs33 by alpha_vr, combine with the 32 bytes at CO as selected by TRMMKERNEL, store, then advance CO by 32. Clobbers vs0-vs1 and T1. */
 | |
| 	mr		T1,	CO	/* T1 = working copy of the C pointer */
 | |
| 
 | |
| #ifndef TRMMKERNEL	/* without TRMMKERNEL the existing C values are read so they can be added to */
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 	lxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| #endif
 | |
| /* alpha_vr is defined outside this chunk; presumably alpha replicated across the four lanes -- confirm. */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr	/* vs0 = vs32 * alpha_vr; old C contents are not used */
 | |
| 	xvmulsp		vs1,	vs33,	alpha_vr
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr	/* vs0 = loaded C + vs32 * alpha_vr */
 | |
| 	xvmaddasp	vs1,	vs33,	alpha_vr
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1	/* store the 8 results back to C */
 | |
| 	stxvw4x		vs1,	o16,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC	/* T1 += LDC; T1 is not read again inside this macro */
 | |
| 
 | |
| 	addi		CO,	CO,	32	/* CO += 32 bytes, past the values just written */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=1 and M=4
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD1x4_1', `
 | |
| #else
 | |
| .macro LOAD1x4_1
 | |
| #endif
 | |
| /* 1x4 tile, preload only: read 4 A floats into vs0 and one B float (splatted) into vs8, advance AO and BO. No arithmetic is done here. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* o0 is defined outside this chunk; presumably byte offset 0 -- confirm */
 | |
| 
 | |
| 	addi		AO,	AO,	16	/* AO += 16 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed. NOTE(review): BO advances by 4 only -- confirm the 12 extra bytes are always readable */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x4_I1', `
 | |
| #else
 | |
| .macro KERNEL1x4_I1
 | |
| #endif
 | |
| /* 1x4 tile, first pipelined step: fetch the next operands into vs4 and vs16, and set vs32 = vs0 * vs8 (plain multiply, accumulator is overwritten). */
 | |
| /* vs0 and vs8 are only read here; they must have been loaded beforehand (LOAD1x4_1 fills exactly these registers). */
 | |
| 	lxvw4x		vs4,	o0,	AO	/* next four A floats */
 | |
| 
 | |
| 	addi		AO,	AO,	16	/* AO += 16 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0	/* vs16 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 (per lane) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x4_1', `
 | |
| #else
 | |
| .macro KERNEL1x4_1
 | |
| #endif
 | |
| /* 1x4 tile, pipelined step: fetch the next operands into vs4 and vs16 while accumulating vs0 * vs8 into vs32. */
 | |
| /* vs0 and vs8 are only read here, so an earlier macro must have loaded them. */
 | |
| 	lxvw4x		vs4,	o0,	AO	/* next four A floats */
 | |
| 
 | |
| 	addi		AO,	AO,	16	/* AO += 16 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs16,	vs28,	0	/* vs16 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (per lane) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x4_2', `
 | |
| #else
 | |
| .macro KERNEL1x4_2
 | |
| #endif
 | |
| /* 1x4 tile, pipelined step with the register sets swapped: fetch the next operands into vs0 and vs8 while accumulating vs4 * vs16 into vs32. */
 | |
| /* vs4 and vs16 are only read here; KERNEL1x4_I1 and KERNEL1x4_1 are the macros in this family that load them. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* next four A floats */
 | |
| 
 | |
| 	addi		AO,	AO,	16	/* AO += 16 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (per lane) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x4_E2', `
 | |
| #else
 | |
| .macro KERNEL1x4_E2
 | |
| #endif
 | |
| /* 1x4 tile, final pipelined step: accumulate vs4 * vs16 into vs32. */
 | |
| /* Performs no loads and leaves AO and BO untouched; vs4 and vs16 must already hold valid operands. */
 | |
| 	xvmaddasp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (per lane) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x4_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL1x4_SUBI1
 | |
| #endif
 | |
| /* 1x4 tile, self-contained first step: load 4 A floats and one B float, then set vs32 = A * B (plain multiply, previous accumulator contents are discarded). */
 | |
| /* o0 is defined outside this chunk; presumably byte offset 0 -- confirm. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* four A floats */
 | |
| 
 | |
| 	addi		AO,	AO,	16	/* AO += 16 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmulsp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 (per lane) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x4_SUB1', `
 | |
| #else
 | |
| .macro KERNEL1x4_SUB1
 | |
| #endif
 | |
| /* 1x4 tile, self-contained accumulate step: load 4 A floats and one B float, then vs32 += A * B. */
 | |
| /* o0 is defined outside this chunk; presumably byte offset 0 -- confirm. */
 | |
| 	lxvw4x		vs0,	o0,	AO	/* four A floats */
 | |
| 
 | |
| 	addi		AO,	AO,	16	/* AO += 16 bytes */
 | |
| 
 | |
| 	lxvw4x		vs28,	o0,	BO	/* 16-byte load at BO; only word 0 is consumed below */
 | |
| 
 | |
| 	xxspltw		vs8,	vs28,	0	/* vs8 = word 0 of vs28 copied into all four lanes */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xvmaddasp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (per lane) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE1x4', `
 | |
| #else
 | |
| .macro SAVE1x4
 | |
| #endif
 | |
| /* Write back the 1x4 tile: scale accumulator vs32 by alpha_vr, combine with the 16 bytes at CO as selected by TRMMKERNEL, store, then advance CO by 16. Clobbers vs0 and T1. */
 | |
| 	mr		T1,	CO	/* T1 = working copy of the C pointer */
 | |
| 
 | |
| #ifndef TRMMKERNEL	/* without TRMMKERNEL the existing C values are read so they can be added to */
 | |
| 
 | |
| 	lxvw4x		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| /* alpha_vr is defined outside this chunk; presumably alpha replicated across the four lanes -- confirm. */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xvmulsp		vs0,	vs32,	alpha_vr	/* vs0 = vs32 * alpha_vr; old C contents are not used */
 | |
| #else
 | |
| 	xvmaddasp	vs0,	vs32,	alpha_vr	/* vs0 = loaded C + vs32 * alpha_vr */
 | |
| #endif
 | |
| 
 | |
| 	stxvw4x		vs0,	o0,	T1	/* store the 4 results back to C */
 | |
| 
 | |
| 	add		T1,	T1,	LDC	/* T1 += LDC; T1 is not read again inside this macro */
 | |
| 
 | |
| 	addi		CO,	CO,	16	/* CO += 16 bytes, past the values just written */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=1 and M=2
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD1x2_1', `
 | |
| #else
 | |
| .macro LOAD1x2_1
 | |
| #endif
 | |
| /* 1x2 tile, preload only (scalar path): read 2 A floats into vs0-vs1 and one B float into vs8, advance AO and BO. No arithmetic is done here. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* scalar single-precision load; o0/o4 are defined outside this chunk, presumably byte offsets 0/4 -- confirm */
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8	/* AO += 8 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x2_I1', `
 | |
| #else
 | |
| .macro KERNEL1x2_I1
 | |
| #endif
 | |
| /* 1x2 tile, first pipelined step (scalar path): fetch the next operands into vs4-vs5 and vs16, and set vs32-vs33 = vs0-vs1 * vs8 (plain multiply, accumulators are overwritten). */
 | |
| /* vs0-vs1 and vs8 are only read here (LOAD1x2_1 fills exactly these registers). The products use the scalar double-precision multiply. Clobbers T1. */
 | |
| 	lxsspx		vs4,	o0,	AO	/* next two A floats */
 | |
| 	lxsspx		vs5,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8	/* AO += 8 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1	/* vs16 = next B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmuldp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 */
 | |
| 	xsmuldp		vs33,	vs1,	vs8	/* vs33 = vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x2_1', `
 | |
| #else
 | |
| .macro KERNEL1x2_1
 | |
| #endif
 | |
| /* 1x2 tile, pipelined step (scalar path): fetch the next operands into vs4-vs5 and vs16 while accumulating vs0-vs1 * vs8 into vs32-vs33. */
 | |
| /* vs0-vs1 and vs8 are only read here, so an earlier macro must have loaded them. Clobbers T1. */
 | |
| 	lxsspx		vs4,	o0,	AO	/* next two A floats */
 | |
| 	lxsspx		vs5,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8	/* AO += 8 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1	/* vs16 = next B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (scalar double-precision multiply-add) */
 | |
| 	xsmaddadp	vs33,	vs1,	vs8	/* vs33 += vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x2_2', `
 | |
| #else
 | |
| .macro KERNEL1x2_2
 | |
| #endif
 | |
| /* 1x2 tile, pipelined step with the register sets swapped (scalar path): fetch the next operands into vs0-vs1 and vs8 while accumulating vs4-vs5 * vs16 into vs32-vs33. */
 | |
| /* vs4-vs5 and vs16 are only read here; KERNEL1x2_I1 and KERNEL1x2_1 are the macros in this family that load them. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* next two A floats */
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8	/* AO += 8 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = next B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (scalar double-precision multiply-add) */
 | |
| 	xsmaddadp	vs33,	vs5,	vs16	/* vs33 += vs5 * vs16 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x2_E2', `
 | |
| #else
 | |
| .macro KERNEL1x2_E2
 | |
| #endif
 | |
| /* 1x2 tile, final pipelined step (scalar path): accumulate vs4-vs5 * vs16 into vs32-vs33. */
 | |
| /* Performs no loads and leaves AO, BO and T1 untouched; vs4-vs5 and vs16 must already hold valid operands. */
 | |
| 	xsmaddadp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (scalar double-precision multiply-add) */
 | |
| 	xsmaddadp	vs33,	vs5,	vs16	/* vs33 += vs5 * vs16 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x2_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL1x2_SUBI1
 | |
| #endif
 | |
| /* 1x2 tile, self-contained first step (scalar path): load 2 A floats and one B float, then set vs32-vs33 = A * B (plain multiply, previous accumulator contents are discarded). */
 | |
| /* o0/o4 are defined outside this chunk; presumably byte offsets 0/4 -- confirm. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* two A floats */
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8	/* AO += 8 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmuldp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 */
 | |
| 	xsmuldp		vs33,	vs1,	vs8	/* vs33 = vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x2_SUB1', `
 | |
| #else
 | |
| .macro KERNEL1x2_SUB1
 | |
| #endif
 | |
| /* 1x2 tile, self-contained accumulate step (scalar path): load 2 A floats and one B float, then vs32-vs33 += A * B. */
 | |
| /* o0/o4 are defined outside this chunk; presumably byte offsets 0/4 -- confirm. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* two A floats */
 | |
| 	lxsspx		vs1,	o4,	AO
 | |
| 
 | |
| 	addi		AO,	AO,	8	/* AO += 8 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (scalar double-precision multiply-add) */
 | |
| 	xsmaddadp	vs33,	vs1,	vs8	/* vs33 += vs1 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE1x2', `
 | |
| #else
 | |
| .macro SAVE1x2
 | |
| #endif
 | |
| /* Write back the 1x2 tile (scalar path): scale accumulators vs32-vs33 by alpha_r, combine with the 2 floats at CO as selected by TRMMKERNEL, store, then advance CO by 8. Clobbers vs0-vs1 and T1. */
 | |
| 	mr		T1,	CO	/* T1 = working copy of the C pointer */
 | |
| 
 | |
| #ifndef TRMMKERNEL	/* without TRMMKERNEL the existing C values are read so they can be added to */
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 	lxsspx		vs1,	o4,	T1
 | |
| 
 | |
| #endif
 | |
| /* alpha_r is defined outside this chunk; presumably the scalar alpha -- confirm. */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs32,	alpha_r	/* vs0 = vs32 * alpha_r; old C contents are not used */
 | |
| 	xsmuldp		vs1,	vs33,	alpha_r
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs32,	alpha_r	/* vs0 = loaded C + vs32 * alpha_r */
 | |
| 	xsmaddadp	vs1,	vs33,	alpha_r
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1	/* scalar store as single precision */
 | |
| 	stxsspx		vs1,	o4,	T1
 | |
| 
 | |
| 	add		T1,	T1,	LDC	/* T1 += LDC; T1 is not read again inside this macro */
 | |
| 
 | |
| 	addi		CO,	CO,	8	/* CO += 8 bytes, past the values just written */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /**********************************************************************************************
 | |
| * Macros for N=1 and M=1
 | |
| **********************************************************************************************/
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`LOAD1x1_1', `
 | |
| #else
 | |
| .macro LOAD1x1_1
 | |
| #endif
 | |
| /* 1x1 tile, preload only (scalar path): read one A float into vs0 and one B float into vs8, advance AO and BO by 4 each. No arithmetic is done here. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* scalar single-precision load; o0 is defined outside this chunk, presumably byte offset 0 -- confirm */
 | |
| 
 | |
| 	addi		AO,	AO,	4	/* AO += 4 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x1_I1', `
 | |
| #else
 | |
| .macro KERNEL1x1_I1
 | |
| #endif
 | |
| /* 1x1 tile, first pipelined step (scalar path): fetch the next operands into vs4 and vs16, and set vs32 = vs0 * vs8 (plain multiply, accumulator is overwritten). */
 | |
| /* vs0 and vs8 are only read here (LOAD1x1_1 fills exactly these registers). Clobbers T1. */
 | |
| 	lxsspx		vs4,	o0,	AO	/* next A float */
 | |
| 
 | |
| 	addi		AO,	AO,	4	/* AO += 4 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1	/* vs16 = next B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmuldp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x1_1', `
 | |
| #else
 | |
| .macro KERNEL1x1_1
 | |
| #endif
 | |
| /* 1x1 tile, pipelined step (scalar path): fetch the next operands into vs4 and vs16 while accumulating vs0 * vs8 into vs32. */
 | |
| /* vs0 and vs8 are only read here, so an earlier macro must have loaded them. Clobbers T1. */
 | |
| 	lxsspx		vs4,	o0,	AO	/* next A float */
 | |
| 
 | |
| 	addi		AO,	AO,	4	/* AO += 4 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs16,	o0,	T1	/* vs16 = next B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (scalar double-precision multiply-add) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x1_2', `
 | |
| #else
 | |
| .macro KERNEL1x1_2
 | |
| #endif
 | |
| /* 1x1 tile, pipelined step with the register sets swapped (scalar path): fetch the next operands into vs0 and vs8 while accumulating vs4 * vs16 into vs32. */
 | |
| /* vs4 and vs16 are only read here; KERNEL1x1_I1 and KERNEL1x1_1 are the macros in this family that load them. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* next A float */
 | |
| 
 | |
| 	addi		AO,	AO,	4	/* AO += 4 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = next B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (scalar double-precision multiply-add) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x1_E2', `
 | |
| #else
 | |
| .macro KERNEL1x1_E2
 | |
| #endif
 | |
| /* 1x1 tile, final pipelined step (scalar path): accumulate vs4 * vs16 into vs32. */
 | |
| /* Performs no loads and leaves AO, BO and T1 untouched; vs4 and vs16 must already hold valid operands. */
 | |
| 	xsmaddadp	vs32,	vs4,	vs16	/* vs32 += vs4 * vs16 (scalar double-precision multiply-add) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x1_SUBI1', `
 | |
| #else
 | |
| .macro KERNEL1x1_SUBI1
 | |
| #endif
 | |
| /* 1x1 tile, self-contained first step (scalar path): load one A float and one B float, then set vs32 = A * B (plain multiply, previous accumulator contents are discarded). */
 | |
| /* o0 is defined outside this chunk; presumably byte offset 0 -- confirm. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* A float */
 | |
| 
 | |
| 	addi		AO,	AO,	4	/* AO += 4 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmuldp		vs32,	vs0,	vs8	/* vs32 = vs0 * vs8 */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`KERNEL1x1_SUB1', `
 | |
| #else
 | |
| .macro KERNEL1x1_SUB1
 | |
| #endif
 | |
| /* 1x1 tile, self-contained accumulate step (scalar path): load one A float and one B float, then vs32 += A * B. */
 | |
| /* o0 is defined outside this chunk; presumably byte offset 0 -- confirm. Clobbers T1. */
 | |
| 	lxsspx		vs0,	o0,	AO	/* A float */
 | |
| 
 | |
| 	addi		AO,	AO,	4	/* AO += 4 bytes */
 | |
| 
 | |
| 	mr		T1,	BO	/* T1 = BO, used as the base of the B load */
 | |
| 
 | |
| 	lxsspx		vs8,	o0,	T1	/* vs8 = B float */
 | |
| 
 | |
| 	addi		BO,	BO,	4	/* BO += 4 bytes */
 | |
| 
 | |
| 
 | |
| 	xsmaddadp	vs32,	vs0,	vs8	/* vs32 += vs0 * vs8 (scalar double-precision multiply-add) */
 | |
| 
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 | |
| #if defined(_AIX)
 | |
| define(`SAVE1x1', `
 | |
| #else
 | |
| .macro SAVE1x1
 | |
| #endif
 | |
| /* Write back the 1x1 tile (scalar path): scale accumulator vs32 by alpha_r, combine with the float at CO as selected by TRMMKERNEL, store, then advance CO by 4. Clobbers vs0 and T1. */
 | |
| 	mr		T1,	CO	/* T1 = working copy of the C pointer */
 | |
| 
 | |
| #ifndef TRMMKERNEL	/* without TRMMKERNEL the existing C value is read so it can be added to */
 | |
| 
 | |
| 	lxsspx		vs0,	o0,	T1
 | |
| 
 | |
| #endif
 | |
| /* alpha_r is defined outside this chunk; presumably the scalar alpha -- confirm. */
 | |
| #ifdef TRMMKERNEL
 | |
| 	xsmuldp		vs0,	vs32,	alpha_r	/* vs0 = vs32 * alpha_r; old C contents are not used */
 | |
| #else
 | |
| 	xsmaddadp	vs0,	vs32,	alpha_r	/* vs0 = loaded C + vs32 * alpha_r */
 | |
| #endif
 | |
| 
 | |
| 	stxsspx		vs0,	o0,	T1	/* scalar store as single precision */
 | |
| 
 | |
| 	add		T1,	T1,	LDC	/* T1 += LDC; T1 is not read again inside this macro */
 | |
| 
 | |
| 	addi		CO,	CO,	4	/* CO += 4 bytes, past the value just written */
 | |
| 
 | |
| #if defined(_AIX)
 | |
| ')
 | |
| #else
 | |
| .endm
 | |
| #endif
 | |
| 
 |