998 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			998 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /***************************************************************************
 | |
| Copyright (c) 2013-2016, The OpenBLAS Project
 | |
| All rights reserved.
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions are
 | |
| met:
 | |
| 1. Redistributions of source code must retain the above copyright
 | |
| notice, this list of conditions and the following disclaimer.
 | |
| 2. Redistributions in binary form must reproduce the above copyright
 | |
| notice, this list of conditions and the following disclaimer in
 | |
| the documentation and/or other materials provided with the
 | |
| distribution.
 | |
| 3. Neither the name of the OpenBLAS project nor the names of
 | |
| its contributors may be used to endorse or promote products
 | |
| derived from this software without specific prior written permission.
 | |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | |
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | |
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | |
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | |
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | |
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | |
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************************************************
 | |
| * 2016/04/22 Werner Saar (wernsaar@googlemail.com)
 | |
| * 	 BLASTEST 		: OK
 | |
| * 	 CTEST			: OK
 | |
| * 	 TEST			: OK
 | |
| *	 LAPACK-TEST		: OK
 | |
| **************************************************************************************/
 | |
| 
 | |
| 
 | |
| 	srawi.		J,	N,	1		// J = N >> 1 (arithmetic shift); CR0 reflects the result
 | |
| 	ble		ZGEMM_L2_END		// J <= 0: skip the whole ZGEMM_L2 section
 | |
| // Top of the J loop; re-entered from the addic./bgt pair that follows ZGEMM_L2x1_END.
 | |
| ZGEMM_L2_BEGIN:
 | |
| // Stage 1: point BO at B and BBO at BBUFFER, then run the ZCOPYB_* macros (defined outside this section; presumably they copy from BO to BBO - TODO confirm).
 | |
| 	mr		BO,	B
 | |
| 	mr		BBO,	BBUFFER
 | |
| 	srawi.		T1,	K,	2		// T1 = K >> 2: trip count of the ZCOPYB_8x1 loop
 | |
| 	ble		ZGEMM_L2_COPYB1		// K < 4: only the leftover loop below can run
 | |
| 
 | |
| ZGEMM_L2_COPYB8:
 | |
| // Cache hints first: dcbt (touch for load) on BO+PRE, dcbtst (touch for store) on BBO+PRE and BBO+PRE+128.
 | |
| 	addi		T2,	PRE, 128
 | |
| 	dcbt		BO,	PRE
 | |
| 	dcbtst		BBO,	PRE
 | |
| 	dcbtst		BBO,	T2
 | |
| 	ZCOPYB_8x1
 | |
| 	addic.		T1,	T1,	-1
 | |
| 
 | |
| 	bgt		ZGEMM_L2_COPYB8
 | |
| 
 | |
| ZGEMM_L2_COPYB1:
 | |
| // Leftover part: K & 3 passes. andi. never produces a negative value, so the ble below acts as "branch if zero".
 | |
| 	andi.		T1,	K,	3
 | |
| 	ble		ZGEMM_L2_COPYB_END
 | |
| 
 | |
| ZGEMM_L2_COPYB_LOOP:
 | |
| // Each leftover pass invokes ZCOPYB_1x1 twice.
 | |
| 	ZCOPYB_1x1
 | |
| 	ZCOPYB_1x1
 | |
| 	addic.          T1,     T1,     -1
 | |
| 
 | |
| 	bgt             ZGEMM_L2_COPYB_LOOP
 | |
| 
 | |
| ZGEMM_L2_COPYB_END:
 | |
| // Stage 2: CO/AO become the working cursors for this pass; C itself is advanced by LDC << 1 for the next pass.
 | |
| 	mr		CO,	C
 | |
| 	mr		AO,	A
 | |
| 	slwi		T1,	LDC	,	1		// 32-bit shift: assumes LDC << 1 fits in 32 bits
 | |
| 	add		C,	C,	T1
 | |
| 	srawi.		I,	M,	3		// I = M >> 3: trip count of the ZGEMM_L2x8_BEGIN loop
 | |
| 	ble		ZGEMM_L2x8_END		// M < 8: go straight to the M & 7 remainder sections
 | |
| 
 | |
| ZGEMM_L2x8_BEGIN:
 | |
| // One pass per unit of I. Each pass rewinds BO to BBUFFER, runs the KERNEL2x8_* macros and finishes with SAVE2x8 (all macros are defined outside this section).
 | |
| // For K >= 1 the number of KERNEL2x8_* invocations per pass equals K; L = K >> 3 picks the path: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L2x8_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L2x8_SUB4
 | |
| 
 | |
| ZGEMM_L2x8_LOOP_START:
 | |
| // First group of 8: LOAD2x8_1, KERNEL2x8_I1, then alternating KERNEL2x8_2 / KERNEL2x8_1, with dcbt touches on AO+PRE and BO+PRE interleaved.
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	LOAD2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_I1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L2x8_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L2x8_LOOP:
 | |
| // Steady state: 8 KERNEL2x8_* invocations per pass, repeated while L > 0.
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x8_LOOP
 | |
| 
 | |
| ZGEMM_L2x8_LOOP_END:
 | |
| // Last group of 8: same alternation, but it closes with KERNEL2x8_E2 instead of KERNEL2x8_2.
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	dcbt		BO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_1
 | |
| 	KERNEL2x8_E2
 | |
| 
 | |
| 	b		ZGEMM_L2x8_SUB1
 | |
| 
 | |
| ZGEMM_L2x8_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL2x8_SUBI1 followed by 7 x KERNEL2x8_SUB1), then the K & 7 tail at SUB1.
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_SUBI1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_SUB1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_SUB1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL2x8_SUB1
 | |
| 
 | |
| 	KERNEL2x8_SUB1
 | |
| 	KERNEL2x8_SUB1
 | |
| 	KERNEL2x8_SUB1
 | |
| 	KERNEL2x8_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L2x8_SUB1
 | |
| 
 | |
| ZGEMM_L2x8_SUB0:
 | |
| // K >> 3 <= 0: KERNEL2x8_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL2x8_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L2x8_SAVE
 | |
| 	b		ZGEMM_L2x8_SUB2
 | |
| 
 | |
| ZGEMM_L2x8_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L2x8_SAVE
 | |
| 
 | |
| ZGEMM_L2x8_SUB2:
 | |
| 
 | |
| 	KERNEL2x8_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x8_SUB2
 | |
| 
 | |
| ZGEMM_L2x8_SAVE:
 | |
| 
 | |
| 	SAVE2x8
 | |
| 
 | |
| 	addic.		I,	I,	-1		// next pass of the I loop
 | |
| 	bgt		ZGEMM_L2x8_BEGIN
 | |
| 
 | |
| ZGEMM_L2x8_END:
 | |
| // Remainder handling for M & 7 starts here. If M & 7 == 0, every remainder section is skipped (branch to ZGEMM_L2x1_END).
 | |
| ZGEMM_L2x4_BEGIN:
 | |
| // This section runs at most once, when M & 4 is non-zero. Path selection on L = K >> 3 matches the 2x8 section: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	andi.		T2,	M,	7
 | |
| 	ble		ZGEMM_L2x1_END
 | |
| 
 | |
| 	andi.		T1,	M,	4
 | |
| 	ble		ZGEMM_L2x4_END
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L2x4_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L2x4_SUB4
 | |
| 
 | |
| ZGEMM_L2x4_LOOP_START:
 | |
| // First group of 8: LOAD2x4_1, KERNEL2x4_I1, then alternating KERNEL2x4_2 / KERNEL2x4_1 (no dcbt hints in this section).
 | |
| 	LOAD2x4_1
 | |
| 	KERNEL2x4_I1
 | |
| 	KERNEL2x4_2
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L2x4_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L2x4_LOOP:
 | |
| // Steady state: 8 KERNEL2x4_* invocations per pass, repeated while L > 0.
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x4_LOOP
 | |
| 
 | |
| ZGEMM_L2x4_LOOP_END:
 | |
| // Last group of 8; closes with KERNEL2x4_E2 instead of KERNEL2x4_2.
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_2
 | |
| 	KERNEL2x4_1
 | |
| 	KERNEL2x4_E2
 | |
| 
 | |
| 	b		ZGEMM_L2x4_SUB1
 | |
| 
 | |
| ZGEMM_L2x4_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL2x4_SUBI1 followed by 7 x KERNEL2x4_SUB1), then the K & 7 tail at SUB1.
 | |
| 	KERNEL2x4_SUBI1
 | |
| 	KERNEL2x4_SUB1
 | |
| 	KERNEL2x4_SUB1
 | |
| 	KERNEL2x4_SUB1
 | |
| 
 | |
| 	KERNEL2x4_SUB1
 | |
| 	KERNEL2x4_SUB1
 | |
| 	KERNEL2x4_SUB1
 | |
| 	KERNEL2x4_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L2x4_SUB1
 | |
| 
 | |
| ZGEMM_L2x4_SUB0:
 | |
| // K >> 3 <= 0: KERNEL2x4_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL2x4_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L2x4_SAVE
 | |
| 	b		ZGEMM_L2x4_SUB2
 | |
| 
 | |
| ZGEMM_L2x4_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L2x4_SAVE
 | |
| 
 | |
| ZGEMM_L2x4_SUB2:
 | |
| 
 | |
| 	KERNEL2x4_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x4_SUB2
 | |
| 
 | |
| ZGEMM_L2x4_SAVE:
 | |
| 
 | |
| 	SAVE2x4
 | |
| 
 | |
| ZGEMM_L2x4_END:
 | |
| 
 | |
| ZGEMM_L2x2_BEGIN:
 | |
| // Runs at most once, when M & 2 is non-zero. BO is rewound to BBUFFER and the KERNEL2x2_* macros (defined elsewhere) are issued, ending with SAVE2x2.
 | |
| // Path selection on L = K >> 3: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	andi.		T1,	M,	2
 | |
| 	ble		ZGEMM_L2x2_END
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L2x2_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L2x2_SUB4
 | |
| 
 | |
| ZGEMM_L2x2_LOOP_START:
 | |
| // First group of 8: LOAD2x2_1, KERNEL2x2_I1, then alternating KERNEL2x2_2 / KERNEL2x2_1.
 | |
| 	LOAD2x2_1
 | |
| 	KERNEL2x2_I1
 | |
| 	KERNEL2x2_2
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L2x2_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L2x2_LOOP:
 | |
| // Steady state: 8 KERNEL2x2_* invocations per pass, repeated while L > 0.
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x2_LOOP
 | |
| 
 | |
| ZGEMM_L2x2_LOOP_END:
 | |
| // Last group of 8; closes with KERNEL2x2_E2 instead of KERNEL2x2_2.
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_2
 | |
| 	KERNEL2x2_1
 | |
| 	KERNEL2x2_E2
 | |
| 
 | |
| 	b		ZGEMM_L2x2_SUB1
 | |
| 
 | |
| ZGEMM_L2x2_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL2x2_SUBI1 followed by 7 x KERNEL2x2_SUB1), then the K & 7 tail at SUB1.
 | |
| 	KERNEL2x2_SUBI1
 | |
| 	KERNEL2x2_SUB1
 | |
| 	KERNEL2x2_SUB1
 | |
| 	KERNEL2x2_SUB1
 | |
| 
 | |
| 	KERNEL2x2_SUB1
 | |
| 	KERNEL2x2_SUB1
 | |
| 	KERNEL2x2_SUB1
 | |
| 	KERNEL2x2_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L2x2_SUB1
 | |
| 
 | |
| ZGEMM_L2x2_SUB0:
 | |
| // K >> 3 <= 0: KERNEL2x2_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL2x2_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L2x2_SAVE
 | |
| 	b		ZGEMM_L2x2_SUB2
 | |
| 
 | |
| ZGEMM_L2x2_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L2x2_SAVE
 | |
| 
 | |
| ZGEMM_L2x2_SUB2:
 | |
| 
 | |
| 	KERNEL2x2_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x2_SUB2
 | |
| 
 | |
| ZGEMM_L2x2_SAVE:
 | |
| 
 | |
| 	SAVE2x2
 | |
| 
 | |
| ZGEMM_L2x2_END:
 | |
| 
 | |
| ZGEMM_L2x1_BEGIN:
 | |
| // Runs at most once, when M & 1 is non-zero. BO is rewound to BBUFFER and the KERNEL2x1_* macros (defined elsewhere) are issued, ending with SAVE2x1.
 | |
| // Path selection on L = K >> 3: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	andi.		T1,	M,	1
 | |
| 	ble		ZGEMM_L2x1_END
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L2x1_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L2x1_SUB4
 | |
| 
 | |
| ZGEMM_L2x1_LOOP_START:
 | |
| // First group of 8: LOAD2x1_1, KERNEL2x1_I1, then alternating KERNEL2x1_2 / KERNEL2x1_1.
 | |
| 	LOAD2x1_1
 | |
| 	KERNEL2x1_I1
 | |
| 	KERNEL2x1_2
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L2x1_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L2x1_LOOP:
 | |
| // Steady state: 8 KERNEL2x1_* invocations per pass, repeated while L > 0.
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x1_LOOP
 | |
| 
 | |
| ZGEMM_L2x1_LOOP_END:
 | |
| // Last group of 8; closes with KERNEL2x1_E2 instead of KERNEL2x1_2.
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_2
 | |
| 	KERNEL2x1_1
 | |
| 	KERNEL2x1_E2
 | |
| 
 | |
| 	b		ZGEMM_L2x1_SUB1
 | |
| 
 | |
| ZGEMM_L2x1_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL2x1_SUBI1 followed by 7 x KERNEL2x1_SUB1), then the K & 7 tail at SUB1.
 | |
| 	KERNEL2x1_SUBI1
 | |
| 	KERNEL2x1_SUB1
 | |
| 	KERNEL2x1_SUB1
 | |
| 	KERNEL2x1_SUB1
 | |
| 
 | |
| 	KERNEL2x1_SUB1
 | |
| 	KERNEL2x1_SUB1
 | |
| 	KERNEL2x1_SUB1
 | |
| 	KERNEL2x1_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L2x1_SUB1
 | |
| 
 | |
| ZGEMM_L2x1_SUB0:
 | |
| // K >> 3 <= 0: KERNEL2x1_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL2x1_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L2x1_SAVE
 | |
| 	b		ZGEMM_L2x1_SUB2
 | |
| 
 | |
| ZGEMM_L2x1_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L2x1_SAVE
 | |
| 
 | |
| ZGEMM_L2x1_SUB2:
 | |
| 
 | |
| 	KERNEL2x1_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L2x1_SUB2
 | |
| 
 | |
| ZGEMM_L2x1_SAVE:
 | |
| 
 | |
| 	SAVE2x1
 | |
| 
 | |
| ZGEMM_L2x1_END:
 | |
| // Bottom of the J loop: advance B by K << 5 (32-bit shift), then repeat from ZGEMM_L2_BEGIN while J > 0.
 | |
| 	slwi		T1,	K,	5
 | |
| 	add		B,	B,	T1
 | |
| 
 | |
| 	addic.		J,	J,	-1
 | |
| 	bgt		ZGEMM_L2_BEGIN
 | |
| 
 | |
| 	andi.		T2,	N,	1		// N & 1 == 0: nothing left, leave via L999 (defined outside this section)
 | |
| 	ble		L999
 | |
| 
 | |
| ZGEMM_L2_END:
 | |
| // Reached by fall-through when N is odd, or directly from the entry test when N >> 1 <= 0.
 | |
| 	b		ZGEMM_L1_BEGIN
 | |
| 
 | |
| L999_H1:
 | |
| // No branch to L999_H1 is visible in this section; it only forwards to L999.
 | |
| 	b		L999
 | |
| 
 | |
| ZGEMM_L1_BEGIN:
 | |
| // Entry of the N & 1 section. First stage B into BBUFFER: per entry, 16 bytes are read at BO and 32 bytes are written at BBO.
 | |
| 	mr		BO,	B
 | |
| 	mr		BBO,	BBUFFER
 | |
| 	slwi		T1,	K,	0		// T1 = K (shift by 0 keeps only the low 32 bits)
 | |
| 
 | |
| ZGEMM_L1_COPYB:
 | |
| 	dcbtst		BBO,	PRE		// touch-for-store hint ahead of the write cursor
 | |
| // lxvdsx loads one doubleword and splats it across the vector; o0/o8/o16 are presumably offset registers 0/8/16 defined elsewhere - TODO confirm.
 | |
| 	lxvdsx		vs4,	o0,	BO              // b0_r
 | |
| 	lxvdsx		vs5,	o8,	BO              // b0_i
 | |
| 	addi		BO,	BO,	16
 | |
| 	stxvd2x		vs4,	o0,	BBO
 | |
| 	stxvd2x		vs5,	o16,	BBO
 | |
| 	addic.		T1,	T1,	-1		// sets CR0; the addi below leaves CR0 untouched
 | |
| 	addi		BBO,	BBO,	32
 | |
| // Bottom-tested loop: bgt makes it run K times (once when K < 1). The former bge ran K + 1 times, reading one entry past the K entries of B and writing one extra slot.
 | |
| 	bgt		ZGEMM_L1_COPYB
 | |
| 
 | |
| // The N & 1 test comes after the copy; when it is zero the section is skipped entirely.
 | |
| 	andi.		T1,	N,	1
 | |
| 	ble		ZGEMM_L1_END
 | |
| 	mr		CO,	C
 | |
| 	mr		AO,	A
 | |
| 	srawi.		I,	M,	3		// I = M >> 3: trip count of the ZGEMM_L1x8_BEGIN loop
 | |
| 	ble		ZGEMM_L1x8_END
 | |
| 
 | |
| ZGEMM_L1x8_BEGIN:
 | |
| // One pass per unit of I. Each pass rewinds BO to BBUFFER, runs the KERNEL1x8_* macros and finishes with SAVE1x8 (all macros are defined outside this section).
 | |
| // For K >= 1 the number of KERNEL1x8_* invocations per pass equals K; L = K >> 3 picks the path: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L1x8_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L1x8_SUB4
 | |
| 
 | |
| ZGEMM_L1x8_LOOP_START:
 | |
| // First group of 8: LOAD1x8_1, KERNEL1x8_I1, then alternating KERNEL1x8_2 / KERNEL1x8_1; only AO+PRE is touched with dcbt here.
 | |
| 	dcbt		AO,	PRE
 | |
| 	LOAD1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_I1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L1x8_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L1x8_LOOP:
 | |
| // Steady state: 8 KERNEL1x8_* invocations per pass, repeated while L > 0.
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x8_LOOP
 | |
| 
 | |
| ZGEMM_L1x8_LOOP_END:
 | |
| // Last group of 8; closes with KERNEL1x8_E2 instead of KERNEL1x8_2.
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_2
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_1
 | |
| 	KERNEL1x8_E2
 | |
| 
 | |
| 	b		ZGEMM_L1x8_SUB1
 | |
| 
 | |
| ZGEMM_L1x8_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL1x8_SUBI1 followed by 7 x KERNEL1x8_SUB1), then the K & 7 tail at SUB1.
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_SUBI1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_SUB1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_SUB1
 | |
| 	dcbt		AO,	PRE
 | |
| 	KERNEL1x8_SUB1
 | |
| 
 | |
| 	KERNEL1x8_SUB1
 | |
| 	KERNEL1x8_SUB1
 | |
| 	KERNEL1x8_SUB1
 | |
| 	KERNEL1x8_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L1x8_SUB1
 | |
| 
 | |
| ZGEMM_L1x8_SUB0:
 | |
| // K >> 3 <= 0: KERNEL1x8_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL1x8_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L1x8_SAVE
 | |
| 	b		ZGEMM_L1x8_SUB2
 | |
| 
 | |
| ZGEMM_L1x8_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L1x8_SAVE
 | |
| 
 | |
| ZGEMM_L1x8_SUB2:
 | |
| 
 | |
| 	KERNEL1x8_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x8_SUB2
 | |
| 
 | |
| ZGEMM_L1x8_SAVE:
 | |
| 
 | |
| 	SAVE1x8
 | |
| 
 | |
| 	addic.		I,	I,	-1		// next pass of the I loop
 | |
| 	bgt		ZGEMM_L1x8_BEGIN
 | |
| 
 | |
| ZGEMM_L1x8_END:
 | |
| // Remainder handling for M & 7 starts here. If M & 7 == 0, every remainder section is skipped (branch to ZGEMM_L1x1_END).
 | |
| ZGEMM_L1x4_BEGIN:
 | |
| // This section runs at most once, when M & 4 is non-zero. Path selection on L = K >> 3: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	andi.		T2,	M,	7
 | |
| 	ble		ZGEMM_L1x1_END
 | |
| 
 | |
| 	andi.		T1,	M,	4
 | |
| 	ble		ZGEMM_L1x4_END
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L1x4_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L1x4_SUB4
 | |
| 
 | |
| ZGEMM_L1x4_LOOP_START:
 | |
| // First group of 8: LOAD1x4_1, KERNEL1x4_I1, then alternating KERNEL1x4_2 / KERNEL1x4_1.
 | |
| 	LOAD1x4_1
 | |
| 	KERNEL1x4_I1
 | |
| 	KERNEL1x4_2
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L1x4_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L1x4_LOOP:
 | |
| // Steady state: 8 KERNEL1x4_* invocations per pass, repeated while L > 0.
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x4_LOOP
 | |
| 
 | |
| ZGEMM_L1x4_LOOP_END:
 | |
| // Last group of 8; closes with KERNEL1x4_E2 instead of KERNEL1x4_2.
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_2
 | |
| 	KERNEL1x4_1
 | |
| 	KERNEL1x4_E2
 | |
| 
 | |
| 	b		ZGEMM_L1x4_SUB1
 | |
| 
 | |
| ZGEMM_L1x4_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL1x4_SUBI1 followed by 7 x KERNEL1x4_SUB1), then the K & 7 tail at SUB1.
 | |
| 	KERNEL1x4_SUBI1
 | |
| 	KERNEL1x4_SUB1
 | |
| 	KERNEL1x4_SUB1
 | |
| 	KERNEL1x4_SUB1
 | |
| 
 | |
| 	KERNEL1x4_SUB1
 | |
| 	KERNEL1x4_SUB1
 | |
| 	KERNEL1x4_SUB1
 | |
| 	KERNEL1x4_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L1x4_SUB1
 | |
| 
 | |
| ZGEMM_L1x4_SUB0:
 | |
| // K >> 3 <= 0: KERNEL1x4_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL1x4_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L1x4_SAVE
 | |
| 	b		ZGEMM_L1x4_SUB2
 | |
| 
 | |
| ZGEMM_L1x4_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L1x4_SAVE
 | |
| 
 | |
| ZGEMM_L1x4_SUB2:
 | |
| 
 | |
| 	KERNEL1x4_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x4_SUB2
 | |
| 
 | |
| ZGEMM_L1x4_SAVE:
 | |
| 
 | |
| 	SAVE1x4
 | |
| 
 | |
| ZGEMM_L1x4_END:
 | |
| 
 | |
| ZGEMM_L1x2_BEGIN:
 | |
| // Runs at most once, when M & 2 is non-zero. BO is rewound to BBUFFER and the KERNEL1x2_* macros (defined elsewhere) are issued, ending with SAVE1x2.
 | |
| // Path selection on L = K >> 3: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	andi.		T1,	M,	2
 | |
| 	ble		ZGEMM_L1x2_END
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L1x2_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L1x2_SUB4
 | |
| 
 | |
| ZGEMM_L1x2_LOOP_START:
 | |
| // First group of 8: LOAD1x2_1, KERNEL1x2_I1, then alternating KERNEL1x2_2 / KERNEL1x2_1.
 | |
| 	LOAD1x2_1
 | |
| 	KERNEL1x2_I1
 | |
| 	KERNEL1x2_2
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L1x2_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L1x2_LOOP:
 | |
| // Steady state: 8 KERNEL1x2_* invocations per pass, repeated while L > 0.
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x2_LOOP
 | |
| 
 | |
| ZGEMM_L1x2_LOOP_END:
 | |
| // Last group of 8; closes with KERNEL1x2_E2 instead of KERNEL1x2_2.
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_2
 | |
| 	KERNEL1x2_1
 | |
| 	KERNEL1x2_E2
 | |
| 
 | |
| 	b		ZGEMM_L1x2_SUB1
 | |
| 
 | |
| ZGEMM_L1x2_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL1x2_SUBI1 followed by 7 x KERNEL1x2_SUB1), then the K & 7 tail at SUB1.
 | |
| 	KERNEL1x2_SUBI1
 | |
| 	KERNEL1x2_SUB1
 | |
| 	KERNEL1x2_SUB1
 | |
| 	KERNEL1x2_SUB1
 | |
| 
 | |
| 	KERNEL1x2_SUB1
 | |
| 	KERNEL1x2_SUB1
 | |
| 	KERNEL1x2_SUB1
 | |
| 	KERNEL1x2_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L1x2_SUB1
 | |
| 
 | |
| ZGEMM_L1x2_SUB0:
 | |
| // K >> 3 <= 0: KERNEL1x2_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL1x2_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L1x2_SAVE
 | |
| 	b		ZGEMM_L1x2_SUB2
 | |
| 
 | |
| ZGEMM_L1x2_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L1x2_SAVE
 | |
| 
 | |
| ZGEMM_L1x2_SUB2:
 | |
| 
 | |
| 	KERNEL1x2_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x2_SUB2
 | |
| 
 | |
| ZGEMM_L1x2_SAVE:
 | |
| 
 | |
| 	SAVE1x2
 | |
| 
 | |
| ZGEMM_L1x2_END:
 | |
| 
 | |
| ZGEMM_L1x1_BEGIN:
 | |
| // Runs at most once, when M & 1 is non-zero. BO is rewound to BBUFFER and the KERNEL1x1_* macros (defined elsewhere) are issued, ending with SAVE1x1.
 | |
| // Path selection on L = K >> 3: L <= 0 -> SUB0, L == 1 -> SUB4, L >= 2 -> LOOP_START.
 | |
| 	andi.		T1,	M,	1
 | |
| 	ble		ZGEMM_L1x1_END
 | |
| 	mr		BO,	BBUFFER
 | |
| 	srawi.		L,	K,	3
 | |
| 	ble		ZGEMM_L1x1_SUB0
 | |
| 	cmpwi		cr0,	L,	1
 | |
| 	ble		ZGEMM_L1x1_SUB4
 | |
| 
 | |
| ZGEMM_L1x1_LOOP_START:
 | |
| // First group of 8: LOAD1x1_1, KERNEL1x1_I1, then alternating KERNEL1x1_2 / KERNEL1x1_1.
 | |
| 	LOAD1x1_1
 | |
| 	KERNEL1x1_I1
 | |
| 	KERNEL1x1_2
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 
 | |
| 	addic.		L,	L,	-2		// two groups of 8 live outside the loop (LOOP_START and LOOP_END)
 | |
| 	ble		ZGEMM_L1x1_LOOP_END
 | |
| 
 | |
| 	.align 5
 | |
| 
 | |
| ZGEMM_L1x1_LOOP:
 | |
| // Steady state: 8 KERNEL1x1_* invocations per pass, repeated while L > 0.
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x1_LOOP
 | |
| 
 | |
| ZGEMM_L1x1_LOOP_END:
 | |
| // Last group of 8; closes with KERNEL1x1_E2 instead of KERNEL1x1_2.
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_2
 | |
| 	KERNEL1x1_1
 | |
| 	KERNEL1x1_E2
 | |
| 
 | |
| 	b		ZGEMM_L1x1_SUB1
 | |
| 
 | |
| ZGEMM_L1x1_SUB4:
 | |
| // L == 1: exactly 8 invocations (KERNEL1x1_SUBI1 followed by 7 x KERNEL1x1_SUB1), then the K & 7 tail at SUB1.
 | |
| 	KERNEL1x1_SUBI1
 | |
| 	KERNEL1x1_SUB1
 | |
| 	KERNEL1x1_SUB1
 | |
| 	KERNEL1x1_SUB1
 | |
| 
 | |
| 	KERNEL1x1_SUB1
 | |
| 	KERNEL1x1_SUB1
 | |
| 	KERNEL1x1_SUB1
 | |
| 	KERNEL1x1_SUB1
 | |
| 
 | |
| 	b		ZGEMM_L1x1_SUB1
 | |
| 
 | |
| ZGEMM_L1x1_SUB0:
 | |
| // K >> 3 <= 0: KERNEL1x1_SUBI1 runs unconditionally (even if K & 7 == 0), then (K & 7) - 1 passes of SUB2.
 | |
| 	andi.		L,	K,	7
 | |
| 
 | |
| 	KERNEL1x1_SUBI1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	ble		ZGEMM_L1x1_SAVE
 | |
| 	b		ZGEMM_L1x1_SUB2
 | |
| 
 | |
| ZGEMM_L1x1_SUB1:
 | |
| // Tail after the groups of 8: K & 7 passes of SUB2, none when the mask result is zero.
 | |
| 	andi.		L,	K,	7
 | |
| 	ble		ZGEMM_L1x1_SAVE
 | |
| 
 | |
| ZGEMM_L1x1_SUB2:
 | |
| 
 | |
| 	KERNEL1x1_SUB1
 | |
| 
 | |
| 	addic.		L,	L,	-1
 | |
| 	bgt		ZGEMM_L1x1_SUB2
 | |
| 
 | |
| ZGEMM_L1x1_SAVE:
 | |
| 
 | |
| 	SAVE1x1
 | |
| 
 | |
| ZGEMM_L1x1_END:
 | |
| // Common exit label of the N & 1 section; execution continues with whatever follows this section.
 | |
| ZGEMM_L1_END:
 |