3530 lines
		
	
	
		
			57 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			3530 lines
		
	
	
		
			57 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /*********************************************************************/
 | |
| /* Copyright 2009, 2010 The University of Texas at Austin.           */
 | |
| /* All rights reserved.                                              */
 | |
| /*                                                                   */
 | |
| /* Redistribution and use in source and binary forms, with or        */
 | |
| /* without modification, are permitted provided that the following   */
 | |
| /* conditions are met:                                               */
 | |
| /*                                                                   */
 | |
| /*   1. Redistributions of source code must retain the above         */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer.                                                  */
 | |
| /*                                                                   */
 | |
| /*   2. Redistributions in binary form must reproduce the above      */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer in the documentation and/or other materials       */
 | |
| /*      provided with the distribution.                              */
 | |
| /*                                                                   */
 | |
| /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | |
| /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | |
| /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | |
| /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | |
| /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | |
| /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | |
| /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | |
| /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | |
| /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | |
| /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | |
| /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | |
| /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | |
| /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | |
| /*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | |
| /*                                                                   */
 | |
| /* The views and conclusions contained in the software and           */
 | |
| /* documentation are those of the authors and should not be          */
 | |
| /* interpreted as representing official policies, either expressed   */
 | |
| /* or implied, of The University of Texas at Austin.                 */
 | |
| /*********************************************************************/
 | |
| 
 | |
| #define ASSEMBLER
 | |
| #include "common.h"
 | |
| /* Integer registers: problem dimensions and operand base pointers (presumably the incoming arguments - confirm against the caller). */
 | |
| #define M	$4	/* dimension walked by the I loops: I = M >> 1 pairs, then an M & 1 tail */
 | |
| #define	N	$5	/* dimension split into panels via J = N & 1, N & 2, N & 4 */
 | |
| #define	K	$6	/* inner length: bounds the L loops and scales the A/B panel offsets */
 | |
| #define A	$8	/* base pointer of the A operand */
 | |
| #define B	$9	/* base pointer of the B operand; moved on after each finished panel */
 | |
| #define C	$10	/* base pointer of the output; CO1.. are derived from it per panel */
 | |
| #define LDC	$11	/* stride between CO1, CO2, ...; shifted left by BASE_SHIFT on entry */
 | |
| /* Working pointers that walk the current A and B panels. */
 | |
| #define AO	$12
 | |
| #define BO	$13
 | |
| /* Loop counters. */
 | |
| #define I	$2	/* blocks remaining along M */
 | |
| #define J	$3	/* result of the N & {1,2,4} panel test */
 | |
| #define L	$7	/* inner-loop trip count; also reused as an offset scratch register */
 | |
| /* Output pointers, one per LDC-strided column of the current panel. */
 | |
| #define CO1	$14
 | |
| #define CO2	$15
 | |
| #define CO3	$16
 | |
| #define CO4	$17
 | |
| #define CO5	$18
 | |
| #define CO6	$19
 | |
| #define CO7	$20
 | |
| #define CO8	$21
 | |
| /* Triangular-solve bookkeeping. */
 | |
| #define OFFSET	$22	/* loaded from 144($sp) once the frame is set up */
 | |
| #define KK	$23	/* running position along K; seeded from OFFSET according to LN/LT/RN/RT */
 | |
| #define TEMP	$24	/* scratch for offsets and counts */
 | |
| #define AORIG	$25	/* A panel base kept by the LN/RT variants while AO is recomputed */
 | |
| /* FP temporaries, mostly loaded through AO. */
 | |
| #define a1	$f0
 | |
| #define a2	$f1
 | |
| #define a3	$f27
 | |
| #define a4	$f28
 | |
| /* FP temporaries, mostly loaded through BO; the solve step reuses them for either operand. */
 | |
| #define b1	$f2
 | |
| #define b2	$f3
 | |
| #define b3	$f4
 | |
| #define b4	$f5
 | |
| #define b5	$f6
 | |
| #define b6	$f7
 | |
| #define b7	$f8
 | |
| #define b8	$f9
 | |
| /* a5 shares $f9 with b8: loading either one overwrites the other. */
 | |
| #define a5	b8
 | |
| /* FP accumulators; the numbering skips $f15, which is assigned to ALPHA. */
 | |
| #define c11	$f10
 | |
| #define c12	$f11
 | |
| #define c21	$f12
 | |
| #define c22	$f13
 | |
| #define c31	$f14
 | |
| #define c32	$f16
 | |
| #define c41	$f17
 | |
| #define c42	$f18
 | |
| #define c51	$f19
 | |
| #define c52	$f20
 | |
| #define c61	$f21
 | |
| #define c62	$f22
 | |
| #define c71	$f23
 | |
| #define c72	$f24
 | |
| #define c81	$f25
 | |
| #define c82	$f26
 | |
| /* NOTE(review): ALPHA is defined but not referenced in the part of the kernel reviewed here. */
 | |
| #define ALPHA	$f15
 | |
| 
 | |
| 	PROLOGUE	/* entry macro; not defined in this file (presumably via common.h) */
 | |
| /* Reserve a 144-byte frame and spill the registers this kernel reuses. */
 | |
| 	daddiu	$sp, $sp, -144
 | |
| /* $16-$21 back CO3-CO8; $f24-$f28 back c72, c81, c82, a3 and a4. */
 | |
| 	SDARG	$16,   0($sp)
 | |
| 	SDARG	$17,   8($sp)
 | |
| 	SDARG	$18,  16($sp)
 | |
| 	SDARG	$19,  24($sp)
 | |
| 	SDARG	$20,  32($sp)
 | |
| 	SDARG	$21,  40($sp)
 | |
| 	sdc1	$f24, 48($sp)
 | |
| 	sdc1	$f25, 56($sp)
 | |
| 	sdc1	$f26, 64($sp)
 | |
| 	sdc1	$f27, 72($sp)
 | |
| 	sdc1	$f28, 80($sp)
 | |
| /* $22-$25 back OFFSET, KK, TEMP and AORIG. */
 | |
| 	SDARG	$22,  88($sp)
 | |
| 	SDARG	$23,  96($sp)
 | |
| 	SDARG	$24, 104($sp)
 | |
| 	SDARG	$25, 112($sp)
 | |
| /* NOTE(review): without __64BIT__, $f20 is stored to 112($sp), the slot just used for $25, so the later store wins - check the restore sequence (not visible here). */
 | |
| #ifndef __64BIT__
 | |
| 	sdc1	$f20,112($sp)	/* $f20-$f23 back c52, c61, c62 and c71 */
 | |
| 	sdc1	$f21,120($sp)
 | |
| 	sdc1	$f22,128($sp)
 | |
| 	sdc1	$f23,136($sp)
 | |
| #endif
 | |
| /* 144($sp) is the first slot above the new frame, i.e. offset 0 from the $sp value on entry. */
 | |
| 	LDARG	OFFSET, 144($sp)
 | |
| /* Scale LDC by BASE_SHIFT (presumably element count -> byte stride; BASE_SHIFT is not defined in this file). */
 | |
| 	dsll	LDC, LDC, BASE_SHIFT
 | |
| /* LN: move A forward by (M*K) << BASE_SHIFT and C by M << BASE_SHIFT; the LN paths then step AORIG and CO1 backwards. */
 | |
| #ifdef LN
 | |
| 	mult	M, K	/* NOTE(review): mult/mflo gives a 32-bit product - confirm M*K stays in range */
 | |
| 	mflo	TEMP
 | |
| 
 | |
| 	dsll	TEMP, TEMP, BASE_SHIFT
 | |
| 	daddu	A, A, TEMP
 | |
| 
 | |
| 	dsll	TEMP, M, BASE_SHIFT
 | |
| 	daddu	C, C, TEMP
 | |
| #endif
 | |
| /* RN: KK starts at -OFFSET. */
 | |
| #ifdef RN
 | |
| 	neg	KK, OFFSET
 | |
| #endif
 | |
| /* RT: move B forward by (N*K) << BASE_SHIFT and C by N*LDC, and set KK = N - OFFSET; each RT panel then steps B and C backwards. */
 | |
| #ifdef RT
 | |
| 	mult	N, K	/* NOTE(review): 32-bit product, as for the LN case above */
 | |
| 	mflo	TEMP
 | |
| 
 | |
| 	dsll	TEMP, TEMP, BASE_SHIFT
 | |
| 	daddu	B, B, TEMP
 | |
| 
 | |
| 	mult	N, LDC	/* LDC has already been shifted by BASE_SHIFT here, so TEMP needs no further shift */
 | |
| 	mflo	TEMP
 | |
| 	daddu	C, C, TEMP
 | |
| 
 | |
| 	dsubu	KK, N, OFFSET
 | |
| #endif
 | |
| 
 | |
| 	andi	J,  N, 1
 | |
| 	blez	J, .L30
 | |
| 	NOP
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, BASE_SHIFT
 | |
| 	dsubu	B, B, TEMP
 | |
| 
 | |
| 	dsubu	C, C, LDC
 | |
| #endif
 | |
| 
 | |
| 	move	AO, A
 | |
| 	move	CO1, C
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddu	KK, M, OFFSET
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	move	KK, OFFSET
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| 	move	AORIG, A
 | |
| #else
 | |
| 	move	AO, A
 | |
| #endif
 | |
| #ifndef RT
 | |
| 	daddu	C,  CO1, LDC
 | |
| #endif
 | |
| 
 | |
| 	dsra	I,  M, 1
 | |
| 	blez	I, .L80
 | |
| 	NOP
 | |
| 
 | |
| .L71:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a5,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	dsra	L,  KK, 2
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L75
 | |
| 	move	BO,  B
 | |
| #else
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  1 + BASE_SHIFT
 | |
| 	dsubu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    KK, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, KK, 0 + BASE_SHIFT
 | |
| 
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| 
 | |
| 	dsubu	TEMP, K, KK
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a5,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	dsra	L,  TEMP, 2
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 
 | |
| 	blez	L, .L75
 | |
| 	NOP
 | |
| #endif
 | |
| 	.align	3
 | |
| 
 | |
| .L72:
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 
 | |
| 	LD	a1,  2 * SIZE(AO)
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	LD	b1,  1 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 
 | |
| 	LD	a1,  4 * SIZE(AO)
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	LD	b1,  2 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 
 | |
| 	LD	a1,  6 * SIZE(AO)
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	LD	b1,  3 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	bgtz	L, .L72
 | |
| 	daddiu	BO, BO,  4 * SIZE
 | |
| 	.align 3
 | |
| 
 | |
| .L75:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3
 | |
| #else
 | |
| 	andi	L, TEMP, 3
 | |
| #endif
 | |
| 	NOP
 | |
| 	blez	L, .L78
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L76:
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  2 * SIZE
 | |
| 	bgtz	L, .L76
 | |
| 	daddiu	BO, BO,  1 * SIZE
 | |
| 
 | |
| .L78:
 | |
| 	ADD	c11, c11, c21
 | |
| 	ADD	c12, c12, c22
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -2
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -1
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    TEMP, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 0 + BASE_SHIFT
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c12, b2, c12
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c12, b2, c12
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	LD	b1,  3 * SIZE(AO)
 | |
| 	LD	b2,  2 * SIZE(AO)
 | |
| 	LD	b3,  0 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c12, b1, c12
 | |
| 	NMSUB	c11, c11, b2, c12
 | |
| 	MUL	c11, b3, c11
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  3 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| 	NMSUB	c12, c12, b2, c11
 | |
| 	MUL	c12, b3, c12
 | |
| #endif
 | |
| 
 | |
| #if defined(RN) || defined(RT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| 	MUL	c12, b1, c12
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	CO1, CO1, -2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)
 | |
| 	ST	c12,  1 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| 	ST	c12,  1 * SIZE(AO)
 | |
| #endif
 | |
| 
 | |
| 	ST	c11,  0 * SIZE(CO1)
 | |
| 	ST	c12,  1 * SIZE(CO1)
 | |
| 
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 1 + BASE_SHIFT
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK
 | |
| 	dsll	L,    TEMP, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 0 + BASE_SHIFT
 | |
| 	daddu	AO, AO, L
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -2
 | |
| #endif
 | |
| 
 | |
| 	daddiu	I, I, -1
 | |
| 
 | |
| 	bgtz	I, .L71
 | |
| 	NOP
 | |
| 	.align 3
 | |
| 
 | |
| .L80:
 | |
| 	andi	I,  M, 1
 | |
| 	blez	I, .L89
 | |
| 	NOP
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c21, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	dsra	L,  KK, 2
 | |
| 	blez	L, .L85
 | |
| 	move	BO,  B
 | |
| #else
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  BASE_SHIFT
 | |
| 	dsubu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	dsll	TEMP, KK, BASE_SHIFT
 | |
| 
 | |
| 	daddu	AO, AORIG, TEMP
 | |
| 	daddu	BO, B,     TEMP
 | |
| 
 | |
| 	dsubu	TEMP, K, KK
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 
 | |
| 	dsra	L,  TEMP, 2
 | |
| 	blez	L, .L85
 | |
| 	NOP
 | |
| #endif
 | |
| 	.align	3
 | |
| 
 | |
| .L82:
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 
 | |
| 	LD	a1,  1 * SIZE(AO)
 | |
| 	LD	b1,  1 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c21, c21, a1, b1
 | |
| 
 | |
| 	LD	a1,  2 * SIZE(AO)
 | |
| 	LD	b1,  2 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 
 | |
| 	LD	a1,  3 * SIZE(AO)
 | |
| 	LD	b1,  3 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c21, c21, a1, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| 	bgtz	L, .L82
 | |
| 	daddiu	BO, BO,  4 * SIZE
 | |
| 	.align 3
 | |
| 
 | |
| .L85:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3
 | |
| #else
 | |
| 	andi	L, TEMP, 3
 | |
| #endif
 | |
| 	NOP
 | |
| 	blez	L, .L88
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L86:
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	bgtz	L, .L86
 | |
| 	daddiu	BO, BO,  1 * SIZE
 | |
| 
 | |
| 
 | |
| .L88:
 | |
| 	ADD	c11, c11, c21
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -1
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -1
 | |
| #endif
 | |
| 
 | |
| 	dsll	TEMP, TEMP, 0 + BASE_SHIFT
 | |
| 	daddu	AO, AORIG, TEMP
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| #endif
 | |
| 
 | |
| #if defined(RN) || defined(RT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	CO1, CO1, -1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| #endif
 | |
| 
 | |
| 	ST	c11,  0 * SIZE(CO1)
 | |
| 
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, BASE_SHIFT
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK
 | |
| 	dsll	TEMP, TEMP, 0 + BASE_SHIFT
 | |
| 	daddu	AO, AO, TEMP
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -1
 | |
| #endif
 | |
| 	.align 3
 | |
| 
 | |
| .L89:
 | |
| #ifdef LN
 | |
| 	dsll	TEMP, K, BASE_SHIFT
 | |
| 	daddu	B, B, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	move	B,  BO
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	daddiu	KK, KK,  1
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	daddiu	KK, KK, -1
 | |
| #endif
 | |
| 	.align 3
 | |
| 
 | |
| .L30:
 | |
| 	andi	J,  N, 2
 | |
| 	blez	J, .L50
 | |
| 	NOP
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 1 + BASE_SHIFT
 | |
| 	dsubu	B, B, TEMP
 | |
| 
 | |
| 	dsll	TEMP, LDC, 1
 | |
| 	dsubu	C, C, TEMP
 | |
| #endif
 | |
| 
 | |
| 	move	AO, A
 | |
| 	move	CO1, C
 | |
| 	daddu	CO2, C,   LDC
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddu	KK, M, OFFSET
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	move	KK, OFFSET
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| 	move	AORIG, A
 | |
| #else
 | |
| 	move	AO, A
 | |
| #endif
 | |
| #ifndef RT
 | |
| 	daddu	C,  CO2, LDC
 | |
| #endif
 | |
| 
 | |
| 	dsra	I,  M, 1
 | |
| 	blez	I, .L60
 | |
| 	NOP
 | |
| 
 | |
| .L51:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a5,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	dsra	L,  KK, 2
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L55
 | |
| 	move	BO,  B
 | |
| 
 | |
| #else
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  1 + BASE_SHIFT
 | |
| 	dsubu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    KK, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, KK, 1 + BASE_SHIFT
 | |
| 
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| 
 | |
| 	dsubu	TEMP, K, KK
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a5,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	dsra	L,  TEMP, 2
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 
 | |
| 	blez	L, .L55
 | |
| 	NOP
 | |
| #endif
 | |
| 	.align	3
 | |
| 
 | |
| .L52:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b1,  8 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b3
 | |
| 	LD	a1,  8 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b4
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c12, c12, a4, b3
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c22, c22, a4, b4
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a5, b5
 | |
| 	LD	a3,  6 * SIZE(AO)
 | |
| 	MADD	c21, c21, a5, b2
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 	MADD	c12, c12, a2, b5
 | |
| 	LD	a4,  7 * SIZE(AO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b5, 12 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b3
 | |
| 	LD	a5, 12 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b4
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c12, c12, a4, b3
 | |
| 	LD	a2,  9 * SIZE(AO)
 | |
| 	MADD	c22, c22, a4, b4
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	daddiu	L, L, -1
 | |
| 	bgtz	L, .L52
 | |
| 	daddiu	BO, BO,  8 * SIZE
 | |
| 	.align 3
 | |
| 
 | |
| .L55:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3
 | |
| #else
 | |
| 	andi	L, TEMP, 3
 | |
| #endif
 | |
| 	NOP
 | |
| 	blez	L, .L58
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L56:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	a1,  2 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1,  2 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  3 * SIZE(BO)
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  2 * SIZE
 | |
| 	bgtz	L, .L56
 | |
| 	daddiu	BO, BO,  2 * SIZE
 | |
| 
 | |
| .L58:
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -2
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -2
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    TEMP, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c21, b2, c21
 | |
| 	SUB	c12, b3, c12
 | |
| 	SUB	c22, b4, c22
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  2 * SIZE(AO)
 | |
| 	LD	b4,  3 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c12, b2, c12
 | |
| 	SUB	c21, b3, c21
 | |
| 	SUB	c22, b4, c22
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	LD	b1,  3 * SIZE(AO)
 | |
| 	LD	b2,  2 * SIZE(AO)
 | |
| 	LD	b3,  0 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c12, b1, c12
 | |
| 	MUL	c22, b1, c22
 | |
| 
 | |
| 	NMSUB	c11, c11, b2, c12
 | |
| 	NMSUB	c21, c21, b2, c22
 | |
| 
 | |
| 	MUL	c11, b3, c11
 | |
| 	MUL	c21, b3, c21
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  3 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| 	MUL	c21, b1, c21
 | |
| 
 | |
| 	NMSUB	c12, c12, b2, c11
 | |
| 	NMSUB	c22, c22, b2, c21
 | |
| 
 | |
| 	MUL	c12, b3, c12
 | |
| 	MUL	c22, b3, c22
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  3 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| 	MUL	c12, b1, c12
 | |
| 
 | |
| 	NMSUB	c21, c21, b2, c11
 | |
| 	NMSUB	c22, c22, b2, c12
 | |
| 
 | |
| 	MUL	c21, b3, c21
 | |
| 	MUL	c22, b3, c22
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LD	b1,  3 * SIZE(BO)
 | |
| 	LD	b2,  2 * SIZE(BO)
 | |
| 	LD	b3,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b1, c21
 | |
| 	MUL	c22, b1, c22
 | |
| 
 | |
| 	NMSUB	c11, c11, b2, c21
 | |
| 	NMSUB	c12, c12, b2, c22
 | |
| 
 | |
| 	MUL	c11, b3, c11
 | |
| 	MUL	c12, b3, c12
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	CO1, CO1, -2 * SIZE
 | |
| 	daddiu	CO2, CO2, -2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)
 | |
| 	ST	c21,  1 * SIZE(BO)
 | |
| 	ST	c12,  2 * SIZE(BO)
 | |
| 	ST	c22,  3 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| 	ST	c12,  1 * SIZE(AO)
 | |
| 	ST	c21,  2 * SIZE(AO)
 | |
| 	ST	c22,  3 * SIZE(AO)
 | |
| #endif
 | |
| 
 | |
| 	ST	c11,  0 * SIZE(CO1)
 | |
| 	ST	c12,  1 * SIZE(CO1)
 | |
| 	ST	c21,  0 * SIZE(CO2)
 | |
| 	ST	c22,  1 * SIZE(CO2)
 | |
| 
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 2 * SIZE
 | |
| 	daddiu	CO2, CO2, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 1 + BASE_SHIFT
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK
 | |
| 	dsll	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	daddu	AO, AO, TEMP
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -2
 | |
| #endif
 | |
| 
 | |
| 	MTC	$0,  a1
 | |
| 
 | |
| 	MOV	c11, a1
 | |
| 	MOV	c21, a1
 | |
| 	MOV	c31, a1
 | |
| 
 | |
| 	daddiu	I, I, -1
 | |
| 
 | |
| 	bgtz	I, .L51
 | |
| 	MOV	c41, c11
 | |
| 	.align 3
 | |
| 
 | |
| .L60:
 | |
| 	andi	I,  M, 1
 | |
| 	blez	I, .L69
 | |
| 	NOP
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsra	L,  KK, 2
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	MOV	c31, c11
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 	MOV	c41, c11
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L65
 | |
| 	move	BO,  B
 | |
| #else
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  BASE_SHIFT
 | |
| 	dsubu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    KK, 0 + BASE_SHIFT
 | |
| 	dsll	TEMP, KK, 1 + BASE_SHIFT
 | |
| 
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| 
 | |
| 	dsubu	TEMP, K, KK
 | |
| 
 | |
| 	dsra	L,  TEMP, 2
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	MOV	c31, c11
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 	MOV	c41, c11
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 
 | |
| 	blez	L, .L65
 | |
| 	NOP
 | |
| #endif
 | |
| 	.align	3
 | |
| 
 | |
| .L62:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  4 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	LD	a1,  4 * SIZE(AO)
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b1
 | |
| 	LD	b1,  8 * SIZE(BO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	LD	a3,  6 * SIZE(AO)
 | |
| 	LD	a4,  7 * SIZE(AO)
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| 
 | |
| 	bgtz	L, .L62
 | |
| 	daddiu	BO, BO,  8 * SIZE
 | |
| 	.align 3
 | |
| 
 | |
| .L65:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3
 | |
| #else
 | |
| 	andi	L, TEMP, 3
 | |
| #endif
 | |
| 	NOP
 | |
| 	blez	L, .L68
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L66:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  2 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  3 * SIZE(BO)
 | |
| 
 | |
| 	LD	a1,  1 * SIZE(AO)
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	bgtz	L, .L66
 | |
| 	daddiu	BO, BO,  2 * SIZE
 | |
| 
 | |
| 
 | |
| .L68:
 | |
| 	ADD	c11, c11, c31
 | |
| 	ADD	c21, c21, c41
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -1
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -2
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    TEMP, 0 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c21, b2, c21
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c21, b2, c21
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b3,  0 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c11, b3, c11
 | |
| 	MUL	c21, b3, c21
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  3 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| 
 | |
| 	NMSUB	c21, c21, b2, c11
 | |
| 
 | |
| 	MUL	c21, b3, c21
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LD	b1,  3 * SIZE(BO)
 | |
| 	LD	b2,  2 * SIZE(BO)
 | |
| 	LD	b3,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b1, c21
 | |
| 
 | |
| 	NMSUB	c11, c11, b2, c21
 | |
| 
 | |
| 	MUL	c11, b3, c11
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	CO1, CO1, -1 * SIZE
 | |
| 	daddiu	CO2, CO2, -1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)
 | |
| 	ST	c21,  1 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| 	ST	c21,  1 * SIZE(AO)
 | |
| #endif
 | |
| 
 | |
| 	ST	c11,  0 * SIZE(CO1)
 | |
| 	ST	c21,  0 * SIZE(CO2)
 | |
| 
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 1 * SIZE
 | |
| 	daddiu	CO2, CO2, 1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 0 + BASE_SHIFT
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK
 | |
| 	dsll	L,    TEMP, 0 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	daddu	AO, AO, L
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -1
 | |
| #endif
 | |
| 	.align 3
 | |
| 
 | |
| .L69:
 | |
| #ifdef LN
 | |
| 	dsll	TEMP, K, 1 + BASE_SHIFT
 | |
| 	daddu	B, B, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	move	B,  BO
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	daddiu	KK, KK,  2
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	daddiu	KK, KK, -2
 | |
| #endif
 | |
| 	.align 3
 | |
| 
 | |
| .L50:
 | |
| 	andi	J,  N, 4
 | |
| 	blez	J, .L70
 | |
| 	move	AO, A
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 2 + BASE_SHIFT
 | |
| 	dsubu	B, B, TEMP
 | |
| 
 | |
| 	dsll	TEMP, LDC, 2
 | |
| 	dsubu	C, C, TEMP
 | |
| #endif
 | |
| 
 | |
| 	move	CO1, C
 | |
| 	MTC	$0,  c11
 | |
| 	daddu	CO2, C,   LDC
 | |
| 	daddu	CO3, CO2, LDC
 | |
| 	daddu	CO4, CO3, LDC
 | |
| 	MOV	c21, c11
 | |
| 	dsra	I,  M, 1
 | |
| 	MOV	c31, c11
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddu	KK, M, OFFSET
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	move	KK, OFFSET
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| 	move	AORIG, A
 | |
| #else
 | |
| 	move	AO, A
 | |
| #endif
 | |
| #ifndef RT
 | |
| 	daddu	C,  CO4, LDC
 | |
| #endif
 | |
| 
 | |
| 	blez	I, .L40
 | |
| 	MOV	c41, c11
 | |
| 
 | |
| .L31:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a3,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	MOV	c32, c11
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	MOV	c42, c11
 | |
| 
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	dsra	L,  KK, 2
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L35
 | |
| 	move	BO,  B
 | |
| #else
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  1 + BASE_SHIFT
 | |
| 	dsubu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    KK, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, KK, 2 + BASE_SHIFT
 | |
| 
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| 
 | |
| 	dsubu	TEMP, K, KK
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a3,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MOV	c32, c11
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	MOV	c42, c11
 | |
| 
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	dsra	L,  TEMP, 2
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 
 | |
| 	blez	L, .L35
 | |
| 	NOP
 | |
| #endif
 | |
| 	.align	3
 | |
| 
 | |
| .L32:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	daddiu	L, L, -1
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  2 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b5
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  8 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b6
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	LD	a3,  6 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b7
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	daddiu	BO, BO, 16 * SIZE
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	LD	a3,  4 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	NOP
 | |
| 
 | |
| 	bgtz	L, .L32
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	.align 3
 | |
| 
 | |
| .L35:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3
 | |
| #else
 | |
| 	andi	L, TEMP, 3
 | |
| #endif
 | |
| 	NOP
 | |
| 	blez	L, .L38
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L36:	/* Single-step loop: each pass combines two A values (a1, a2) with four B values (b1..b4) into c11..c41 and c12..c42 */
 | |
| 	MADD	c11, c11, a1, b1	/* presumably c11 += a1 * b1; MADD is a macro defined outside this chunk */
 | |
| 	LD	a2,  1 * SIZE(AO)	/* second A value of this step */
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	daddiu	L, L, -1	/* one pass consumed */
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	daddiu	AO, AO,  2 * SIZE	/* advance A by two values */
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  0 * SIZE(AO)	/* AO was already advanced, so this is the next step's first A value */
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1,  4 * SIZE(BO)	/* BO is not advanced yet: offsets 4..7 are the next step's four B values */
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	bgtz	L, .L36	/* repeat while L > 0 */
 | |
| 	daddiu	BO, BO,  4 * SIZE	/* follows the branch: runs on both paths if delay slots are active (assumes .set noreorder, not visible here) */
 | |
| 
 | |
| .L38:	/* 2x4 tile, after accumulation: form (stored value - accumulator), run the LN/LT/RN/RT substitution sequence, store the results, then advance to the next tile */
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -2	/* LN: TEMP = KK - 2 */
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -4	/* RT: TEMP = KK - 4 */
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    TEMP, 1 + BASE_SHIFT	/* A-side offset: TEMP scaled by 2 values (presumably bytes if BASE_SHIFT is log2 of SIZE; confirm) */
 | |
| 	dsll	TEMP, TEMP, 2 + BASE_SHIFT	/* B-side offset: TEMP scaled by 4 values */
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)	/* LN/LT: the eight stored values are read from BO */
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	LD	b6,  5 * SIZE(BO)
 | |
| 	LD	b7,  6 * SIZE(BO)
 | |
| 	LD	b8,  7 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c11, b1, c11	/* presumably c11 = b1 - c11; SUB is a macro defined outside this chunk */
 | |
| 	SUB	c21, b2, c21
 | |
| 	SUB	c31, b3, c31
 | |
| 	SUB	c41, b4, c41
 | |
| 	SUB	c12, b5, c12
 | |
| 	SUB	c22, b6, c22
 | |
| 	SUB	c32, b7, c32
 | |
| 	SUB	c42, b8, c42
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)	/* RN/RT: the eight stored values are read from AO instead */
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  2 * SIZE(AO)
 | |
| 	LD	b4,  3 * SIZE(AO)
 | |
| 	LD	b5,  4 * SIZE(AO)
 | |
| 	LD	b6,  5 * SIZE(AO)
 | |
| 	LD	b7,  6 * SIZE(AO)
 | |
| 	LD	b8,  7 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c11, b1, c11	/* note the pairing differs from the LN/LT branch: here order is c11, c12, c21, c22, ... */
 | |
| 	SUB	c12, b2, c12
 | |
| 	SUB	c21, b3, c21
 | |
| 	SUB	c22, b4, c22
 | |
| 	SUB	c31, b5, c31
 | |
| 	SUB	c32, b6, c32
 | |
| 	SUB	c41, b7, c41
 | |
| 	SUB	c42, b8, c42
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	LD	b1,  3 * SIZE(AO)	/* LN uses entries 3, 2 and 0 of the four values at AO; entry 1 is not read */
 | |
| 	LD	b2,  2 * SIZE(AO)
 | |
| 	LD	b3,  0 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c12, b1, c12	/* second-row values scaled first; NOTE(review): a multiply, not a divide, so entry 3 is presumably stored pre-inverted - confirm */
 | |
| 	MUL	c22, b1, c22
 | |
| 	MUL	c32, b1, c32
 | |
| 	MUL	c42, b1, c42
 | |
| 
 | |
| 	NMSUB	c11, c11, b2, c12	/* presumably c11 -= b2 * c12; NMSUB is a macro defined outside this chunk */
 | |
| 	NMSUB	c21, c21, b2, c22
 | |
| 	NMSUB	c31, c31, b2, c32
 | |
| 	NMSUB	c41, c41, b2, c42
 | |
| 
 | |
| 	MUL	c11, b3, c11	/* first-row values scaled last */
 | |
| 	MUL	c21, b3, c21
 | |
| 	MUL	c31, b3, c31
 | |
| 	MUL	c41, b3, c41
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LD	b1,  0 * SIZE(AO)	/* LT uses entries 0, 1 and 3 of the four values at AO; entry 2 is not read */
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  3 * SIZE(AO)
 | |
| 
 | |
| 	MUL	c11, b1, c11	/* first-row values scaled first (mirror image of the LN order) */
 | |
| 	MUL	c21, b1, c21
 | |
| 	MUL	c31, b1, c31
 | |
| 	MUL	c41, b1, c41
 | |
| 
 | |
| 	NMSUB	c12, c12, b2, c11
 | |
| 	NMSUB	c22, c22, b2, c21
 | |
| 	NMSUB	c32, c32, b2, c31
 | |
| 	NMSUB	c42, c42, b2, c41
 | |
| 
 | |
| 	MUL	c12, b3, c12
 | |
| 	MUL	c22, b3, c22
 | |
| 	MUL	c32, b3, c32
 | |
| 	MUL	c42, b3, c42
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LD	b1,  0 * SIZE(BO)	/* RN walks a 4x4 block at BO forward: offsets 0..3, then 5..7, 10..11, 15 */
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b1, c11	/* column 1 finished */
 | |
| 	MUL	c12, b1, c12
 | |
| 
 | |
| 	NMSUB	c21, c21, b2, c11	/* remove column 1's contribution from columns 2..4 */
 | |
| 	NMSUB	c22, c22, b2, c12
 | |
| 	NMSUB	c31, c31, b3, c11
 | |
| 	NMSUB	c32, c32, b3, c12
 | |
| 	NMSUB	c41, c41, b4, c11
 | |
| 	NMSUB	c42, c42, b4, c12
 | |
| 
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b2, c21	/* column 2 finished */
 | |
| 	MUL	c22, b2, c22
 | |
| 
 | |
| 	NMSUB	c31, c31, b3, c21
 | |
| 	NMSUB	c32, c32, b3, c22
 | |
| 	NMSUB	c41, c41, b4, c21
 | |
| 	NMSUB	c42, c42, b4, c22
 | |
| 
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b3, c31	/* column 3 finished */
 | |
| 	MUL	c32, b3, c32
 | |
| 
 | |
| 	NMSUB	c41, c41, b4, c31
 | |
| 	NMSUB	c42, c42, b4, c32
 | |
| 
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b4, c41	/* column 4 finished */
 | |
| 	MUL	c42, b4, c42
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LD	b5, 15 * SIZE(BO)	/* RT walks the same 4x4 block backward: offsets 15..12, then 10..8, 5..4, 0 */
 | |
| 	LD	b6, 14 * SIZE(BO)
 | |
| 	LD	b7, 13 * SIZE(BO)
 | |
| 	LD	b8, 12 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b5, c41	/* column 4 finished first */
 | |
| 	MUL	c42, b5, c42
 | |
| 
 | |
| 	NMSUB	c31, c31, b6, c41	/* remove column 4's contribution from columns 3..1 */
 | |
| 	NMSUB	c32, c32, b6, c42
 | |
| 	NMSUB	c21, c21, b7, c41
 | |
| 	NMSUB	c22, c22, b7, c42
 | |
| 	NMSUB	c11, c11, b8, c41
 | |
| 	NMSUB	c12, c12, b8, c42
 | |
| 
 | |
| 	LD	b6, 10 * SIZE(BO)
 | |
| 	LD	b7,  9 * SIZE(BO)
 | |
| 	LD	b8,  8 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b6, c31	/* column 3 finished */
 | |
| 	MUL	c32, b6, c32
 | |
| 
 | |
| 	NMSUB	c21, c21, b7, c31
 | |
| 	NMSUB	c22, c22, b7, c32
 | |
| 	NMSUB	c11, c11, b8, c31
 | |
| 	NMSUB	c12, c12, b8, c32
 | |
| 
 | |
| 	LD	b7,  5 * SIZE(BO)
 | |
| 	LD	b8,  4 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b7, c21	/* column 2 finished */
 | |
| 	MUL	c22, b7, c22
 | |
| 
 | |
| 	NMSUB	c11, c11, b8, c21
 | |
| 	NMSUB	c12, c12, b8, c22
 | |
| 
 | |
| 	LD	b8,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b8, c11	/* column 1 finished last */
 | |
| 	MUL	c12, b8, c12
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	CO1, CO1, -2 * SIZE	/* LN: the four output pointers step back two values before the store */
 | |
| 	daddiu	CO2, CO2, -2 * SIZE
 | |
| 	daddiu	CO3, CO3, -2 * SIZE
 | |
| 	daddiu	CO4, CO4, -2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)	/* results written back to the buffer they were read from, in the same order as the loads above */
 | |
| 	ST	c21,  1 * SIZE(BO)
 | |
| 	ST	c31,  2 * SIZE(BO)
 | |
| 	ST	c41,  3 * SIZE(BO)
 | |
| 	ST	c12,  4 * SIZE(BO)
 | |
| 	ST	c22,  5 * SIZE(BO)
 | |
| 	ST	c32,  6 * SIZE(BO)
 | |
| 	ST	c42,  7 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| 	ST	c12,  1 * SIZE(AO)
 | |
| 	ST	c21,  2 * SIZE(AO)
 | |
| 	ST	c22,  3 * SIZE(AO)
 | |
| 	ST	c31,  4 * SIZE(AO)
 | |
| 	ST	c32,  5 * SIZE(AO)
 | |
| 	ST	c41,  6 * SIZE(AO)
 | |
| 	ST	c42,  7 * SIZE(AO)
 | |
| #endif
 | |
| 
 | |
| 	ST	c11,  0 * SIZE(CO1)	/* and to the output: two values through each of CO1..CO4 */
 | |
| 	ST	c12,  1 * SIZE(CO1)
 | |
| 	ST	c21,  0 * SIZE(CO2)
 | |
| 	ST	c22,  1 * SIZE(CO2)
 | |
| 	ST	c31,  0 * SIZE(CO3)
 | |
| 	ST	c32,  1 * SIZE(CO3)
 | |
| 	ST	c41,  0 * SIZE(CO4)
 | |
| 	ST	c42,  1 * SIZE(CO4)
 | |
| 
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 2 * SIZE	/* every mode except LN steps the output pointers forward after the store */
 | |
| 	daddiu	CO2, CO2, 2 * SIZE
 | |
| 	daddiu	CO3, CO3, 2 * SIZE
 | |
| 	daddiu	CO4, CO4, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 1 + BASE_SHIFT	/* RT: AORIG moves forward by K scaled for two values per step */
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK	/* LT/RN: skip the K - KK steps that were not consumed */
 | |
| 	dsll	L,    TEMP, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 2 + BASE_SHIFT
 | |
| 	daddu	AO, AO, L
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -2
 | |
| #endif
 | |
| 
 | |
| 	MTC	$0,  a1	/* a1 takes the contents of integer register $0 (zero) */
 | |
| 
 | |
| 	MOV	c11, a1	/* clear accumulators for the next tile */
 | |
| 	MOV	c21, a1
 | |
| 	MOV	c31, a1
 | |
| 
 | |
| 	daddiu	I, I, -1	/* one tile done */
 | |
| 
 | |
| 	bgtz	I, .L31	/* more tiles: back to .L31 (label is outside this chunk) */
 | |
| 	MOV	c41, c11	/* follows the branch: c41 is cleared on both paths if delay slots are active (assumes .set noreorder) */
 | |
| 	.align 3
 | |
| 
 | |
| .L40:	/* Leftover-row tile: taken only when M is odd; sets up pointers, preloads operands and the unrolled-loop count */
 | |
| 	andi	I,  M, 1	/* I = M & 1 */
 | |
| 	blez	I, .L49	/* M even: nothing to do here */
 | |
| 	MOV	c61, c11	/* follows the branch (delay slot if .set noreorder is in effect; not visible here) */
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MOV	c71, c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)	/* b1..b4: four B values of the first step */
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)	/* b5, b6, b7: offsets 4, 8, 12, the leading B value of each of the next three steps */
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	dsra	L,  KK, 2	/* L = KK / 4: passes of the 4-step unrolled loop */
 | |
| 
 | |
| 	blez	L, .L45
 | |
| 	move	BO,  B	/* follows the branch, so BO = B is needed on both paths */
 | |
| #else
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  BASE_SHIFT	/* LN: AORIG steps back by K scaled for one value per step */
 | |
| 	dsubu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    KK, 0 + BASE_SHIFT	/* A offset: KK scaled for 1 value per step */
 | |
| 	dsll	TEMP, KK, 2 + BASE_SHIFT	/* B offset: KK scaled for 4 values per step */
 | |
| 
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| 
 | |
| 	dsubu	TEMP, K, KK	/* TEMP = K - KK, the trip count in LN/RT builds */
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MOV	c71, c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 
 | |
| 	dsra	L,  TEMP, 2	/* L = TEMP / 4 */
 | |
| 
 | |
| 	blez	L, .L45
 | |
| 	NOP
 | |
| #endif
 | |
| 	.align	3
 | |
| 
 | |
| .L42:	/* 1x4 tile, four steps per pass: each step adds one A value times four B values into c11..c41; AO advances 4 values and BO 16 values per pass */
 | |
| 	MADD	c11, c11, a1, b1	/* step 1: uses a1 and b1..b4 */
 | |
| 	LD	b1, 16 * SIZE(BO)	/* offset 16 becomes offset 0 once BO has advanced by 16 below */
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	LD	a1,  4 * SIZE(AO)	/* first A value of the next pass */
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	MADD	c11, c11, a2, b5	/* step 2: uses a2 (offset 1) and B offsets 4..7 */
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	LD	a2,  2 * SIZE(AO)
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| 
 | |
| 	MADD	c11, c11, a2, b6	/* step 3: uses B offsets 8..11 */
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	LD	a2, -1 * SIZE(AO)	/* AO already advanced by 4, so -1 is offset 3 of this pass */
 | |
| 	daddiu	BO, BO, 16 * SIZE
 | |
| 
 | |
| 	MADD	c11, c11, a2, b7	/* step 4: uses B offsets 12..15 of the pre-advance BO */
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	bgtz	L, .L42	/* repeat while L > 0 */
 | |
| 	LD	a2,  1 * SIZE(AO)	/* follows the branch: reloads a2 for the next pass (delay slot if .set noreorder is in effect) */
 | |
| 	.align 3
 | |
| 
 | |
| .L45:	/* Compute the leftover pass count (low two bits of the trip count) for the single-step loop at .L46 */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3	/* LT/RN builds: L = KK & 3 */
 | |
| #else
 | |
| 	andi	L, TEMP, 3	/* LN/RT builds: L = TEMP & 3 */
 | |
| #endif
 | |
| 	NOP
 | |
| 	blez	L, .L48	/* no leftover passes: go to .L48 */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L46:	/* Single-step loop for the 1x4 tile: one A value (a1) times four B values (b1..b4) per pass */
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  4 * SIZE(BO)	/* next step's B values sit at offsets 4..7 until BO advances */
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  1 * SIZE(AO)	/* next step's A value, loaded after the last use of the current one */
 | |
| 
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	MOV	a2, a2	/* register copied onto itself: no data effect, apparently filler - confirm */
 | |
| 	bgtz	L, .L46	/* repeat while L > 0 */
 | |
| 	daddiu	BO, BO,  4 * SIZE	/* follows the branch: runs on both paths if delay slots are active (assumes .set noreorder) */
 | |
| 
 | |
| 
 | |
| .L48:	/* 1x4 tile, after accumulation: form (stored value - accumulator), run the LN/LT/RN/RT substitution sequence, store the results, update pointers and KK */
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -1	/* LN: TEMP = KK - 1 */
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -4	/* RT: TEMP = KK - 4 */
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    TEMP, 0 + BASE_SHIFT	/* A offset: TEMP scaled for 1 value per step */
 | |
| 	dsll	TEMP, TEMP, 2 + BASE_SHIFT	/* B offset: TEMP scaled for 4 values per step */
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)	/* LN/LT: the four stored values are read from BO */
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c11, b1, c11	/* presumably c11 = b1 - c11; SUB is a macro defined outside this chunk */
 | |
| 	SUB	c21, b2, c21
 | |
| 	SUB	c31, b3, c31
 | |
| 	SUB	c41, b4, c41
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)	/* RN/RT: the four stored values are read from AO */
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  2 * SIZE(AO)
 | |
| 	LD	b4,  3 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c21, b2, c21
 | |
| 	SUB	c31, b3, c31
 | |
| 	SUB	c41, b4, c41
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(AO)	/* single A value: LN and LT share the same code for a one-row tile */
 | |
| 
 | |
| 	MUL	c11, b1, c11	/* NOTE(review): scaled by multiplication, so this value is presumably stored pre-inverted - confirm */
 | |
| 	MUL	c21, b1, c21
 | |
| 	MUL	c31, b1, c31
 | |
| 	MUL	c41, b1, c41
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LD	b1,  0 * SIZE(BO)	/* RN walks a 4x4 block at BO forward: offsets 0..3, then 5..7, 10..11, 15 */
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| 
 | |
| 	NMSUB	c21, c21, b2, c11	/* presumably c21 -= b2 * c11; NMSUB is a macro defined outside this chunk */
 | |
| 	NMSUB	c31, c31, b3, c11
 | |
| 	NMSUB	c41, c41, b4, c11
 | |
| 
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b2, c21
 | |
| 
 | |
| 	NMSUB	c31, c31, b3, c21
 | |
| 	NMSUB	c41, c41, b4, c21
 | |
| 
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b3, c31
 | |
| 
 | |
| 	NMSUB	c41, c41, b4, c31
 | |
| 
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b4, c41
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LD	b5, 15 * SIZE(BO)	/* RT walks the same 4x4 block backward: offsets 15..12, then 10..8, 5..4, 0 */
 | |
| 	LD	b6, 14 * SIZE(BO)
 | |
| 	LD	b7, 13 * SIZE(BO)
 | |
| 	LD	b8, 12 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b5, c41
 | |
| 
 | |
| 	NMSUB	c31, c31, b6, c41
 | |
| 	NMSUB	c21, c21, b7, c41
 | |
| 	NMSUB	c11, c11, b8, c41
 | |
| 
 | |
| 	LD	b6, 10 * SIZE(BO)
 | |
| 	LD	b7,  9 * SIZE(BO)
 | |
| 	LD	b8,  8 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b6, c31
 | |
| 
 | |
| 	NMSUB	c21, c21, b7, c31
 | |
| 	NMSUB	c11, c11, b8, c31
 | |
| 
 | |
| 	LD	b7,  5 * SIZE(BO)
 | |
| 	LD	b8,  4 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b7, c21
 | |
| 
 | |
| 	NMSUB	c11, c11, b8, c21
 | |
| 
 | |
| 	LD	b8,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b8, c11
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	CO1, CO1, -1 * SIZE	/* LN: the four output pointers step back one value before the store */
 | |
| 	daddiu	CO2, CO2, -1 * SIZE
 | |
| 	daddiu	CO3, CO3, -1 * SIZE
 | |
| 	daddiu	CO4, CO4, -1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)	/* results written back to the buffer they were read from */
 | |
| 	ST	c21,  1 * SIZE(BO)
 | |
| 	ST	c31,  2 * SIZE(BO)
 | |
| 	ST	c41,  3 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| 	ST	c21,  1 * SIZE(AO)
 | |
| 	ST	c31,  2 * SIZE(AO)
 | |
| 	ST	c41,  3 * SIZE(AO)
 | |
| #endif
 | |
| 
 | |
| 	ST	c11,  0 * SIZE(CO1)	/* and to the output: one value through each of CO1..CO4 */
 | |
| 	ST	c21,  0 * SIZE(CO2)
 | |
| 	ST	c31,  0 * SIZE(CO3)
 | |
| 	ST	c41,  0 * SIZE(CO4)
 | |
| 
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 1 * SIZE	/* every mode except LN steps the output pointers forward after the store */
 | |
| 	daddiu	CO2, CO2, 1 * SIZE
 | |
| 	daddiu	CO3, CO3, 1 * SIZE
 | |
| 	daddiu	CO4, CO4, 1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, BASE_SHIFT	/* RT: AORIG moves forward by K scaled for one value per step */
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK	/* LT/RN: skip the K - KK steps that were not consumed */
 | |
| 	dsll	L,    TEMP, 0 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 2 + BASE_SHIFT
 | |
| 	daddu	AO, AO, L
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -1
 | |
| #endif
 | |
| 	.align 3
 | |
| 
 | |
| .L49:	/* End of the 4-column panel: move B past the panel and adjust KK by 4 where the mode requires it */
 | |
| #ifdef LN
 | |
| 	dsll	TEMP, K, 2 + BASE_SHIFT	/* LN: B advances by K scaled for four values per step */
 | |
| 	daddu	B, B, TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	move	B,  BO	/* LT/RN: B takes over the current value of BO */
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	daddiu	KK, KK,  4
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	daddiu	KK, KK, -4
 | |
| #endif
 | |
| 	.align 3
 | |
| 
 | |
| .L70:	/* Number of 8-column panels: J = N >> 3 */
 | |
| 	dsra	J,  N, 3
 | |
| 	blez	J, .L999	/* none: jump to .L999 (label is outside this chunk) */
 | |
| 	nop
 | |
| 
 | |
| .L10:	/* Per-panel setup for eight columns: derive CO1..CO8 from C and LDC, clear accumulators, initialise KK and the A pointer, compute the 2-row tile count */
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 3 + BASE_SHIFT	/* RT: B steps back by K scaled for eight values per step */
 | |
| 	dsubu	B, B, TEMP
 | |
| 
 | |
| 	dsll	TEMP, LDC, 3	/* RT: C steps back by eight times LDC */
 | |
| 	dsubu	C, C, TEMP
 | |
| #endif
 | |
| 
 | |
| 	move	CO1, C
 | |
| 	MTC	$0,  c11	/* c11 takes the contents of integer register $0 (zero); the MOVs below copy it to the other accumulators */
 | |
| 	daddu	CO2, C,   LDC	/* each COn is the previous one plus LDC */
 | |
| 	daddu	CO3, CO2, LDC
 | |
| 	daddiu	J, J, -1	/* one panel consumed */
 | |
| 	daddu	CO4, CO3, LDC
 | |
| 	MOV	c21, c11
 | |
| 	daddu	CO5, CO4, LDC
 | |
| 	MOV	c31, c11
 | |
| 	daddu	CO6, CO5, LDC
 | |
| 	MOV	c41, c11
 | |
| 	daddu	CO7, CO6, LDC
 | |
| 	MOV	c51, c11
 | |
| 	daddu	CO8, CO7, LDC
 | |
| 	dsra	I,  M, 1	/* I = M / 2: number of 2-row tiles */
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddu	KK, M, OFFSET	/* LN: KK starts at M + OFFSET */
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	move	KK, OFFSET	/* LT: KK starts at OFFSET */
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| 	move	AORIG, A	/* LN/RT keep the base in AORIG; AO is derived from it per tile */
 | |
| #else
 | |
| 	move	AO, A
 | |
| #endif
 | |
| #ifndef RT
 | |
| 	daddu	C,  CO8, LDC	/* every mode except RT advances C to just past the eighth column */
 | |
| #endif
 | |
| 
 | |
| 	blez	I, .L20	/* no 2-row tiles: go to .L20 (label is outside this chunk) */
 | |
| 	MOV	c61, c11	/* follows the branch: c61 is cleared on both paths if delay slots are active (assumes .set noreorder) */
 | |
| 
 | |
| .L11:	/* 2x8 tile setup: clear the remaining accumulators, preload A and B operands, compute the unrolled-loop count, then issue the first four MADDs ahead of the loop at .L12 */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MOV	c71, c11
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	LD	a3,  4 * SIZE(AO)	/* a3: A value at offset 4, first used by the third step of the unrolled loop */
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 
 | |
| 	dsra	L,  KK, 2	/* L = KK / 4: passes of the 4-step unrolled loop */
 | |
| 	MOV	c32, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	MOV	c42, c11
 | |
| 
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	MOV	c52, c11
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	MOV	c62, c11
 | |
| 
 | |
| 	LD	b6,  8 * SIZE(B)	/* b6, b7: offsets 8 and 12, i.e. the start of each half of the second step's eight B values */
 | |
| 	MOV	c72, c11
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 	MOV	c82, c11
 | |
| 
 | |
| 	blez	L, .L15
 | |
| 	move	BO,  B	/* follows the branch, so BO = B is needed on both paths */
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  1 + BASE_SHIFT	/* LN: AORIG steps back by K scaled for two values per step */
 | |
| 	dsubu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    KK, 1 + BASE_SHIFT	/* A offset: KK scaled for 2 values per step */
 | |
| 	dsll	TEMP, KK, 3 + BASE_SHIFT	/* B offset: KK scaled for 8 values per step */
 | |
| 
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| 
 | |
| 	dsubu	TEMP, K, KK	/* TEMP = K - KK, the trip count in LN/RT builds */
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MOV	c71, c11
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	LD	a3,  4 * SIZE(AO)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MOV	c22, c11
 | |
| 
 | |
| 	MOV	c32, c11
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MOV	c42, c11
 | |
| 
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	MOV	c52, c11
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	MOV	c62, c11
 | |
| 
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	MOV	c72, c11
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MOV	c82, c11
 | |
| 
 | |
| 	dsra	L,  TEMP, 2	/* L = TEMP / 4 */
 | |
| 	blez	L, .L15
 | |
| 	NOP
 | |
| #endif
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1	/* loop prologue: the first four MADDs of pass 1 are issued here; .L12 starts with the c12 group */
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	daddiu	L, L, -1
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	blez	L, .L13	/* only one pass: skip the looping copy and use the non-looping copy at .L13 */
 | |
| 	MADD	c41, c41, a1, b4	/* follows the branch: runs on both paths if delay slots are active (assumes .set noreorder) */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L12:	/* 2x8 main loop, four steps per pass (AO += 8 values, BO += 32 values). Entered with c11..c41 += a1*b1..b4 of the current pass already issued; the same four MADDs for the next pass are issued at the bottom */
 | |
| 	MADD	c12, c12, a2, b1	/* step 1, second A value times B offsets 0..3 */
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5	/* step 1, first A value times B offsets 4..7 */
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	LD	a4,  2 * SIZE(AO)
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  8 * SIZE(AO)	/* offset 8 becomes offset 0 once AO has advanced by 8 below */
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6	/* step 2: A offsets 2..3, B offsets 8..15 */
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 28 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 17 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b1	/* step 3: A offsets 4..5, B offsets 16..23 */
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 32 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 21 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 22 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 23 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a3, b5
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a3, b2
 | |
| 	LD	a4,  6 * SIZE(AO)
 | |
| 	MADD	c71, c71, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a3, b4
 | |
| 	LD	a3, 12 * SIZE(AO)	/* offset 12 becomes offset 4 once AO has advanced by 8 below */
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 25 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 26 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6	/* step 4: A offsets 6..7, B offsets 24..31 */
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	daddiu	L, L, -1	/* one pass consumed */
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 40 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 29 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 30 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 31 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	daddiu	BO, BO, 32 * SIZE	/* pointers advance here; the loads below use post-advance offsets */
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1	/* first four MADDs of the next pass, matching what .L12 expects on entry */
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	bgtz	L, .L12	/* repeat while L > 0 */
 | |
| 	MADD	c41, c41, a1, b4	/* follows the branch: runs on both paths if delay slots are active (assumes .set noreorder) */
 | |
| 	NOP
 | |
| 	.align 3
 | |
| 
 | |
| .L13:	/* Final 4-step pass for the 2x8 tile: same sequence as the .L12 body but with no counter decrement, no back-branch and no trailing c11..c41 MADDs; falls through to .L15 */
 | |
| 	MADD	c12, c12, a2, b1	/* step 1 continues (c11..c41 for this step were issued before reaching this label) */
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	LD	a4,  2 * SIZE(AO)
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  8 * SIZE(AO)	/* offset 8 becomes offset 0 after AO advances; this is the a1 that .L16 starts with */
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6	/* step 2: A offsets 2..3, B offsets 8..15 */
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 28 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 17 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b1	/* step 3: A offsets 4..5, B offsets 16..23 */
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 32 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 21 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 22 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 23 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a3, b5
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a3, b2
 | |
| 	LD	a4,  6 * SIZE(AO)
 | |
| 	MADD	c71, c71, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a3, b4
 | |
| 	LD	a3, 12 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 25 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 26 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6	/* step 4: A offsets 6..7, B offsets 24..31 */
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 40 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 29 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 30 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 31 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	daddiu	BO, BO, 32 * SIZE	/* pointers advance here; the loads below use post-advance offsets */
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4,  3 * SIZE(BO)	/* b1..b5 and a1 now hold the operands for the step at the advanced AO/BO */
 | |
| 	.align 3
 | |
| 
 | |
| .L15:	/* Compute the leftover pass count (low two bits of the trip count) for the single-step loop at .L16 */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3	/* LT/RN builds: L = KK & 3 */
 | |
| #else
 | |
| 	andi	L, TEMP, 3	/* LN/RT builds: L = TEMP & 3 */
 | |
| #endif
 | |
| 	blez	L, .L18	/* no leftover passes: go to .L18 */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L16:	/* Single-step loop for the 2x8 tile: two A values (a1, a2) times eight B values per pass; AO += 2 values, BO += 8 values */
 | |
| 	MADD	c11, c11, a1, b1	/* first A value times B offsets 0..3 */
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1	/* second A value times B offsets 0..3 */
 | |
| 	LD	b1,  8 * SIZE(BO)	/* next step's first B value (BO not advanced yet) */
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)	/* b2..b4 are reused for B offsets 5..7 of the current step */
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5	/* first A value times B offsets 4..7 */
 | |
| 	daddiu	L, L, -1
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	daddiu	AO, AO,  2 * SIZE
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	daddiu	BO, BO,  8 * SIZE
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  0 * SIZE(AO)	/* AO already advanced: next step's first A value */
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5	/* second A value times B offsets 4..7 */
 | |
| 	LD	b5,  4 * SIZE(BO)	/* BO already advanced: offsets 1..4 belong to the next step */
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	bgtz	L, .L16	/* repeat while L > 0 */
 | |
| 	LD	b4,  3 * SIZE(BO)	/* follows the branch: runs on both paths if delay slots are active (assumes .set noreorder) */
 | |
| 
 | |
| .L18:
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -2
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -8
 | |
| #endif
 | |
| 
 | |
| 	dsll	L,    TEMP, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 3 + BASE_SHIFT
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	SUB	c21, b2, c21
 | |
| 	LD	b6,  5 * SIZE(BO)
 | |
| 	SUB	c31, b3, c31
 | |
| 	LD	b7,  6 * SIZE(BO)
 | |
| 	SUB	c41, b4, c41
 | |
| 	LD	b8,  7 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c51, b5, c51
 | |
| 	LD	b1,  8 * SIZE(BO)
 | |
| 	SUB	c61, b6, c61
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	SUB	c71, b7, c71
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	SUB	c81, b8, c81
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c12, b1, c12
 | |
| 	LD	b5, 12 * SIZE(BO)
 | |
| 	SUB	c22, b2, c22
 | |
| 	LD	b6, 13 * SIZE(BO)
 | |
| 	SUB	c32, b3, c32
 | |
| 	LD	b7, 14 * SIZE(BO)
 | |
| 	SUB	c42, b4, c42
 | |
| 	LD	b8, 15 * SIZE(BO)
 | |
| 
 | |
| 	SUB	c52, b5, c52
 | |
| #ifdef LN
 | |
| 	LD	b1,  3 * SIZE(AO)
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| #endif
 | |
| 	SUB	c62, b6, c62
 | |
| 	SUB	c72, b7, c72
 | |
| 	SUB	c82, b8, c82
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  2 * SIZE(AO)
 | |
| 	LD	b4,  3 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c11, b1, c11
 | |
| 	LD	b5,  4 * SIZE(AO)
 | |
| 	SUB	c12, b2, c12
 | |
| 	LD	b6,  5 * SIZE(AO)
 | |
| 	SUB	c21, b3, c21
 | |
| 	LD	b7,  6 * SIZE(AO)
 | |
| 	SUB	c22, b4, c22
 | |
| 	LD	b8,  7 * SIZE(AO)
 | |
| 
 | |
| 	SUB	c31, b5, c31
 | |
| 	LD	b1,  8 * SIZE(AO)
 | |
| 	SUB	c32, b6, c32
 | |
| 	LD	b2,  9 * SIZE(AO)
 | |
| 	SUB	c41, b7, c41
 | |
| 	LD	b3, 10 * SIZE(AO)
 | |
| 	SUB	c42, b8, c42
 | |
| 	LD	b4, 11 * SIZE(AO)
 | |
| 
 | |
| 	LD	b5, 12 * SIZE(AO)
 | |
| 	SUB	c51, b1, c51
 | |
| 	LD	b6, 13 * SIZE(AO)
 | |
| 	SUB	c52, b2, c52
 | |
| 	LD	b7, 14 * SIZE(AO)
 | |
| 	SUB	c61, b3, c61
 | |
| 	LD	b8, 15 * SIZE(AO)
 | |
| 	SUB	c62, b4, c62
 | |
| 
 | |
| 	SUB	c71, b5, c71
 | |
| 	SUB	c72, b6, c72
 | |
| 	SUB	c81, b7, c81
 | |
| 	SUB	c82, b8, c82
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	MUL	c12, b1, c12
 | |
| 	LD	b2,  2 * SIZE(AO)
 | |
| 	MUL	c22, b1, c22
 | |
| 	MUL	c32, b1, c32
 | |
| 	MUL	c42, b1, c42
 | |
| 	MUL	c52, b1, c52
 | |
| 	MUL	c62, b1, c62
 | |
| 	MUL	c72, b1, c72
 | |
| 	MUL	c82, b1, c82
 | |
| 
 | |
| 	NMSUB	c11, c11, b2, c12
 | |
| 	LD	b3,  0 * SIZE(AO)
 | |
| 	NMSUB	c21, c21, b2, c22
 | |
| 	NMSUB	c31, c31, b2, c32
 | |
| 	NMSUB	c41, c41, b2, c42
 | |
| 	NMSUB	c51, c51, b2, c52
 | |
| 	NMSUB	c61, c61, b2, c62
 | |
| 	NMSUB	c71, c71, b2, c72
 | |
| 	NMSUB	c81, c81, b2, c82
 | |
| 
 | |
| 	MUL	c11, b3, c11
 | |
| 	daddiu	CO1, CO1, -2 * SIZE
 | |
| 	MUL	c21, b3, c21
 | |
| 	daddiu	CO2, CO2, -2 * SIZE
 | |
| 	MUL	c31, b3, c31
 | |
| 	daddiu	CO3, CO3, -2 * SIZE
 | |
| 	MUL	c41, b3, c41
 | |
| 	daddiu	CO4, CO4, -2 * SIZE
 | |
| 	MUL	c51, b3, c51
 | |
| 	daddiu	CO5, CO5, -2 * SIZE
 | |
| 	MUL	c61, b3, c61
 | |
| 	daddiu	CO6, CO6, -2 * SIZE
 | |
| 	MUL	c71, b3, c71
 | |
| 	daddiu	CO7, CO7, -2 * SIZE
 | |
| 	MUL	c81, b3, c81
 | |
| 	daddiu	CO8, CO8, -2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	MUL	c11, b1, c11
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	MUL	c21, b1, c21
 | |
| 	MUL	c31, b1, c31
 | |
| 	MUL	c41, b1, c41
 | |
| 	MUL	c51, b1, c51
 | |
| 	MUL	c61, b1, c61
 | |
| 	MUL	c71, b1, c71
 | |
| 	MUL	c81, b1, c81
 | |
| 
 | |
| 	NMSUB	c12, c12, b2, c11
 | |
| 	LD	b3,  3 * SIZE(AO)
 | |
| 	NMSUB	c22, c22, b2, c21
 | |
| 	NMSUB	c32, c32, b2, c31
 | |
| 	NMSUB	c42, c42, b2, c41
 | |
| 	NMSUB	c52, c52, b2, c51
 | |
| 	NMSUB	c62, c62, b2, c61
 | |
| 	NMSUB	c72, c72, b2, c71
 | |
| 	NMSUB	c82, c82, b2, c81
 | |
| 
 | |
| 	MUL	c12, b3, c12
 | |
| 	MUL	c22, b3, c22
 | |
| 	MUL	c32, b3, c32
 | |
| 	MUL	c42, b3, c42
 | |
| 	MUL	c52, b3, c52
 | |
| 	MUL	c62, b3, c62
 | |
| 	MUL	c72, b3, c72
 | |
| 	MUL	c82, b3, c82
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b1, c11
 | |
| 	MUL	c12, b1, c12
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c21, c21, b2, c11
 | |
| 	NMSUB	c22, c22, b2, c12
 | |
| 	LD	b6,  5 * SIZE(BO)
 | |
| 	NMSUB	c31, c31, b3, c11
 | |
| 	NMSUB	c32, c32, b3, c12
 | |
| 	LD	b7,  6 * SIZE(BO)
 | |
| 	NMSUB	c41, c41, b4, c11
 | |
| 	NMSUB	c42, c42, b4, c12
 | |
| 	LD	b8,  7 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c51, c51, b5, c11
 | |
| 	NMSUB	c52, c52, b5, c12
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	NMSUB	c61, c61, b6, c11
 | |
| 	NMSUB	c62, c62, b6, c12
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	NMSUB	c71, c71, b7, c11
 | |
| 	NMSUB	c72, c72, b7, c12
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 	NMSUB	c81, c81, b8, c11
 | |
| 	NMSUB	c82, c82, b8, c12
 | |
| 	LD	b5, 12 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b2, c21
 | |
| 	MUL	c22, b2, c22
 | |
| 	LD	b6, 13 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c31, c31, b3, c21
 | |
| 	NMSUB	c32, c32, b3, c22
 | |
| 	LD	b7, 14 * SIZE(BO)
 | |
| 	NMSUB	c41, c41, b4, c21
 | |
| 	NMSUB	c42, c42, b4, c22
 | |
| 	LD	b8, 15 * SIZE(BO)
 | |
| 	NMSUB	c51, c51, b5, c21
 | |
| 	NMSUB	c52, c52, b5, c22
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	NMSUB	c61, c61, b6, c21
 | |
| 	NMSUB	c62, c62, b6, c22
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| 	NMSUB	c71, c71, b7, c21
 | |
| 	NMSUB	c72, c72, b7, c22
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	NMSUB	c81, c81, b8, c21
 | |
| 	NMSUB	c82, c82, b8, c22
 | |
| 	LD	b6, 21 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b3, c31
 | |
| 	MUL	c32, b3, c32
 | |
| 	LD	b7, 22 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c41, c41, b4, c31
 | |
| 	NMSUB	c42, c42, b4, c32
 | |
| 	LD	b8, 23 * SIZE(BO)
 | |
| 	NMSUB	c51, c51, b5, c31
 | |
| 	NMSUB	c52, c52, b5, c32
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| 	NMSUB	c61, c61, b6, c31
 | |
| 	NMSUB	c62, c62, b6, c32
 | |
| 	LD	b5, 28 * SIZE(BO)
 | |
| 	NMSUB	c71, c71, b7, c31
 | |
| 	NMSUB	c72, c72, b7, c32
 | |
| 	LD	b6, 29 * SIZE(BO)
 | |
| 	NMSUB	c81, c81, b8, c31
 | |
| 	NMSUB	c82, c82, b8, c32
 | |
| 	LD	b7, 30 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b4, c41
 | |
| 	MUL	c42, b4, c42
 | |
| 	LD	b8, 31 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c51, c51, b5, c41
 | |
| 	NMSUB	c52, c52, b5, c42
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	NMSUB	c61, c61, b6, c41
 | |
| 	NMSUB	c62, c62, b6, c42
 | |
| 	LD	b6, 37 * SIZE(BO)
 | |
| 	NMSUB	c71, c71, b7, c41
 | |
| 	NMSUB	c72, c72, b7, c42
 | |
| 	LD	b7, 38 * SIZE(BO)
 | |
| 	NMSUB	c81, c81, b8, c41
 | |
| 	NMSUB	c82, c82, b8, c42
 | |
| 	LD	b8, 39 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c51, b5, c51
 | |
| 	MUL	c52, b5, c52
 | |
| 
 | |
| 	NMSUB	c61, c61, b6, c51
 | |
| 	NMSUB	c62, c62, b6, c52
 | |
| 	LD	b6, 45 * SIZE(BO)
 | |
| 	NMSUB	c71, c71, b7, c51
 | |
| 	NMSUB	c72, c72, b7, c52
 | |
| 	LD	b7, 46 * SIZE(BO)
 | |
| 	NMSUB	c81, c81, b8, c51
 | |
| 	NMSUB	c82, c82, b8, c52
 | |
| 	LD	b8, 47 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c61, b6, c61
 | |
| 	MUL	c62, b6, c62
 | |
| 
 | |
| 	NMSUB	c71, c71, b7, c61
 | |
| 	NMSUB	c72, c72, b7, c62
 | |
| 	LD	b7, 54 * SIZE(BO)
 | |
| 	NMSUB	c81, c81, b8, c61
 | |
| 	NMSUB	c82, c82, b8, c62
 | |
| 	LD	b8, 55 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c71, b7, c71
 | |
| 	MUL	c72, b7, c72
 | |
| 
 | |
| 	NMSUB	c81, c81, b8, c71
 | |
| 	NMSUB	c82, c82, b8, c72
 | |
| 	LD	b8, 63 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c81, b8, c81
 | |
| 	MUL	c82, b8, c82
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LD	b1, 63 * SIZE(BO)
 | |
| 	LD	b2, 62 * SIZE(BO)
 | |
| 	LD	b3, 61 * SIZE(BO)
 | |
| 	LD	b4, 60 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c81, b1, c81
 | |
| 	MUL	c82, b1, c82
 | |
| 	LD	b5, 59 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c71, c71, b2, c81
 | |
| 	NMSUB	c72, c72, b2, c82
 | |
| 	LD	b6, 58 * SIZE(BO)
 | |
| 	NMSUB	c61, c61, b3, c81
 | |
| 	NMSUB	c62, c62, b3, c82
 | |
| 	LD	b7, 57 * SIZE(BO)
 | |
| 	NMSUB	c51, c51, b4, c81
 | |
| 	NMSUB	c52, c52, b4, c82
 | |
| 	LD	b8, 56 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c41, c41, b5, c81
 | |
| 	NMSUB	c42, c42, b5, c82
 | |
| 	LD	b2, 54 * SIZE(BO)
 | |
| 	NMSUB	c31, c31, b6, c81
 | |
| 	NMSUB	c32, c32, b6, c82
 | |
| 	LD	b3, 53 * SIZE(BO)
 | |
| 	NMSUB	c21, c21, b7, c81
 | |
| 	NMSUB	c22, c22, b7, c82
 | |
| 	LD	b4, 52 * SIZE(BO)
 | |
| 	NMSUB	c11, c11, b8, c81
 | |
| 	NMSUB	c12, c12, b8, c82
 | |
| 	LD	b5, 51 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c71, b2, c71
 | |
| 	MUL	c72, b2, c72
 | |
| 	LD	b6, 50 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c61, c61, b3, c71
 | |
| 	NMSUB	c62, c62, b3, c72
 | |
| 	LD	b7, 49 * SIZE(BO)
 | |
| 	NMSUB	c51, c51, b4, c71
 | |
| 	NMSUB	c52, c52, b4, c72
 | |
| 	LD	b8, 48 * SIZE(BO)
 | |
| 	NMSUB	c41, c41, b5, c71
 | |
| 	NMSUB	c42, c42, b5, c72
 | |
| 	LD	b3, 45 * SIZE(BO)
 | |
| 	NMSUB	c31, c31, b6, c71
 | |
| 	NMSUB	c32, c32, b6, c72
 | |
| 	LD	b4, 44 * SIZE(BO)
 | |
| 	NMSUB	c21, c21, b7, c71
 | |
| 	NMSUB	c22, c22, b7, c72
 | |
| 	LD	b5, 43 * SIZE(BO)
 | |
| 	NMSUB	c11, c11, b8, c71
 | |
| 	NMSUB	c12, c12, b8, c72
 | |
| 	LD	b6, 42 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c61, b3, c61
 | |
| 	MUL	c62, b3, c62
 | |
| 	LD	b7, 41 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c51, c51, b4, c61
 | |
| 	NMSUB	c52, c52, b4, c62
 | |
| 	LD	b8, 40 * SIZE(BO)
 | |
| 	NMSUB	c41, c41, b5, c61
 | |
| 	NMSUB	c42, c42, b5, c62
 | |
| 	LD	b4, 36 * SIZE(BO)
 | |
| 	NMSUB	c31, c31, b6, c61
 | |
| 	NMSUB	c32, c32, b6, c62
 | |
| 	LD	b5, 35 * SIZE(BO)
 | |
| 	NMSUB	c21, c21, b7, c61
 | |
| 	NMSUB	c22, c22, b7, c62
 | |
| 	LD	b6, 34 * SIZE(BO)
 | |
| 	NMSUB	c11, c11, b8, c61
 | |
| 	NMSUB	c12, c12, b8, c62
 | |
| 	LD	b7, 33 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c51, b4, c51
 | |
| 	MUL	c52, b4, c52
 | |
| 	LD	b8, 32 * SIZE(BO)
 | |
| 
 | |
| 	NMSUB	c41, c41, b5, c51
 | |
| 	NMSUB	c42, c42, b5, c52
 | |
| 	LD	b5, 27 * SIZE(BO)
 | |
| 	NMSUB	c31, c31, b6, c51
 | |
| 	NMSUB	c32, c32, b6, c52
 | |
| 	LD	b6, 26 * SIZE(BO)
 | |
| 	NMSUB	c21, c21, b7, c51
 | |
| 	NMSUB	c22, c22, b7, c52
 | |
| 	LD	b7, 25 * SIZE(BO)
 | |
| 	NMSUB	c11, c11, b8, c51
 | |
| 	NMSUB	c12, c12, b8, c52
 | |
| 	LD	b8, 24 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b5, c41
 | |
| 	MUL	c42, b5, c42
 | |
| 
 | |
| 	NMSUB	c31, c31, b6, c41
 | |
| 	NMSUB	c32, c32, b6, c42
 | |
| 	LD	b6, 18 * SIZE(BO)
 | |
| 	NMSUB	c21, c21, b7, c41
 | |
| 	NMSUB	c22, c22, b7, c42
 | |
| 	LD	b7, 17 * SIZE(BO)
 | |
| 	NMSUB	c11, c11, b8, c41
 | |
| 	NMSUB	c12, c12, b8, c42
 | |
| 	LD	b8, 16 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b6, c31
 | |
| 	MUL	c32, b6, c32
 | |
| 
 | |
| 	NMSUB	c21, c21, b7, c31
 | |
| 	NMSUB	c22, c22, b7, c32
 | |
| 	LD	b7,  9 * SIZE(BO)
 | |
| 	NMSUB	c11, c11, b8, c31
 | |
| 	NMSUB	c12, c12, b8, c32
 | |
| 	LD	b8,  8 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b7, c21
 | |
| 	MUL	c22, b7, c22
 | |
| 
 | |
| 	NMSUB	c11, c11, b8, c21
 | |
| 	NMSUB	c12, c12, b8, c22
 | |
| 	LD	b8,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b8, c11
 | |
| 	MUL	c12, b8, c12
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)
 | |
| 	ST	c21,  1 * SIZE(BO)
 | |
| 	ST	c31,  2 * SIZE(BO)
 | |
| 	ST	c41,  3 * SIZE(BO)
 | |
| 	ST	c51,  4 * SIZE(BO)
 | |
| 	ST	c61,  5 * SIZE(BO)
 | |
| 	ST	c71,  6 * SIZE(BO)
 | |
| 	ST	c81,  7 * SIZE(BO)
 | |
| 
 | |
| 	ST	c12,  8 * SIZE(BO)
 | |
| 	ST	c22,  9 * SIZE(BO)
 | |
| 	ST	c32, 10 * SIZE(BO)
 | |
| 	ST	c42, 11 * SIZE(BO)
 | |
| 	ST	c52, 12 * SIZE(BO)
 | |
| 	ST	c62, 13 * SIZE(BO)
 | |
| 	ST	c72, 14 * SIZE(BO)
 | |
| 	ST	c82, 15 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| 	ST	c12,  1 * SIZE(AO)
 | |
| 	ST	c21,  2 * SIZE(AO)
 | |
| 	ST	c22,  3 * SIZE(AO)
 | |
| 	ST	c31,  4 * SIZE(AO)
 | |
| 	ST	c32,  5 * SIZE(AO)
 | |
| 	ST	c41,  6 * SIZE(AO)
 | |
| 	ST	c42,  7 * SIZE(AO)
 | |
| 
 | |
| 	ST	c51,  8 * SIZE(AO)
 | |
| 	ST	c52,  9 * SIZE(AO)
 | |
| 	ST	c61, 10 * SIZE(AO)
 | |
| 	ST	c62, 11 * SIZE(AO)
 | |
| 	ST	c71, 12 * SIZE(AO)
 | |
| 	ST	c72, 13 * SIZE(AO)
 | |
| 	ST	c81, 14 * SIZE(AO)
 | |
| 	ST	c82, 15 * SIZE(AO)
 | |
| #endif
 | |
| 
 | |
| 	ST	c11,  0 * SIZE(CO1)
 | |
| 	ST	c12,  1 * SIZE(CO1)
 | |
| 	ST	c21,  0 * SIZE(CO2)
 | |
| 	ST	c22,  1 * SIZE(CO2)
 | |
| 	ST	c31,  0 * SIZE(CO3)
 | |
| 	ST	c32,  1 * SIZE(CO3)
 | |
| 	ST	c41,  0 * SIZE(CO4)
 | |
| 	ST	c42,  1 * SIZE(CO4)
 | |
| 	ST	c51,  0 * SIZE(CO5)
 | |
| 	ST	c52,  1 * SIZE(CO5)
 | |
| 	ST	c61,  0 * SIZE(CO6)
 | |
| 	ST	c62,  1 * SIZE(CO6)
 | |
| 	ST	c71,  0 * SIZE(CO7)
 | |
| 	ST	c72,  1 * SIZE(CO7)
 | |
| 	ST	c81,  0 * SIZE(CO8)
 | |
| 	ST	c82,  1 * SIZE(CO8)
 | |
| 
 | |
| 	MTC	$0,  a1
 | |
| 
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 2 * SIZE
 | |
| 	daddiu	CO2, CO2, 2 * SIZE
 | |
| 	daddiu	CO3, CO3, 2 * SIZE
 | |
| 	daddiu	CO4, CO4, 2 * SIZE
 | |
| 	daddiu	CO5, CO5, 2 * SIZE
 | |
| 	daddiu	CO6, CO6, 2 * SIZE
 | |
| 	daddiu	CO7, CO7, 2 * SIZE
 | |
| 	daddiu	CO8, CO8, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| 	MOV	c11, a1
 | |
| 	MOV	c21, a1
 | |
| 
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, 1 + BASE_SHIFT
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| 
 | |
| 	MOV	c31, a1
 | |
| 	MOV	c41, a1
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK
 | |
| 	dsll	L,    TEMP, 1 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 3 + BASE_SHIFT
 | |
| 	daddu	AO, AO, L
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -2
 | |
| #endif
 | |
| 
 | |
| 	daddiu	I, I, -1
 | |
| 	MOV	c51, a1
 | |
| 
 | |
| 	bgtz	I, .L11
 | |
| 	MOV	c61, a1
 | |
| 	.align 3
 | |
| 
 | |
| .L20:	/* Odd-M tail: set up a pass that uses one A value and eight B values per k step. */
 | |
| 	andi	I,  M, 1	/* I = M & 1 */
 | |
| 	MOV	c61, c11	/* seed c61 from c11; c11 presumably holds zero here (other entry paths are outside this view) */
 | |
| 	blez	I, .L29	/* M even: skip the tail entirely */
 | |
| 	MOV	c71, c11	/* sits right after the branch: a delay slot if .set noreorder is in effect (directive not visible here) */
 | |
| /* LT/RN: operands are read from the current AO and from B; the k count for the loops is KK. */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| /* Preload B: b1..b5 from offsets 0..4, b6 and b7 from offsets 8 and 12 (consumed by .L22). */
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| /* L = KK / 4: number of passes through the 4x-unrolled loop at .L22. */
 | |
| 	dsra	L,  KK, 2
 | |
| 	MOV	c81, c11
 | |
| /* No full group of four k steps: go straight to the remainder handling at .L25. */
 | |
| 	blez	L, .L25
 | |
| 	move	BO,  B	/* BO = B; placed after the branch, so it is set on both paths if this is a delay slot */
 | |
| #else
 | |
| /* LN/RT: operands start KK k-steps past AORIG / B; the k count for the loops is K - KK. */
 | |
| #ifdef LN
 | |
| 	dsll	TEMP,   K,  0 + BASE_SHIFT
 | |
| 	dsubu	AORIG, AORIG, TEMP	/* LN only: step AORIG back by K elements */
 | |
| #endif
 | |
| /* Byte offsets: L = KK elements (A side), TEMP = KK * 8 elements (B side). */
 | |
| 	dsll	L,    KK, 0 + BASE_SHIFT
 | |
| 	dsll	TEMP, KK, 3 + BASE_SHIFT
 | |
| 
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| /* TEMP = K - KK; also read again at .L25 for the remainder count. */
 | |
| 	dsubu	TEMP, K, KK
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| /* Same B preload pattern as the LT/RN path, but relative to BO. */
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| /* L = (K - KK) / 4 passes through .L22. */
 | |
| 	dsra	L,  TEMP, 2
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	blez	L, .L25
 | |
| 	NOP
 | |
| #endif
 | |
| 	.align	3
 | |
| 
 | |
| .L22:	/* Main loop, four k steps per pass: consumes a1..a4 and 32 B values, accumulating into c11..c81. */
 | |
| 	MADD	c11, c11, a1, b1	/* presumably c11 += a1 * b1 (MADD is a macro defined outside this view) */
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| /* Step 1, second half: b2..b4 now hold B offsets 5..7 and feed c61..c81. */
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| /* a1 is finished for this pass: reload it for the next pass and count the pass down. */
 | |
| 	LD	a1,  4 * SIZE(AO)
 | |
| 	daddiu	L, L, -1
 | |
| /* Step 2: a2 against B offsets 8..15 (b6 = offset 8, b7 = offset 12). */
 | |
| 	MADD	c11, c11, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a2, b7
 | |
| 	LD	b7, 28 * SIZE(BO)
 | |
| 	MADD	c61, c61, a2, b2
 | |
| 	LD	b2, 17 * SIZE(BO)
 | |
| 	MADD	c71, c71, a2, b3
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	MADD	c81, c81, a2, b4
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| /* Reload a2 for the next pass, then advance AO by the four A values this pass uses. */
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| /* Step 3: a3 against B offsets 16..23 (b1 = offset 16, b5 = offset 20). */
 | |
| 	MADD	c11, c11, a3, b1
 | |
| 	LD	b1, 32 * SIZE(BO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	LD	b2, 21 * SIZE(BO)
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	LD	b3, 22 * SIZE(BO)
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	LD	b4, 23 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a3, b5
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	MADD	c61, c61, a3, b2
 | |
| 	LD	b2, 25 * SIZE(BO)
 | |
| 	MADD	c71, c71, a3, b3
 | |
| 	LD	b3, 26 * SIZE(BO)
 | |
| 	MADD	c81, c81, a3, b4
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| /* AO has already moved, so offset 2 is the third A value of the next pass; BO now advances by 32 B values. */
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	daddiu	BO, BO, 32 * SIZE
 | |
| /* Step 4: a4 against old B offsets 24..31; the negative offsets reach back across the BO advance. */
 | |
| 	MADD	c11, c11, a4, b6
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	LD	b2, -3 * SIZE(BO)
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	LD	b3, -2 * SIZE(BO)
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	LD	b4, -1 * SIZE(BO)
 | |
| /* The loads below (offsets 12, 1, 2, 3 from the new BO) re-establish the preload state expected at loop entry. */
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	bgtz	L, .L22
 | |
| 	LD	a4,  3 * SIZE(AO)	/* follows the branch: reloads a4 in the delay slot if .set noreorder is in effect (not visible here) */
 | |
| 	.align 3
 | |
| 
 | |
| .L25:	/* Remainder count: L = (k count) & 3, where the k count is KK for LT/RN and TEMP otherwise. */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi	L, KK,  3
 | |
| #else
 | |
| 	andi	L, TEMP, 3	/* TEMP was set to K - KK during the .L20 setup */
 | |
| #endif
 | |
| 	NOP
 | |
| 	blez	L, .L28	/* no leftover k steps: continue at .L28 */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L26:	/* Remainder loop, one k step per pass: a1 against eight B values, accumulating into c11..c81. */
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  8 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| /* Count the step down and advance AO by one A value and BO by eight B values. */
 | |
| 	daddiu	L, L, -1
 | |
| 	MOV	a2, a2	/* NOTE(review): self-move, leaves a2 unchanged; presumably scheduling filler - confirm before removing */
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	daddiu	BO, BO,  8 * SIZE
 | |
| /* Second half: b5 and the reloaded b2..b4 (old offsets 4..7) feed c51..c81; reloads are relative to the new BO. */
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 
 | |
| 	bgtz	L, .L26
 | |
| 	LD	b4,  3 * SIZE(BO)	/* follows the branch: reloads b4 in the delay slot if .set noreorder is in effect (not visible here) */
 | |
| 
 | |
| .L28:	/* Accumulation finished: reposition AO/BO (LN/RT only), then form (packed value - accumulated sum) in c11..c81. */
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	daddiu	TEMP, KK, -1
 | |
| #else
 | |
| 	daddiu	TEMP, KK, -8
 | |
| #endif
 | |
| /* TEMP = KK - 1 (LN) or KK - 8 (RT) k-steps: one element per step on the A side, eight on the B side. */
 | |
| 	dsll	L,    TEMP, 0 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 3 + BASE_SHIFT
 | |
| 	daddu	AO, AORIG, L
 | |
| 	daddu	BO, B,     TEMP
 | |
| #endif
 | |
| 
 | |
| /* LN/LT read the eight packed values from BO; the other variants read them from AO. */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	LD	b6,  5 * SIZE(BO)
 | |
| 	LD	b7,  6 * SIZE(BO)
 | |
| 	LD	b8,  7 * SIZE(BO)
 | |
| /* Presumably cX1 = bX - cX1 (SUB is a macro defined outside this view). */
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c21, b2, c21
 | |
| 	SUB	c31, b3, c31
 | |
| 	SUB	c41, b4, c41
 | |
| 	SUB	c51, b5, c51
 | |
| 	SUB	c61, b6, c61
 | |
| 	SUB	c71, b7, c71
 | |
| 	SUB	c81, b8, c81
 | |
| #else
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| 	LD	b2,  1 * SIZE(AO)
 | |
| 	LD	b3,  2 * SIZE(AO)
 | |
| 	LD	b4,  3 * SIZE(AO)
 | |
| 	LD	b5,  4 * SIZE(AO)
 | |
| 	LD	b6,  5 * SIZE(AO)
 | |
| 	LD	b7,  6 * SIZE(AO)
 | |
| 	LD	b8,  7 * SIZE(AO)
 | |
| /* Same subtraction as the LN/LT branch, with the values taken from AO. */
 | |
| 	SUB	c11, b1, c11
 | |
| 	SUB	c21, b2, c21
 | |
| 	SUB	c31, b3, c31
 | |
| 	SUB	c41, b4, c41
 | |
| 	SUB	c51, b5, c51
 | |
| 	SUB	c61, b6, c61
 | |
| 	SUB	c71, b7, c71
 | |
| 	SUB	c81, b8, c81
 | |
| #endif
 | |
| /* LN/LT solve: with a single A element there is only one factor, read from 0*SIZE(AO). */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LD	b1,  0 * SIZE(AO)
 | |
| /* Multiply, not divide: presumably the packed diagonal is stored as a reciprocal - confirm against the packing routine. */
 | |
| 	MUL	c11, b1, c11
 | |
| 	MUL	c21, b1, c21
 | |
| 	MUL	c31, b1, c31
 | |
| 	MUL	c41, b1, c41
 | |
| 	MUL	c51, b1, c51
 | |
| 	MUL	c61, b1, c61
 | |
| 	MUL	c71, b1, c71
 | |
| 	MUL	c81, b1, c81
 | |
| #endif
 | |
| /* RN solve: forward sweep c11 -> c81 over an 8x8 block at BO indexed as 8*i + j, reading only entries with j >= i. */
 | |
| #ifdef RN
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	LD	b6,  5 * SIZE(BO)
 | |
| 	LD	b7,  6 * SIZE(BO)
 | |
| 	LD	b8,  7 * SIZE(BO)
 | |
| /* Diagonal entry (offset 0) is applied with MUL - presumably stored as a reciprocal; confirm against the packing routine. */
 | |
| 	MUL	c11, b1, c11
 | |
| /* Eliminate c11 from the later unknowns; presumably cX1 -= bX * c11 (NMSUB is a macro defined outside this view). */
 | |
| 	NMSUB	c21, c21, b2, c11
 | |
| 	NMSUB	c31, c31, b3, c11
 | |
| 	NMSUB	c41, c41, b4, c11
 | |
| 	NMSUB	c51, c51, b5, c11
 | |
| 	NMSUB	c61, c61, b6, c11
 | |
| 	NMSUB	c71, c71, b7, c11
 | |
| 	NMSUB	c81, c81, b8, c11
 | |
| /* Row 1: offsets 9..15. */
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 	LD	b5, 12 * SIZE(BO)
 | |
| 	LD	b6, 13 * SIZE(BO)
 | |
| 	LD	b7, 14 * SIZE(BO)
 | |
| 	LD	b8, 15 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b2, c21
 | |
| 
 | |
| 	NMSUB	c31, c31, b3, c21
 | |
| 	NMSUB	c41, c41, b4, c21
 | |
| 	NMSUB	c51, c51, b5, c21
 | |
| 	NMSUB	c61, c61, b6, c21
 | |
| 	NMSUB	c71, c71, b7, c21
 | |
| 	NMSUB	c81, c81, b8, c21
 | |
| /* Row 2: offsets 18..23. */
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	LD	b6, 21 * SIZE(BO)
 | |
| 	LD	b7, 22 * SIZE(BO)
 | |
| 	LD	b8, 23 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b3, c31
 | |
| 
 | |
| 	NMSUB	c41, c41, b4, c31
 | |
| 	NMSUB	c51, c51, b5, c31
 | |
| 	NMSUB	c61, c61, b6, c31
 | |
| 	NMSUB	c71, c71, b7, c31
 | |
| 	NMSUB	c81, c81, b8, c31
 | |
| /* Row 3: offsets 27..31. */
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| 	LD	b5, 28 * SIZE(BO)
 | |
| 	LD	b6, 29 * SIZE(BO)
 | |
| 	LD	b7, 30 * SIZE(BO)
 | |
| 	LD	b8, 31 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b4, c41
 | |
| 
 | |
| 	NMSUB	c51, c51, b5, c41
 | |
| 	NMSUB	c61, c61, b6, c41
 | |
| 	NMSUB	c71, c71, b7, c41
 | |
| 	NMSUB	c81, c81, b8, c41
 | |
| /* Row 4: offsets 36..39. */
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	LD	b6, 37 * SIZE(BO)
 | |
| 	LD	b7, 38 * SIZE(BO)
 | |
| 	LD	b8, 39 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c51, b5, c51
 | |
| 
 | |
| 	NMSUB	c61, c61, b6, c51
 | |
| 	NMSUB	c71, c71, b7, c51
 | |
| 	NMSUB	c81, c81, b8, c51
 | |
| /* Row 5: offsets 45..47. */
 | |
| 	LD	b6, 45 * SIZE(BO)
 | |
| 	LD	b7, 46 * SIZE(BO)
 | |
| 	LD	b8, 47 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c61, b6, c61
 | |
| 
 | |
| 	NMSUB	c71, c71, b7, c61
 | |
| 	NMSUB	c81, c81, b8, c61
 | |
| /* Row 6: offsets 54..55. */
 | |
| 	LD	b7, 54 * SIZE(BO)
 | |
| 	LD	b8, 55 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c71, b7, c71
 | |
| 
 | |
| 	NMSUB	c81, c81, b8, c71
 | |
| /* Row 7: only the diagonal entry at offset 63 remains. */
 | |
| 	LD	b8, 63 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c81, b8, c81
 | |
| #endif
 | |
| /* RT solve: backward sweep c81 -> c11 over an 8x8 block at BO indexed as 8*i + j, reading only entries with j <= i. */
 | |
| #ifdef RT
 | |
| 	LD	b1, 63 * SIZE(BO)
 | |
| 	LD	b2, 62 * SIZE(BO)
 | |
| 	LD	b3, 61 * SIZE(BO)
 | |
| 	LD	b4, 60 * SIZE(BO)
 | |
| 	LD	b5, 59 * SIZE(BO)
 | |
| 	LD	b6, 58 * SIZE(BO)
 | |
| 	LD	b7, 57 * SIZE(BO)
 | |
| 	LD	b8, 56 * SIZE(BO)
 | |
| /* Diagonal entry (offset 63) is applied with MUL - presumably stored as a reciprocal; confirm against the packing routine. */
 | |
| 	MUL	c81, b1, c81
 | |
| /* Eliminate c81 from the earlier unknowns; presumably cX1 -= bX * c81 (NMSUB is a macro defined outside this view). */
 | |
| 	NMSUB	c71, c71, b2, c81
 | |
| 	NMSUB	c61, c61, b3, c81
 | |
| 	NMSUB	c51, c51, b4, c81
 | |
| 	NMSUB	c41, c41, b5, c81
 | |
| 	NMSUB	c31, c31, b6, c81
 | |
| 	NMSUB	c21, c21, b7, c81
 | |
| 	NMSUB	c11, c11, b8, c81
 | |
| /* Row 6: offsets 54 down to 48. */
 | |
| 	LD	b2, 54 * SIZE(BO)
 | |
| 	LD	b3, 53 * SIZE(BO)
 | |
| 	LD	b4, 52 * SIZE(BO)
 | |
| 	LD	b5, 51 * SIZE(BO)
 | |
| 	LD	b6, 50 * SIZE(BO)
 | |
| 	LD	b7, 49 * SIZE(BO)
 | |
| 	LD	b8, 48 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c71, b2, c71
 | |
| 
 | |
| 	NMSUB	c61, c61, b3, c71
 | |
| 	NMSUB	c51, c51, b4, c71
 | |
| 	NMSUB	c41, c41, b5, c71
 | |
| 	NMSUB	c31, c31, b6, c71
 | |
| 	NMSUB	c21, c21, b7, c71
 | |
| 	NMSUB	c11, c11, b8, c71
 | |
| /* Row 5: offsets 45 down to 40. */
 | |
| 	LD	b3, 45 * SIZE(BO)
 | |
| 	LD	b4, 44 * SIZE(BO)
 | |
| 	LD	b5, 43 * SIZE(BO)
 | |
| 	LD	b6, 42 * SIZE(BO)
 | |
| 	LD	b7, 41 * SIZE(BO)
 | |
| 	LD	b8, 40 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c61, b3, c61
 | |
| 
 | |
| 	NMSUB	c51, c51, b4, c61
 | |
| 	NMSUB	c41, c41, b5, c61
 | |
| 	NMSUB	c31, c31, b6, c61
 | |
| 	NMSUB	c21, c21, b7, c61
 | |
| 	NMSUB	c11, c11, b8, c61
 | |
| /* Row 4: offsets 36 down to 32. */
 | |
| 	LD	b4, 36 * SIZE(BO)
 | |
| 	LD	b5, 35 * SIZE(BO)
 | |
| 	LD	b6, 34 * SIZE(BO)
 | |
| 	LD	b7, 33 * SIZE(BO)
 | |
| 	LD	b8, 32 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c51, b4, c51
 | |
| 
 | |
| 	NMSUB	c41, c41, b5, c51
 | |
| 	NMSUB	c31, c31, b6, c51
 | |
| 	NMSUB	c21, c21, b7, c51
 | |
| 	NMSUB	c11, c11, b8, c51
 | |
| /* Row 3: offsets 27 down to 24. */
 | |
| 	LD	b5, 27 * SIZE(BO)
 | |
| 	LD	b6, 26 * SIZE(BO)
 | |
| 	LD	b7, 25 * SIZE(BO)
 | |
| 	LD	b8, 24 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c41, b5, c41
 | |
| 
 | |
| 	NMSUB	c31, c31, b6, c41
 | |
| 	NMSUB	c21, c21, b7, c41
 | |
| 	NMSUB	c11, c11, b8, c41
 | |
| /* Row 2: offsets 18 down to 16. */
 | |
| 	LD	b6, 18 * SIZE(BO)
 | |
| 	LD	b7, 17 * SIZE(BO)
 | |
| 	LD	b8, 16 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c31, b6, c31
 | |
| 
 | |
| 	NMSUB	c21, c21, b7, c31
 | |
| 	NMSUB	c11, c11, b8, c31
 | |
| /* Row 1: offsets 9 and 8. */
 | |
| 	LD	b7,  9 * SIZE(BO)
 | |
| 	LD	b8,  8 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c21, b7, c21
 | |
| 
 | |
| 	NMSUB	c11, c11, b8, c21
 | |
| /* Row 0: only the diagonal entry at offset 0 remains. */
 | |
| 	LD	b8,  0 * SIZE(BO)
 | |
| 
 | |
| 	MUL	c11, b8, c11
 | |
| #endif
 | |
| /* Write-back for the odd-M tail: store c11..c81 to the packed buffer and to CO1..CO8, then advance the pointers and KK. */
 | |
| #ifdef LN
 | |
| 	daddiu	CO1, CO1, -1 * SIZE
 | |
| 	daddiu	CO2, CO2, -1 * SIZE
 | |
| 	daddiu	CO3, CO3, -1 * SIZE
 | |
| 	daddiu	CO4, CO4, -1 * SIZE
 | |
| 	daddiu	CO5, CO5, -1 * SIZE
 | |
| 	daddiu	CO6, CO6, -1 * SIZE
 | |
| 	daddiu	CO7, CO7, -1 * SIZE
 | |
| 	daddiu	CO8, CO8, -1 * SIZE
 | |
| #endif
 | |
| /* Store the results back to where they were loaded from at .L28: BO for LN/LT, AO otherwise. */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	ST	c11,  0 * SIZE(BO)
 | |
| 	ST	c21,  1 * SIZE(BO)
 | |
| 	ST	c31,  2 * SIZE(BO)
 | |
| 	ST	c41,  3 * SIZE(BO)
 | |
| 	ST	c51,  4 * SIZE(BO)
 | |
| 	ST	c61,  5 * SIZE(BO)
 | |
| 	ST	c71,  6 * SIZE(BO)
 | |
| 	ST	c81,  7 * SIZE(BO)
 | |
| #else
 | |
| 	ST	c11,  0 * SIZE(AO)
 | |
| 	ST	c21,  1 * SIZE(AO)
 | |
| 	ST	c31,  2 * SIZE(AO)
 | |
| 	ST	c41,  3 * SIZE(AO)
 | |
| 	ST	c51,  4 * SIZE(AO)
 | |
| 	ST	c61,  5 * SIZE(AO)
 | |
| 	ST	c71,  6 * SIZE(AO)
 | |
| 	ST	c81,  7 * SIZE(AO)
 | |
| #endif
 | |
| /* One result per output pointer CO1..CO8. */
 | |
| 	ST	c11,  0 * SIZE(CO1)
 | |
| 	ST	c21,  0 * SIZE(CO2)
 | |
| 	ST	c31,  0 * SIZE(CO3)
 | |
| 	ST	c41,  0 * SIZE(CO4)
 | |
| 	ST	c51,  0 * SIZE(CO5)
 | |
| 	ST	c61,  0 * SIZE(CO6)
 | |
| 	ST	c71,  0 * SIZE(CO7)
 | |
| 	ST	c81,  0 * SIZE(CO8)
 | |
| /* LN stepped the CO pointers back before storing; every other variant steps them forward afterwards. */
 | |
| #ifndef LN
 | |
| 	daddiu	CO1, CO1, 1 * SIZE
 | |
| 	daddiu	CO2, CO2, 1 * SIZE
 | |
| 	daddiu	CO3, CO3, 1 * SIZE
 | |
| 	daddiu	CO4, CO4, 1 * SIZE
 | |
| 	daddiu	CO5, CO5, 1 * SIZE
 | |
| 	daddiu	CO6, CO6, 1 * SIZE
 | |
| 	daddiu	CO7, CO7, 1 * SIZE
 | |
| 	daddiu	CO8, CO8, 1 * SIZE
 | |
| #endif
 | |
| /* RT: move AORIG forward by K elements. */
 | |
| #ifdef RT
 | |
| 	dsll	TEMP, K, BASE_SHIFT
 | |
| 	daddu	AORIG, AORIG, TEMP
 | |
| #endif
 | |
| /* LT/RN: skip the remaining K - KK k-steps (one element each in AO, eight each in BO). */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	dsubu	TEMP, K, KK
 | |
| 	dsll	L,    TEMP, 0 + BASE_SHIFT
 | |
| 	dsll	TEMP, TEMP, 3 + BASE_SHIFT
 | |
| 	daddu	AO, AO, L
 | |
| 	daddu	BO, BO, TEMP
 | |
| #endif
 | |
| /* KK moves by the one element just handled: +1 for LT, -1 for LN. */
 | |
| #ifdef LT
 | |
| 	daddiu	KK, KK, 1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	daddiu	KK, KK, -1
 | |
| #endif
 | |
| 	.align 3
 | |
| 
 | |
| .L29:	/* End of one 8-wide pass of the J loop: update B and KK for the active variant, then loop to .L10 while J > 0. */
 | |
| #ifdef LN
 | |
| 	dsll	TEMP, K, 3 + BASE_SHIFT
 | |
| 	daddu	B, B, TEMP	/* LN: advance B by K * 8 elements */
 | |
| #endif
 | |
| /* LT/RN: BO has already been walked to the end of this panel, so adopt it as the new B. */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	move	B,  BO
 | |
| #endif
 | |
| /* KK moves by 8 per pass: forward for RN, backward for RT. */
 | |
| #ifdef RN
 | |
| 	daddiu	KK, KK,  8
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	daddiu	KK, KK, -8
 | |
| #endif
 | |
| /* J is not modified in this block; presumably it is decremented at .L10, outside this view. */
 | |
| 	bgtz	J, .L10
 | |
| 	NOP
 | |
| 	.align 3
 | |
| 
 | |
| 
 | |
| 
 | |
| .L999:	/* Function exit: reload saved registers from the 144-byte frame, release it, and return through $31. */
 | |
| 	LDARG	$16,   0($sp)
 | |
| 	LDARG	$17,   8($sp)
 | |
| 	LDARG	$18,  16($sp)
 | |
| 	LDARG	$19,  24($sp)
 | |
| 	LDARG	$20,  32($sp)
 | |
| 	LDARG	$21,  40($sp)
 | |
| 	ldc1	$f24, 48($sp)
 | |
| 	ldc1	$f25, 56($sp)
 | |
| 	ldc1	$f26, 64($sp)
 | |
| 	ldc1	$f27, 72($sp)
 | |
| 	ldc1	$f28, 80($sp)
 | |
| 
 | |
| 	LDARG	$22,  88($sp)
 | |
| 	LDARG	$23,  96($sp)
 | |
| 	LDARG	$24, 104($sp)
 | |
| 	LDARG	$25, 112($sp)
 | |
| /* NOTE(review): without __64BIT__, $f20 below is reloaded from 112($sp), the same slot as $25 above - check the matching stores in the prologue (outside this view). */
 | |
| #ifndef __64BIT__
 | |
| 	ldc1	$f20,112($sp)
 | |
| 	ldc1	$f21,120($sp)
 | |
| 	ldc1	$f22,128($sp)
 | |
| 	ldc1	$f23,136($sp)
 | |
| #endif
 | |
| /* Return; the stack adjustment follows the jump, i.e. it runs in the delay slot if .set noreorder is in effect (not visible here). */
 | |
| 	j	$31
 | |
| 	daddiu	$sp, $sp, 144
 | |
| 
 | |
| 	EPILOGUE
 |