1667 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			1667 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /*********************************************************************/
 | |
| /* Copyright 2009, 2010 The University of Texas at Austin.           */
 | |
| /* All rights reserved.                                              */
 | |
| /*                                                                   */
 | |
| /* Redistribution and use in source and binary forms, with or        */
 | |
| /* without modification, are permitted provided that the following   */
 | |
| /* conditions are met:                                               */
 | |
| /*                                                                   */
 | |
| /*   1. Redistributions of source code must retain the above         */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer.                                                  */
 | |
| /*                                                                   */
 | |
| /*   2. Redistributions in binary form must reproduce the above      */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer in the documentation and/or other materials       */
 | |
| /*      provided with the distribution.                              */
 | |
| /*                                                                   */
 | |
| /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | |
| /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | |
| /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | |
| /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | |
| /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | |
| /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | |
| /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | |
| /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | |
| /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | |
| /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | |
| /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | |
| /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | |
| /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | |
| /*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | |
| /*                                                                   */
 | |
| /* The views and conclusions contained in the software and           */
 | |
| /* documentation are those of the authors and should not be          */
 | |
| /* interpreted as representing official policies, either expressed   */
 | |
| /* or implied, of The University of Texas at Austin.                 */
 | |
| /*********************************************************************/
 | |
| 
 | |
| #define ASSEMBLER
 | |
| #include "common.h"
 | |
| 
 | |
| #define M	$4	/* extent of the I loops: M >> 1 two-wide passes, then an M & 1 remainder */
 | |
| #define	N	$5	/* extent of the J loops: N >> 3 eight-wide panels, then N & 4 and N & 2 panels */
 | |
| #define	K	$6	/* accumulation length: K >> 2 unrolled-by-4 passes, then K & 3 single steps */
 | |
| #define A	$9	/* base of the A operand; copied into AO at the start of each panel */
 | |
| #define B	$10	/* base of the current B panel; replaced by BO once a panel is finished */
 | |
| #define C	$11	/* base of the current C panel; stepped forward by LDC per output pointer */
 | |
| #define LDC	$8	/* loaded from 128($sp) after the 128-byte frame push, then shifted left by ZBASE_SHIFT */
 | |
| 
 | |
| #define AO	$12	/* running read pointer into A */
 | |
| #define BO	$13	/* running read pointer into B */
 | |
| 
 | |
| #define I	$2	/* counter for the M-direction loops */
 | |
| #define J	$3	/* counter for the N-direction loops */
 | |
| #define L	$7	/* counter for the K-direction loops */
 | |
| 
 | |
| #define CO1	$14	/* CO1..CO8: output pointers, CO1 = C and each next one LDC further on */
 | |
| #define CO2	$15
 | |
| #define CO3	$16	/* $16..$21 are stored to the stack frame right after PROLOGUE */
 | |
| #define CO4	$17
 | |
| #define CO5	$18
 | |
| #define CO6	$19
 | |
| #define CO7	$20
 | |
| #define CO8	$21
 | |
| 
 | |
| #if defined(TRMMKERNEL)
 | |
| #define OFFSET	$22	/* NOTE(review): TRMM-only aliases; no use appears in the portion reviewed and the */
 | |
| #define KK	$23	/* visible prologue does not store $22/$23 -- confirm handling in the rest of the file */
 | |
| #define TEMP	$24
 | |
| #endif
 | |
| 
 | |
| #define a1	$f0	/* a1..a4: values loaded through AO */
 | |
| #define a2	$f1
 | |
| #define a3	$f28	/* $f28/$f29 are stored to the stack frame right after PROLOGUE */
 | |
| #define a4	$f29
 | |
| 
 | |
| #define b1	$f2	/* b1..b7: values loaded through B/BO; $f0..$f7 are reused as C temporaries at write-back */
 | |
| #define b2	$f3
 | |
| #define b3	$f4
 | |
| #define b4	$f5
 | |
| #define b5	$f6
 | |
| #define b6	$f7
 | |
| #define b7	$f8
 | |
| #define b8	$f9	/* only referenced through the a5 alias in the visible code */
 | |
| 
 | |
| #define a5	b8	/* fifth A-side register for the two-wide N & 2 loop; shares $f9 with b8 */
 | |
| 
 | |
| #define c11	$f10	/* cXY accumulators: X picks the output pointer COX; Y = 1 pairs with a1, Y = 2 with a2 */
 | |
| #define c12	$f11
 | |
| #define c21	$f12
 | |
| #define c22	$f13
 | |
| #define c31	$f14
 | |
| #define c32	$f17	/* numbering skips $f15/$f16, which are ALPHA_R/ALPHA_I */
 | |
| #define c41	$f18
 | |
| #define c42	$f19
 | |
| #define c51	$f20
 | |
| #define c52	$f21
 | |
| #define c61	$f22
 | |
| #define c62	$f23
 | |
| #define c71	$f24	/* $f24..$f27 are stored to the stack frame right after PROLOGUE */
 | |
| #define c72	$f25
 | |
| #define c81	$f26
 | |
| #define c82	$f27
 | |
| 
 | |
| #define ALPHA_R	$f15	/* read-only here: scales an accumulator into the even SIZE slot of each C pair */
 | |
| #define ALPHA_I	$f16	/* read-only here: scales the same accumulator into the odd SIZE slot */
 | |
| 
 | |
| 	PROLOGUE
 | |
| 
 | |
| 	daddiu	$sp, $sp, -128
 | |
| 
 | |
| 	SDARG	$16,   0($sp)
 | |
| 	SDARG	$17,   8($sp)
 | |
| 	SDARG	$18,  16($sp)
 | |
| 	SDARG	$19,  24($sp)
 | |
| 	SDARG	$20,  32($sp)
 | |
| 	SDARG	$21,  40($sp)
 | |
| 	sdc1	$f24, 48($sp)
 | |
| 	sdc1	$f25, 56($sp)
 | |
| 	sdc1	$f26, 64($sp)
 | |
| 	sdc1	$f27, 72($sp)
 | |
| 	sdc1	$f28, 80($sp)
 | |
| 	sdc1	$f29, 88($sp)
 | |
| 
 | |
| 	LDARG	LDC,  128($sp)
 | |
| 
 | |
| 	dsll	LDC, LDC, ZBASE_SHIFT
 | |
| 
 | |
| 	dsra	J,  N, 3
 | |
| 	blez	J, .L30
 | |
| 	nop
 | |
| 
 | |
| .L10:
 | |
| 	move	CO1, C
 | |
| 	MTC	$0,  c11
 | |
| 	daddu	CO2, C,   LDC
 | |
| 	move	AO, A
 | |
| 	daddu	CO3, CO2, LDC
 | |
| 	daddiu	J, J, -1
 | |
| 	daddu	CO4, CO3, LDC
 | |
| 	MOV	c21, c11
 | |
| 	daddu	CO5, CO4, LDC
 | |
| 	MOV	c31, c11
 | |
| 	daddu	CO6, CO5, LDC
 | |
| 	MOV	c41, c11
 | |
| 	daddu	CO7, CO6, LDC
 | |
| 	MOV	c51, c11
 | |
| 	daddu	CO8, CO7, LDC
 | |
| 	dsra	I,  M, 1
 | |
| 	daddu	C,   CO8, LDC
 | |
| 
 | |
| 	blez	I, .L20
 | |
| 	MOV	c61, c11
 | |
| 
 | |
| .L11:
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MOV	c71, c11
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	LD	a3,  4 * SIZE(AO)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 
 | |
| 	dsra	L,  K, 2
 | |
| 	MOV	c32, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	MOV	c42, c11
 | |
| 
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	MOV	c52, c11
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	MOV	c62, c11
 | |
| 
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	MOV	c72, c11
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 	MOV	c82, c11
 | |
| 
 | |
| 	blez	L, .L15
 | |
| 	move	BO,  B
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	daddiu	L, L, -1
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	blez	L, .L13
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L12:
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	LD	a4,  2 * SIZE(AO)
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	NOP
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  8 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 28 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 17 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b1
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 32 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 21 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 22 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 23 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a3, b5
 | |
| 	LD	a4,  6 * SIZE(AO)
 | |
| 	MADD	c61, c61, a3, b2
 | |
| 	NOP
 | |
| 	MADD	c71, c71, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a3, b4
 | |
| 	LD	a3, 12 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 25 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 26 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 40 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 29 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 30 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 31 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	daddiu	BO, BO, 32 * SIZE
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	bgtz	L, .L12
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	NOP
 | |
| 	.align 3
 | |
| 
 | |
| .L13:
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	LD	a4,  2 * SIZE(AO)
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  8 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 28 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 17 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b1
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 32 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 21 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 22 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 23 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a3, b5
 | |
| 	NOP
 | |
| 	MADD	c61, c61, a3, b2
 | |
| 	LD	a4,  6 * SIZE(AO)
 | |
| 	MADD	c71, c71, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a3, b4
 | |
| 	LD	a3, 12 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2, 25 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3, 26 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 40 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 29 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 30 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 31 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	daddiu	BO, BO, 32 * SIZE
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	NOP
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c52, c52, a2, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	.align 3
 | |
| 
 | |
| .L15:
 | |
| 	andi	L,  K, 3
 | |
| 	NOP
 | |
| 	blez	L, .L18
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L16:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	NOP
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1,  8 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	daddiu	L, L, -1
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	daddiu	AO, AO,  2 * SIZE
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	daddiu	BO, BO,  8 * SIZE
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c52, c52, a2, b5
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	MADD	c62, c62, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c72, c72, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c82, c82, a2, b4
 | |
| 	bgtz	L, .L16
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| .L18:
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| 	LD	$f2, 2 * SIZE(CO1)
 | |
| 	LD	$f3, 3 * SIZE(CO1)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO2)
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11
 | |
| 	LD	$f5, 1 * SIZE(CO2)
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11
 | |
| 	LD	$f6, 2 * SIZE(CO2)
 | |
| 	MADD	$f2, $f2, ALPHA_R, c12
 | |
| 	LD	$f7, 3 * SIZE(CO2)
 | |
| 	MADD	$f3, $f3, ALPHA_I, c12
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c21
 | |
| 	ST	$f0,  0 * SIZE(CO1)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c21
 | |
| 	ST	$f1,  1 * SIZE(CO1)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c22
 | |
| 	ST	$f2,  2 * SIZE(CO1)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c22
 | |
| 	ST	$f3,  3 * SIZE(CO1)
 | |
| 
 | |
| 	LD	$f0, 0 * SIZE(CO3)
 | |
| 	LD	$f1, 1 * SIZE(CO3)
 | |
| 	LD	$f2, 2 * SIZE(CO3)
 | |
| 	LD	$f3, 3 * SIZE(CO3)
 | |
| 
 | |
| 	ST	$f4,  0 * SIZE(CO2)
 | |
| 	ST	$f5,  1 * SIZE(CO2)
 | |
| 	ST	$f6,  2 * SIZE(CO2)
 | |
| 	ST	$f7,  3 * SIZE(CO2)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO4)
 | |
| 	LD	$f5, 1 * SIZE(CO4)
 | |
| 	LD	$f6, 2 * SIZE(CO4)
 | |
| 	LD	$f7, 3 * SIZE(CO4)
 | |
| 
 | |
| 	MADD	$f0, $f0, ALPHA_R, c31
 | |
| 	MADD	$f1, $f1, ALPHA_I, c31
 | |
| 	MADD	$f2, $f2, ALPHA_R, c32
 | |
| 	MADD	$f3, $f3, ALPHA_I, c32
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c41
 | |
| 	ST	$f0,  0 * SIZE(CO3)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c41
 | |
| 	ST	$f1,  1 * SIZE(CO3)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c42
 | |
| 	ST	$f2,  2 * SIZE(CO3)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c42
 | |
| 	ST	$f3,  3 * SIZE(CO3)
 | |
| 
 | |
| 	LD	$f0, 0 * SIZE(CO5)
 | |
| 	LD	$f1, 1 * SIZE(CO5)
 | |
| 	LD	$f2, 2 * SIZE(CO5)
 | |
| 	LD	$f3, 3 * SIZE(CO5)
 | |
| 
 | |
| 	ST	$f4,  0 * SIZE(CO4)
 | |
| 	ST	$f5,  1 * SIZE(CO4)
 | |
| 	ST	$f6,  2 * SIZE(CO4)
 | |
| 	ST	$f7,  3 * SIZE(CO4)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO6)
 | |
| 	LD	$f5, 1 * SIZE(CO6)
 | |
| 	LD	$f6, 2 * SIZE(CO6)
 | |
| 	LD	$f7, 3 * SIZE(CO6)
 | |
| 
 | |
| 	MADD	$f0, $f0, ALPHA_R, c51
 | |
| 	daddiu	CO1,CO1, 4 * SIZE
 | |
| 	MADD	$f1, $f1, ALPHA_I, c51
 | |
| 	daddiu	CO2,CO2, 4 * SIZE
 | |
| 	MADD	$f2, $f2, ALPHA_R, c52
 | |
| 	daddiu	CO3,CO3, 4 * SIZE
 | |
| 	MADD	$f3, $f3, ALPHA_I, c52
 | |
| 	daddiu	CO4,CO4, 4 * SIZE
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c61
 | |
| 	ST	$f0,  0 * SIZE(CO5)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c61
 | |
| 	ST	$f1,  1 * SIZE(CO5)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c62
 | |
| 	ST	$f2,  2 * SIZE(CO5)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c62
 | |
| 	ST	$f3,  3 * SIZE(CO5)
 | |
| 
 | |
| 	LD	$f0, 0 * SIZE(CO7)
 | |
| 	LD	$f1, 1 * SIZE(CO7)
 | |
| 	LD	$f2, 2 * SIZE(CO7)
 | |
| 	LD	$f3, 3 * SIZE(CO7)
 | |
| 
 | |
| 	ST	$f4,  0 * SIZE(CO6)
 | |
| 	ST	$f5,  1 * SIZE(CO6)
 | |
| 	ST	$f6,  2 * SIZE(CO6)
 | |
| 	ST	$f7,  3 * SIZE(CO6)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO8)
 | |
| 	daddiu	I, I, -1
 | |
| 	LD	$f5, 1 * SIZE(CO8)
 | |
| 	MTC	$0,  c11
 | |
| 	LD	$f6, 2 * SIZE(CO8)
 | |
| 	LD	$f7, 3 * SIZE(CO8)
 | |
| 
 | |
| 	MADD	$f0, $f0, ALPHA_R, c71
 | |
| 	daddiu	CO5,CO5, 4 * SIZE
 | |
| 	MADD	$f1, $f1, ALPHA_I, c71
 | |
| 	daddiu	CO6,CO6, 4 * SIZE
 | |
| 	MADD	$f2, $f2, ALPHA_R, c72
 | |
| 	daddiu	CO7,CO7, 4 * SIZE
 | |
| 	MADD	$f3, $f3, ALPHA_I, c72
 | |
| 	daddiu	CO8,CO8, 4 * SIZE
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c81
 | |
| 	ST	$f0, -4 * SIZE(CO7)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c81
 | |
| 	ST	$f1, -3 * SIZE(CO7)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c82
 | |
| 	ST	$f2, -2 * SIZE(CO7)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c82
 | |
| 	ST	$f3, -1 * SIZE(CO7)
 | |
| 
 | |
| 	ST	$f4, -4 * SIZE(CO8)
 | |
| 	MOV	c21, c11
 | |
| 	ST	$f5, -3 * SIZE(CO8)
 | |
| 	MOV	c31, c11
 | |
| 	ST	$f6, -2 * SIZE(CO8)
 | |
| 	MOV	c41, c11
 | |
| 	ST	$f7, -1 * SIZE(CO8)
 | |
| 	MOV	c51, c11
 | |
| 	bgtz	I, .L11
 | |
| 	MOV	c61, c11
 | |
| 	.align 3
 | |
| 
 | |
| .L20:
 | |
| 	andi	I,  M, 1
 | |
| 	MOV	c61, c11
 | |
| 	blez	I, .L29
 | |
| 	MOV	c71, c11
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	dsra	L,  K, 2
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	blez	L, .L25
 | |
| 	move	BO,  B
 | |
| 	.align	3
 | |
| 
 | |
| .L22:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	LD	a1,  4 * SIZE(AO)
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	MADD	c11, c11, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a2, b7
 | |
| 	LD	b7, 28 * SIZE(BO)
 | |
| 	MADD	c61, c61, a2, b2
 | |
| 	LD	b2, 17 * SIZE(BO)
 | |
| 	MADD	c71, c71, a2, b3
 | |
| 	LD	b3, 18 * SIZE(BO)
 | |
| 	MADD	c81, c81, a2, b4
 | |
| 	LD	b4, 19 * SIZE(BO)
 | |
| 
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| 
 | |
| 	MADD	c11, c11, a3, b1
 | |
| 	LD	b1, 32 * SIZE(BO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	LD	b2, 21 * SIZE(BO)
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	LD	b3, 22 * SIZE(BO)
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	LD	b4, 23 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a3, b5
 | |
| 	LD	b5, 36 * SIZE(BO)
 | |
| 	MADD	c61, c61, a3, b2
 | |
| 	LD	b2, 25 * SIZE(BO)
 | |
| 	MADD	c71, c71, a3, b3
 | |
| 	LD	b3, 26 * SIZE(BO)
 | |
| 	MADD	c81, c81, a3, b4
 | |
| 	LD	b4, 27 * SIZE(BO)
 | |
| 
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	daddiu	BO, BO, 32 * SIZE
 | |
| 
 | |
| 	MADD	c11, c11, a4, b6
 | |
| 	LD	b6,  8 * SIZE(BO)
 | |
| 	MADD	c21, c21, a4, b2
 | |
| 	LD	b2, -3 * SIZE(BO)
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	LD	b3, -2 * SIZE(BO)
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	LD	b4, -1 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c51, c51, a4, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c61, c61, a4, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c71, c71, a4, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c81, c81, a4, b4
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	bgtz	L, .L22
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 	.align 3
 | |
| 
 | |
| .L25:
 | |
| 	andi	L,  K, 3
 | |
| 	NOP
 | |
| 	blez	L, .L28
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L26:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  8 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	MOV	a2, a2
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	daddiu	BO, BO,  8 * SIZE
 | |
| 
 | |
| 	MADD	c51, c51, a1, b5
 | |
| 	LD	b5,  4 * SIZE(BO)
 | |
| 	MADD	c61, c61, a1, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c71, c71, a1, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c81, c81, a1, b4
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 
 | |
| 	bgtz	L, .L26
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| .L28:
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| 	LD	$f2, 0 * SIZE(CO2)
 | |
| 	LD	$f3, 1 * SIZE(CO2)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO3)
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11
 | |
| 	LD	$f5, 1 * SIZE(CO3)
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11
 | |
| 	LD	$f6, 0 * SIZE(CO4)
 | |
| 	MADD	$f2, $f2, ALPHA_R, c21
 | |
| 	LD	$f7, 1 * SIZE(CO4)
 | |
| 	MADD	$f3, $f3, ALPHA_I, c21
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c31
 | |
| 	ST	$f0,  0 * SIZE(CO1)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c31
 | |
| 	ST	$f1,  1 * SIZE(CO1)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c41
 | |
| 	ST	$f2,  0 * SIZE(CO2)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c41
 | |
| 	ST	$f3,  1 * SIZE(CO2)
 | |
| 
 | |
| 	LD	$f0, 0 * SIZE(CO5)
 | |
| 	LD	$f1, 1 * SIZE(CO5)
 | |
| 	LD	$f2, 0 * SIZE(CO6)
 | |
| 	LD	$f3, 1 * SIZE(CO6)
 | |
| 
 | |
| 	ST	$f4,  0 * SIZE(CO3)
 | |
| 	ST	$f5,  1 * SIZE(CO3)
 | |
| 	ST	$f6,  0 * SIZE(CO4)
 | |
| 	ST	$f7,  1 * SIZE(CO4)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO7)
 | |
| 	MADD	$f0, $f0, ALPHA_R, c51
 | |
| 	LD	$f5, 1 * SIZE(CO7)
 | |
| 	MADD	$f1, $f1, ALPHA_I, c51
 | |
| 	LD	$f6, 0 * SIZE(CO8)
 | |
| 	MADD	$f2, $f2, ALPHA_R, c61
 | |
| 	LD	$f7, 1 * SIZE(CO8)
 | |
| 	MADD	$f3, $f3, ALPHA_I, c61
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c71
 | |
| 	ST	$f0,  0 * SIZE(CO5)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c71
 | |
| 	ST	$f1,  1 * SIZE(CO5)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c81
 | |
| 	ST	$f2,  0 * SIZE(CO6)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c81
 | |
| 	ST	$f3,  1 * SIZE(CO6)
 | |
| 
 | |
| 	ST	$f4,  0 * SIZE(CO7)
 | |
| 	ST	$f5,  1 * SIZE(CO7)
 | |
| 	ST	$f6,  0 * SIZE(CO8)
 | |
| 	ST	$f7,  1 * SIZE(CO8)
 | |
| 	.align 3
 | |
| 
 | |
| .L29:
 | |
| 	bgtz	J, .L10
 | |
| 	move	B, BO
 | |
| 	.align 3
 | |
| 
 | |
| .L30:
 | |
| 	andi	J,  N, 4
 | |
| 	blez	J, .L50
 | |
| 	move	AO, A
 | |
| 
 | |
| 	move	CO1, C
 | |
| 	MTC	$0,  c11
 | |
| 	daddu	CO2, C,   LDC
 | |
| 	daddu	CO3, CO2, LDC
 | |
| 	daddu	CO4, CO3, LDC
 | |
| 	MOV	c21, c11
 | |
| 	daddu	C,   CO4, LDC
 | |
| 	MOV	c31, c11
 | |
| 
 | |
| 	dsra	I,  M, 1
 | |
| 	blez	I, .L40
 | |
| 	MOV	c41, c11
 | |
| 
 | |
| .L31:
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a3,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	MOV	c32, c11
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	MOV	c42, c11
 | |
| 
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	dsra	L,  K, 2
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L35
 | |
| 	move	BO,  B
 | |
| 	.align	3
 | |
| 
 | |
| .L32:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	daddiu	L, L, -1
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  2 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b5
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  8 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b6
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	NOP
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	NOP
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	LD	a3,  6 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a3, b7
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	MADD	c31, c31, a3, b3
 | |
| 	daddiu	BO, BO, 16 * SIZE
 | |
| 	MADD	c41, c41, a3, b4
 | |
| 	LD	a3,  4 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	NOP
 | |
| 
 | |
| 	bgtz	L, .L32
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	.align 3
 | |
| 
 | |
| .L35:
 | |
| 	andi	L,  K, 3
 | |
| 	NOP
 | |
| 	blez	L, .L38
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L36:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	daddiu	L, L, -1
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	daddiu	AO, AO,  2 * SIZE
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1,  4 * SIZE(BO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c32, c32, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c42, c42, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	bgtz	L, .L36
 | |
| 	daddiu	BO, BO,  4 * SIZE
 | |
| 
 | |
| .L38:
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| 	LD	$f2, 2 * SIZE(CO1)
 | |
| 	LD	$f3, 3 * SIZE(CO1)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO2)
 | |
| 	LD	$f5, 1 * SIZE(CO2)
 | |
| 	LD	$f6, 2 * SIZE(CO2)
 | |
| 	LD	$f7, 3 * SIZE(CO2)
 | |
| 
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11
 | |
| 	MADD	$f2, $f2, ALPHA_R, c12
 | |
| 	MADD	$f3, $f3, ALPHA_I, c12
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c21
 | |
| 	ST	$f0,  0 * SIZE(CO1)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c21
 | |
| 	ST	$f1,  1 * SIZE(CO1)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c22
 | |
| 	ST	$f2,  2 * SIZE(CO1)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c22
 | |
| 	ST	$f3,  3 * SIZE(CO1)
 | |
| 
 | |
| 	LD	$f0, 0 * SIZE(CO3)
 | |
| 	LD	$f1, 1 * SIZE(CO3)
 | |
| 	LD	$f2, 2 * SIZE(CO3)
 | |
| 	LD	$f3, 3 * SIZE(CO3)
 | |
| 
 | |
| 	ST	$f4,  0 * SIZE(CO2)
 | |
| 	MADD	$f0, $f0, ALPHA_R, c31
 | |
| 	ST	$f5,  1 * SIZE(CO2)
 | |
| 	MADD	$f1, $f1, ALPHA_I, c31
 | |
| 	ST	$f6,  2 * SIZE(CO2)
 | |
| 	MADD	$f2, $f2, ALPHA_R, c32
 | |
| 	ST	$f7,  3 * SIZE(CO2)
 | |
| 	MADD	$f3, $f3, ALPHA_I, c32
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO4)
 | |
| 	LD	$f5, 1 * SIZE(CO4)
 | |
| 	LD	$f6, 2 * SIZE(CO4)
 | |
| 	LD	$f7, 3 * SIZE(CO4)
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c41
 | |
| 	daddiu	CO1,CO1, 4 * SIZE
 | |
| 	MADD	$f5, $f5, ALPHA_I, c41
 | |
| 	daddiu	CO2,CO2, 4 * SIZE
 | |
| 	MADD	$f6, $f6, ALPHA_R, c42
 | |
| 	daddiu	CO3,CO3, 4 * SIZE
 | |
| 	MADD	$f7, $f7, ALPHA_I, c42
 | |
| 	daddiu	CO4,CO4, 4 * SIZE
 | |
| 
 | |
| 	ST	$f0, -4 * SIZE(CO3)
 | |
| 	daddiu	I, I, -1
 | |
| 	ST	$f1, -3 * SIZE(CO3)
 | |
| 	ST	$f2, -2 * SIZE(CO3)
 | |
| 	ST	$f3, -1 * SIZE(CO3)
 | |
| 
 | |
| 	ST	$f4, -4 * SIZE(CO4)
 | |
| 	MTC	$0,  c11
 | |
| 	ST	$f5, -3 * SIZE(CO4)
 | |
| 	MOV	c21, c11
 | |
| 	ST	$f6, -2 * SIZE(CO4)
 | |
| 	MOV	c31, c11
 | |
| 	ST	$f7, -1 * SIZE(CO4)
 | |
| 	bgtz	I, .L31
 | |
| 	MOV	c41, c11
 | |
| 	.align 3
 | |
| 
 | |
| .L40:
 | |
| 	andi	I,  M, 1
 | |
| 	blez	I, .L49
 | |
| 	MOV	c61, c11
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MOV	c71, c11
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c81, c11
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	dsra	L,  K, 2
 | |
| 
 | |
| 	blez	L, .L45
 | |
| 	move	BO,  B
 | |
| 	.align	3
 | |
| 
 | |
| .L42:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1, 16 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 
 | |
| 	LD	a1,  4 * SIZE(AO)
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	MADD	c11, c11, a2, b5
 | |
| 	LD	b5, 20 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| 
 | |
| 	LD	a2,  2 * SIZE(AO)
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| 
 | |
| 	MADD	c11, c11, a2, b6
 | |
| 	LD	b6, 24 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2, 13 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3, 14 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4, 15 * SIZE(BO)
 | |
| 
 | |
| 	LD	a2, -1 * SIZE(AO)
 | |
| 	daddiu	BO, BO, 16 * SIZE
 | |
| 
 | |
| 	MADD	c11, c11, a2, b7
 | |
| 	LD	b7, 12 * SIZE(BO)
 | |
| 	MADD	c21, c21, a2, b2
 | |
| 	LD	b2,  1 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3,  2 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 
 | |
| 	bgtz	L, .L42
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	.align 3
 | |
| 
 | |
| .L45:
 | |
| 	andi	L,  K, 3
 | |
| 	NOP
 | |
| 	blez	L, .L48
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L46:
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  4 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a1, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a1, b4
 | |
| 	LD	a1,  1 * SIZE(AO)
 | |
| 
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	MOV	a2, a2
 | |
| 	bgtz	L, .L46
 | |
| 	daddiu	BO, BO,  4 * SIZE
 | |
| 
 | |
| 
 | |
| .L48:
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| 	LD	$f2, 0 * SIZE(CO2)
 | |
| 	LD	$f3, 1 * SIZE(CO2)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO3)
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11
 | |
| 	LD	$f5, 1 * SIZE(CO3)
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11
 | |
| 	LD	$f6, 0 * SIZE(CO4)
 | |
| 	MADD	$f2, $f2, ALPHA_R, c21
 | |
| 	LD	$f7, 1 * SIZE(CO4)
 | |
| 	MADD	$f3, $f3, ALPHA_I, c21
 | |
| 
 | |
| 	MADD	$f4, $f4, ALPHA_R, c31
 | |
| 	ST	$f0,  0 * SIZE(CO1)
 | |
| 	MADD	$f5, $f5, ALPHA_I, c31
 | |
| 	ST	$f1,  1 * SIZE(CO1)
 | |
| 	MADD	$f6, $f6, ALPHA_R, c41
 | |
| 	ST	$f2,  0 * SIZE(CO2)
 | |
| 	MADD	$f7, $f7, ALPHA_I, c41
 | |
| 	ST	$f3,  1 * SIZE(CO2)
 | |
| 
 | |
| 	ST	$f4,  0 * SIZE(CO3)
 | |
| 	ST	$f5,  1 * SIZE(CO3)
 | |
| 	ST	$f6,  0 * SIZE(CO4)
 | |
| 	ST	$f7,  1 * SIZE(CO4)
 | |
| 	.align 3
 | |
| 
 | |
| .L49:
 | |
| 	move	B, BO
 | |
| 	.align 3
 | |
| 
 | |
| .L50:	/* Two-column pass (CO1, CO2): entered only when bit 1 of N is set. */
 | |
| 	andi	J,  N, 2	/* J = N & 2 */
 | |
| 	blez	J, .L70	/* bit clear: skip to .L70 */
 | |
| 
 | |
| 	move	AO, A	/* AO restarts at A; placed right after the branch, presumably its delay slot - TODO confirm */
 | |
| 	move	CO1, C
 | |
| 	daddu	CO2, C,   LDC	/* CO2 = C + LDC */
 | |
| 
 | |
| 	dsra	I,  M, 1	/* I = M >> 1: number of two-row tiles */
 | |
| 	blez	I, .L60	/* none: go to the M & 1 handling at .L60 */
 | |
| 	daddu	C,   CO2, LDC	/* C = CO2 + LDC, i.e. moved on by two LDC strides; placed after the branch */
 | |
| 
 | |
| .L51:	/* Start of one 2x2 tile: clear c11/c21/c12/c22 and preload operands from AO and B. */
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11	/* $0 is the MIPS zero register; MTC is defined outside this view, presumably c11 = 0 */
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a5,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	dsra	L,  K, 2	/* L = K >> 2: trip count for the loop at .L52 */
 | |
| 	LD	b6,  8 * SIZE(B)	/* NOTE(review): b6 and b7 are not read by .L52, .L56 or .L58 */
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L55	/* K < 4: only the remainder loop runs */
 | |
| 	move	BO,  B	/* BO starts at B; placed after the branch, presumably its delay slot */
 | |
| 	.align	3
 | |
| 
 | |
| .L52:	/* 2x2 tile, four k steps per trip; loads for later steps are interleaved with the MADDs. Step 1: a1,a2 with b1,b2. */
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b4,  3 * SIZE(BO)
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b1,  8 * SIZE(BO)	/* b1 for the next trip (BO advances by 8 below) */
 | |
| /* Step 2: a3,a4 (AO slots 2,3) with b3,b4 (BO slots 2,3). */
 | |
| 	MADD	c11, c11, a3, b3
 | |
| 	LD	a1,  8 * SIZE(AO)	/* a1 for the next trip */
 | |
| 	MADD	c21, c21, a3, b4
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c12, c12, a4, b3
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	MADD	c22, c22, a4, b4
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| /* Step 3: a5,a2 (AO slots 4,5) with b5,b2 (BO slots 4,5). */
 | |
| 	MADD	c11, c11, a5, b5
 | |
| 	LD	a3,  6 * SIZE(AO)
 | |
| 	MADD	c21, c21, a5, b2
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| 	MADD	c12, c12, a2, b5
 | |
| 	LD	a4,  7 * SIZE(AO)
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b5, 12 * SIZE(BO)	/* b5 for the next trip */
 | |
| /* Step 4: a3,a4 (AO slots 6,7) with b3,b4 (BO slots 6,7). */
 | |
| 	MADD	c11, c11, a3, b3
 | |
| 	LD	a5, 12 * SIZE(AO)	/* a5 for the next trip */
 | |
| 	MADD	c21, c21, a3, b4
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c12, c12, a4, b3
 | |
| 	LD	a2,  9 * SIZE(AO)
 | |
| 	MADD	c22, c22, a4, b4
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	daddiu	L, L, -1
 | |
| 	bgtz	L, .L52
 | |
| 	daddiu	BO, BO,  8 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 	.align 3
 | |
| 
 | |
| .L55:	/* K & 3 tail of the 2x2 tile: .L56 runs once per leftover k step. */
 | |
| 	andi	L,  K, 3	/* L = K & 3 */
 | |
| 	NOP
 | |
| 	blez	L, .L58	/* nothing left over: go to the C update */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L56:	/* One k step: a1 and a2 (AO slots 0,1) against b1 and b2 (BO slots 0,1). */
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	a2,  1 * SIZE(AO)	/* a2 is loaded here, just before its first use below */
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	a1,  2 * SIZE(AO)	/* a1 for the next step */
 | |
| 
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 	LD	b1,  2 * SIZE(BO)	/* b1, b2 for the next step */
 | |
| 	MADD	c22, c22, a2, b2
 | |
| 	LD	b2,  3 * SIZE(BO)
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  2 * SIZE
 | |
| 	bgtz	L, .L56
 | |
| 	daddiu	BO, BO,  2 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 
 | |
| .L58:	/* C update for the 2x2 tile: even slots get + ALPHA_R * acc, odd slots get + ALPHA_I * acc. */
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| 	LD	$f2, 2 * SIZE(CO1)
 | |
| 	LD	$f3, 3 * SIZE(CO1)
 | |
| 
 | |
| 	LD	$f4, 0 * SIZE(CO2)
 | |
| 	LD	$f5, 1 * SIZE(CO2)
 | |
| 	LD	$f6, 2 * SIZE(CO2)
 | |
| 	LD	$f7, 3 * SIZE(CO2)
 | |
| /* c11, c12 feed CO1 slots 0-1 and 2-3; c21, c22 feed CO2 slots 0-1 and 2-3. */
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11
 | |
| 	daddiu	I, I, -1	/* one tile fewer to do */
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11
 | |
| 	daddiu	CO1,CO1, 4 * SIZE	/* pointers advance before the stores ... */
 | |
| 	MADD	$f2, $f2, ALPHA_R, c12
 | |
| 	daddiu	CO2,CO2, 4 * SIZE
 | |
| 	MADD	$f3, $f3, ALPHA_I, c12
 | |
| 	MADD	$f4, $f4, ALPHA_R, c21
 | |
| 	MADD	$f5, $f5, ALPHA_I, c21
 | |
| 	MADD	$f6, $f6, ALPHA_R, c22
 | |
| 	MADD	$f7, $f7, ALPHA_I, c22
 | |
| /* ... so the stores use offsets -4..-1 to reach the slots loaded above. */
 | |
| 	ST	$f0, -4 * SIZE(CO1)
 | |
| 	ST	$f1, -3 * SIZE(CO1)
 | |
| 	ST	$f2, -2 * SIZE(CO1)
 | |
| 	ST	$f3, -1 * SIZE(CO1)
 | |
| 
 | |
| 	ST	$f4, -4 * SIZE(CO2)
 | |
| 	ST	$f5, -3 * SIZE(CO2)
 | |
| 	ST	$f6, -2 * SIZE(CO2)
 | |
| 	bgtz	I, .L51	/* more two-row tiles remain */
 | |
| 	ST	$f7, -1 * SIZE(CO2)	/* last store sits after the branch; presumably its delay slot */
 | |
| 	.align 3
 | |
| 
 | |
| .L60:	/* Leftover single row (M & 1) of the two-column pass: clear accumulators and preload. */
 | |
| 	andi	I,  M, 1	/* I = M & 1 */
 | |
| 	blez	I, .L69	/* M even: nothing to do here */
 | |
| 	NOP
 | |
| 
 | |
| 	dsra	L,  K, 2	/* L = K >> 2: trip count for the loop at .L62 */
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	MTC	$0,  c11	/* $0 is the MIPS zero register; MTC is defined outside this view, presumably c11 = 0 */
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	MOV	c31, c11
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 	MOV	c41, c11
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)	/* NOTE(review): b5, b6 and b7 are not read by .L62, .L66 or .L68 */
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L65	/* K < 4: only the remainder loop runs */
 | |
| 	move	BO,  B	/* BO starts at B; placed after the branch, presumably its delay slot */
 | |
| 	.align	3
 | |
| 
 | |
| .L62:	/* 1x2 tile, four k steps per trip. c11/c21 take steps 1 and 3; c31/c41 take steps 2 and 4. */
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  4 * SIZE(BO)
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  5 * SIZE(BO)
 | |
| 	MADD	c31, c31, a2, b3
 | |
| 	LD	b3,  6 * SIZE(BO)
 | |
| 	MADD	c41, c41, a2, b4
 | |
| 	LD	b4,  7 * SIZE(BO)
 | |
| /* a1, a2 for the next trip (AO advances by 4 below). */
 | |
| 	LD	a1,  4 * SIZE(AO)
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| /* Steps 3 and 4: a3, a4 with BO slots 4..7. */
 | |
| 	MADD	c11, c11, a3, b1
 | |
| 	LD	b1,  8 * SIZE(BO)
 | |
| 	MADD	c21, c21, a3, b2
 | |
| 	LD	b2,  9 * SIZE(BO)
 | |
| 	MADD	c31, c31, a4, b3
 | |
| 	LD	b3, 10 * SIZE(BO)
 | |
| 	MADD	c41, c41, a4, b4
 | |
| 	LD	b4, 11 * SIZE(BO)
 | |
| /* a3, a4 for the next trip. */
 | |
| 	LD	a3,  6 * SIZE(AO)
 | |
| 	LD	a4,  7 * SIZE(AO)
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| 
 | |
| 	bgtz	L, .L62
 | |
| 	daddiu	BO, BO,  8 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 	.align 3
 | |
| 
 | |
| .L65:	/* K & 3 tail of the 1x2 tile: .L66 runs once per leftover k step. */
 | |
| 	andi	L,  K, 3	/* L = K & 3 */
 | |
| 	NOP
 | |
| 	blez	L, .L68	/* nothing left over: go to the C update */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L66:	/* One k step: a1 against b1 and b2; only c11 and c21 are updated here. */
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	LD	b1,  2 * SIZE(BO)	/* b1, b2 for the next step */
 | |
| 	MADD	c21, c21, a1, b2
 | |
| 	LD	b2,  3 * SIZE(BO)
 | |
| 
 | |
| 	LD	a1,  1 * SIZE(AO)	/* a1 for the next step */
 | |
| 	daddiu	L, L, -1
 | |
| 
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	bgtz	L, .L66
 | |
| 	daddiu	BO, BO,  2 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 
 | |
| 
 | |
| .L68:	/* C update for the 1x2 tile: merge the split accumulators, then scale into CO1 and CO2. */
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| 	LD	$f2, 0 * SIZE(CO2)
 | |
| 	LD	$f3, 1 * SIZE(CO2)
 | |
| /* .L62 spreads the sums over two register pairs; fold c31 into c11 and c41 into c21. */
 | |
| 	ADD	c11, c11, c31
 | |
| 	ADD	c21, c21, c41
 | |
| /* Slot 0 gets + ALPHA_R * acc, slot 1 gets + ALPHA_I * acc. */
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11
 | |
| 	MADD	$f2, $f2, ALPHA_R, c21
 | |
| 	MADD	$f3, $f3, ALPHA_I, c21
 | |
| 
 | |
| 	ST	$f0,  0 * SIZE(CO1)
 | |
| 	ST	$f1,  1 * SIZE(CO1)
 | |
| 	ST	$f2,  0 * SIZE(CO2)
 | |
| 	ST	$f3,  1 * SIZE(CO2)
 | |
| 	.align 3
 | |
| 
 | |
| .L69:
 | |
| 	move	B, BO	/* B takes the position BO has reached */
 | |
| 	.align 3
 | |
| 
 | |
| .L70:	/* Single-column pass (CO1 only): entered only when bit 0 of N is set. */
 | |
| 	andi	J,  N, 1	/* J = N & 1 */
 | |
| 	blez	J, .L999	/* bit clear: go to the exit sequence */
 | |
| 
 | |
| 	move	AO, A	/* AO restarts at A; placed right after the branch, presumably its delay slot - TODO confirm */
 | |
| 	move	CO1, C
 | |
| 
 | |
| 	dsra	I,  M, 1	/* I = M >> 1: number of two-row tiles */
 | |
| 	blez	I, .L80	/* none: go to the M & 1 handling at .L80 */
 | |
| 	daddu	C,   CO1, LDC	/* C = CO1 + LDC; placed after the branch */
 | |
| 
 | |
| .L71:	/* Start of one 2x1 tile: clear c11/c21/c12/c22 and preload operands from AO and B. */
 | |
| 	LD	a1,  0 * SIZE(AO)	/* NOTE(review): .L72 and .L76 reload a1, a2 and b1 themselves before use */
 | |
| 	MTC	$0,  c11	/* $0 is the MIPS zero register; MTC is defined outside this view, presumably c11 = 0 */
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a5,  4 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	MOV	c12, c11
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	MOV	c22, c11
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	dsra	L,  K, 2	/* L = K >> 2: trip count for the loop at .L72 */
 | |
| 	LD	b6,  8 * SIZE(B)	/* NOTE(review): a5, b2, b3, b5, b6, b7 are not read by .L72, .L76 or .L78 */
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	blez	L, .L75	/* K < 4: only the remainder loop runs */
 | |
| 	move	BO,  B	/* BO starts at B; placed after the branch, presumably its delay slot */
 | |
| 	.align	3
 | |
| 
 | |
| .L72:	/* 2x1 tile, four k steps per trip; each step loads its own A pair and B value. Step 1: */
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| /* Step 2: AO slots 2,3 with BO slot 1. */
 | |
| 	LD	a1,  2 * SIZE(AO)
 | |
| 	LD	a2,  3 * SIZE(AO)
 | |
| 	LD	b1,  1 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| /* Step 3: AO slots 4,5 with BO slot 2. */
 | |
| 	LD	a1,  4 * SIZE(AO)
 | |
| 	LD	a2,  5 * SIZE(AO)
 | |
| 	LD	b1,  2 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| /* Step 4: AO slots 6,7 with BO slot 3. */
 | |
| 	LD	a1,  6 * SIZE(AO)
 | |
| 	LD	a2,  7 * SIZE(AO)
 | |
| 	LD	b1,  3 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  8 * SIZE
 | |
| 	bgtz	L, .L72
 | |
| 	daddiu	BO, BO,  4 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 	.align 3
 | |
| 
 | |
| .L75:	/* K & 3 tail of the 2x1 tile: .L76 runs once per leftover k step. */
 | |
| 	andi	L,  K, 3	/* L = K & 3 */
 | |
| 	NOP
 | |
| 	blez	L, .L78	/* nothing left over: go to the C update */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L76:	/* One k step: AO slots 0,1 against BO slot 0. */
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 	MADD	c12, c12, a2, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  2 * SIZE
 | |
| 	bgtz	L, .L76
 | |
| 	daddiu	BO, BO,  1 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 
 | |
| .L78:	/* C update for the 2x1 tile: four CO1 slots, then loop back for the next tile. */
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| 	LD	$f2, 2 * SIZE(CO1)
 | |
| 	LD	$f3, 3 * SIZE(CO1)
 | |
| /* NOTE(review): c21 and c22 are cleared at .L71 and never written by .L72/.L76, so these ADDs look like no-ops. */
 | |
| 	ADD	c11, c11, c21
 | |
| 	daddiu	I, I, -1	/* one tile fewer to do */
 | |
| 	ADD	c12, c12, c22
 | |
| 	daddiu	CO1,CO1, 4 * SIZE	/* pointer advances before the stores, hence the negative offsets below */
 | |
| /* Even slots get + ALPHA_R * acc, odd slots get + ALPHA_I * acc. */
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11
 | |
| 	MADD	$f2, $f2, ALPHA_R, c12
 | |
| 	MADD	$f3, $f3, ALPHA_I, c12
 | |
| 
 | |
| 	ST	$f0, -4 * SIZE(CO1)
 | |
| 	ST	$f1, -3 * SIZE(CO1)
 | |
| 	ST	$f2, -2 * SIZE(CO1)
 | |
| 
 | |
| 	bgtz	I, .L71	/* more two-row tiles remain */
 | |
| 	ST	$f3, -1 * SIZE(CO1)	/* last store sits after the branch; presumably its delay slot */
 | |
| 	.align 3
 | |
| 
 | |
| .L80:	/* Leftover single row (M & 1) of the single-column pass: clear accumulators and preload. */
 | |
| 	andi	I,  M, 1	/* I = M & 1 */
 | |
| 	blez	I, .L89	/* M even: nothing to do here */
 | |
| 	NOP
 | |
| 
 | |
| 	LD	a1,  0 * SIZE(AO)	/* NOTE(review): .L82 and .L86 reload a1 and b1 themselves before use */
 | |
| 	MTC	$0,  c11	/* $0 is the MIPS zero register; MTC is defined outside this view, presumably c11 = 0 */
 | |
| 	LD	a2,  1 * SIZE(AO)
 | |
| 	MOV	c21, c11
 | |
| 	LD	a3,  2 * SIZE(AO)
 | |
| 	LD	a4,  3 * SIZE(AO)
 | |
| 
 | |
| 	LD	b1,  0 * SIZE(B)
 | |
| 	LD	b2,  1 * SIZE(B)
 | |
| 	LD	b3,  2 * SIZE(B)
 | |
| 	LD	b4,  3 * SIZE(B)
 | |
| 	LD	b5,  4 * SIZE(B)
 | |
| 	LD	b6,  8 * SIZE(B)
 | |
| 	LD	b7, 12 * SIZE(B)
 | |
| 
 | |
| 	dsra	L,  K, 2	/* L = K >> 2: trip count for the loop at .L82 */
 | |
| 	blez	L, .L85	/* K < 4: only the remainder loop runs */
 | |
| 	move	BO,  B	/* BO starts at B; placed after the branch, presumably its delay slot */
 | |
| 	.align	3
 | |
| 
 | |
| .L82:	/* 1x1 tile, four k steps per trip; products alternate between c11 and c21. Step 1: */
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| /* Step 2 goes to c21. */
 | |
| 	LD	a1,  1 * SIZE(AO)
 | |
| 	LD	b1,  1 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c21, c21, a1, b1
 | |
| /* Step 3 goes to c11. */
 | |
| 	LD	a1,  2 * SIZE(AO)
 | |
| 	LD	b1,  2 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| /* Step 4 goes to c21. */
 | |
| 	LD	a1,  3 * SIZE(AO)
 | |
| 	LD	b1,  3 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c21, c21, a1, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  4 * SIZE
 | |
| 	bgtz	L, .L82
 | |
| 	daddiu	BO, BO,  4 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 	.align 3
 | |
| 
 | |
| .L85:	/* K & 3 tail of the 1x1 tile: .L86 runs once per leftover k step. */
 | |
| 	andi	L,  K, 3	/* L = K & 3 */
 | |
| 	NOP
 | |
| 	blez	L, .L88	/* nothing left over: go to the C update */
 | |
| 	NOP
 | |
| 	.align	3
 | |
| 
 | |
| .L86:	/* One k step: AO slot 0 against BO slot 0, accumulated into c11. */
 | |
| 	LD	a1,  0 * SIZE(AO)
 | |
| 	LD	b1,  0 * SIZE(BO)
 | |
| 
 | |
| 	MADD	c11, c11, a1, b1
 | |
| 
 | |
| 	daddiu	L, L, -1
 | |
| 	daddiu	AO, AO,  1 * SIZE
 | |
| 	bgtz	L, .L86
 | |
| 	daddiu	BO, BO,  1 * SIZE	/* placed after the branch; presumably its delay slot */
 | |
| 
 | |
| 
 | |
| .L88:	/* C update for the 1x1 tile: two CO1 slots. */
 | |
| 	LD	$f0, 0 * SIZE(CO1)
 | |
| 	LD	$f1, 1 * SIZE(CO1)
 | |
| /* .L82 alternates between c11 and c21; merge them before scaling. */
 | |
| 	ADD	c11, c11, c21
 | |
| 	MADD	$f0, $f0, ALPHA_R, c11	/* slot 0 += ALPHA_R * c11 (assuming MADD is d = c + a * b) */
 | |
| 	MADD	$f1, $f1, ALPHA_I, c11	/* slot 1 += ALPHA_I * c11 */
 | |
| 
 | |
| 	ST	$f0,  0 * SIZE(CO1)
 | |
| 	ST	$f1,  1 * SIZE(CO1)
 | |
| 	.align 3
 | |
| 
 | |
| .L89:
 | |
| 	move	B, BO	/* B takes the position BO has reached */
 | |
| 	.align 3
 | |
| 
 | |
| .L999:	/* Exit sequence: reload saved registers from the stack frame and return. */
 | |
| 	LDARG	$16,   0($sp)	/* integer registers $16..$21 come back from offsets 0..40 */
 | |
| 	LDARG	$17,   8($sp)
 | |
| 	LDARG	$18,  16($sp)
 | |
| 	LDARG	$19,  24($sp)
 | |
| 	LDARG	$20,  32($sp)
 | |
| 	LDARG	$21,  40($sp)
 | |
| 	ldc1	$f24, 48($sp)	/* FP registers $f24..$f29 come back from offsets 48..88 */
 | |
| 	ldc1	$f25, 56($sp)
 | |
| 	ldc1	$f26, 64($sp)
 | |
| 	ldc1	$f27, 72($sp)
 | |
| 	ldc1	$f28, 80($sp)
 | |
| 	ldc1	$f29, 88($sp)
 | |
| 
 | |
| 	j	$31	/* jump to the address held in $31 */
 | |
| 	daddiu	$sp, $sp, 128	/* release 128 bytes of stack; placed after the jump, presumably its delay slot */
 | |
| 
 | |
| 	EPILOGUE
 |