/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

#define ALPHA    0
#define FZERO	 8

#define	M	r3
#define	N	r4
#define	K	r5

#ifdef linux
#define A	r6
#define	B	r7
#define	C	r8
#define	LDC	r9
#define OFFSET	r10
#endif

#define TEMP	r11
#define KK	r14
#define INCM1	r15
#define INCM3	r16
#define INCM5	r17
#define INCM7	r18
#define INC2	r19
#define INC	r20
#define INC4	r21

#define	I	r22
#define J	r23
#define AO	r24
#define BO	r25
#define AO2	r26
#define	BO2	r27

#define	CO1	r28
#define CO2	r29
#define	CO3	r30
#define	CO4	r31

#ifndef NEEDPARAM

#define A1	f16
#define A2	f17
#define A3	f18
#define A4	f19
#define A5	f20
#define A6	f21
#define A7	f22
#define A8	f23
#define A9	f24
#define A10	f25

#define B1	f26
#define B2	f27
#define B3	f28
#define B4	f29
#define B5	f30
#define B6	f31

#define AP	B6


	PROLOGUE
	PROFCODE

	li	r0, -16

	stfpdux	f14, SP, r0
	stfpdux	f15, SP, r0
	stfpdux	f16, SP, r0
	stfpdux	f17, SP, r0
	stfpdux	f18, SP, r0
	stfpdux	f19, SP, r0
	stfpdux	f20, SP, r0
	stfpdux	f21, SP, r0
	stfpdux	f22, SP, r0
	stfpdux	f23, SP, r0
	stfpdux	f24, SP, r0
	stfpdux	f25, SP, r0
	stfpdux	f26, SP, r0
	stfpdux	f27, SP, r0
	stfpdux	f28, SP, r0
	stfpdux	f29, SP, r0
	stfpdux	f30, SP, r0
	stfpdux	f31, SP, r0

	stwu	r31,  -4(SP)
	stwu	r30,  -4(SP)
	stwu	r29,  -4(SP)
	stwu	r28,  -4(SP)

	stwu	r27,  -4(SP)
	stwu	r26,  -4(SP)
	stwu	r25,  -4(SP)
	stwu	r24,  -4(SP)

	stwu	r23,  -4(SP)
	stwu	r22,  -4(SP)
	stwu	r21,  -4(SP)
	stwu	r20,  -4(SP)

	stwu	r19,  -4(SP)
	stwu	r18,  -4(SP)
	stwu	r17,  -4(SP)
	stwu	r16,  -4(SP)

	stwu	r15,  -4(SP)
	stwu	r14,  -4(SP)	# dummy

	li	r0,   0

	stwu	r0,   -4(SP)
	stwu	r0,   -4(SP)
	stfdu	f1,   -8(SP)

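/* Note: f14-f31 and r31..r14 are now spilled above SP; alpha (f1) sits at
   ALPHA(SP), and the pair of zero words at FZERO(SP) is what lfpsx reloads
   later to clear the accumulator register pairs. */
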
	slwi	LDC, LDC, BASE_SHIFT

	cmpwi	cr0, M, 0
	ble	.L999
	cmpwi	cr0, N, 0
	ble	.L999
	cmpwi	cr0, K, 0
	ble	.L999

	li	INC,    1 * SIZE
	li	INC2,   2 * SIZE
	li	INC4,   4 * SIZE

#if defined(TRMMKERNEL) && !defined(LEFT)
	neg	KK, OFFSET
#endif

	andi.	r0, C,   2 * SIZE - 1
	bne	.L1000
	andi.	r0, LDC, 2 * SIZE - 1
	bne	.L1000

/* High performance version */

	li	INCM3, -2 * SIZE
	li	INCM5, -5 * SIZE
	li	INCM7, -6 * SIZE

	addi	C, C, - 2 * SIZE
	srawi.	J, N,  2
	ble	.L50
	.align 4

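/* Note: .L10 walks N in panels of four columns (J = N >> 2); CO1-CO4 hold
   the four C column pointers for the current panel, and C is advanced past
   the panel before the next iteration. */
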
.L10:
	mr	CO1, C
	add	CO2, C,   LDC
	add	CO3, CO2, LDC
	add	CO4, CO3, LDC
	add	C,   CO4, LDC

#if defined(TRMMKERNEL) &&  defined(LEFT)
	mr	KK, OFFSET
#endif

	addi	AO, A, -4 * SIZE

	li	r0, FZERO
	lfpsx	f0, SP, r0

	srawi.	I, M,  3
	ble	.L20
	.align 4

.L11:
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f12, f0
#else
	slwi	TEMP, KK, 3 + BASE_SHIFT
	slwi	r0,   KK, 2 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  BO, - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, BO,   2 * SIZE
	fpmr	f12, f0
#endif


#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 8
#else
	addi	TEMP, KK, 4
#endif
	srawi.	TEMP,  TEMP,  2
	fpmr	f1,  f0
	mtspr	CTR, TEMP
	ble	.L14

#else
	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f12, f0

	srawi.	r0,  K,  2
	fpmr	f1,  f0
	mtspr	CTR, r0
	ble	.L14
#endif

	LFPDUX	A1,  AO, INC4
	fpmr	f5,  f0
	LFPDUX	A3,  AO, INC4
	fpmr	f9,  f0
	LFPDUX	B1,  BO, INC4
	fpmr	f13, f0

	LFPDUX	A5,  AO, INC4
	fpmr	f2,  f0
	LFPDUX	A6,  AO, INC4
	fpmr	f6,  f0
	LFPDUX	B3,  BO, INC4
	fpmr	f10, f0
	LFPDUX	A7,  AO, INC4
	fpmr	f14, f0

	LFPDUX	A8,  AO, INC4
	fpmr	f3,  f0
	LFPDUX	B5,  BO, INC4
	fpmr	f7,  f0
	LFPDUX	A9,  AO, INC4
	fpmr	f11, f0
	LFPDUX	A2, AO2, INC4
	fpmr	f15, f0
	LFPDUX	B2, BO2, INC4
	bdz-	.L13
	.align 4

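/* Note: .L12 is the 8x4 inner loop, unrolled four deep in K; each pass feeds
   the f0-f15 accumulators with paired fxcpmadd/fxcsmadd operations while the
   next A/B quads are streamed in with LFPDUX. */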
.L12:

## 1 ##
	fxcpmadd	f0,  B1, A1, f0
	nop
	fxcsmadd	f4,  B1, A1, f4
	nop
	fxcpmadd	f8,  B2, A1, f8
	LFPDUX	B4, BO2, INC4
	fxcsmadd	f12, B2, A1, f12
	LFPDUX	B6,  BO, INC4

	fxcpmadd	f1,  B1, A2, f1
	nop
	fxcsmadd	f5,  B1, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B2, A2, f9
	LFPDUX	A10, AO, INC4
	fxcsmadd	f13, B2, A2, f13
	nop

	fxcpmadd	f2,  B1, A3, f2
	nop
	fxcsmadd	f6,  B1, A3, f6
	nop
	fxcpmadd	f10, B2, A3, f10
	nop
	fxcsmadd	f14, B2, A3, f14
	nop

	fxcpmadd	f3,  B1, A4, f3
	nop
	fxcsmadd	f7,  B1, A4, f7
	LFPDUX	A2, AO2, INC4
	fxcpmadd	f11, B2, A4, f11
	LFPDUX	A1,  AO, INC4
	fxcsmadd	f15, B2, A4, f15
	nop

## 2 ##

	fxcpmadd	f0,  B3, A5, f0
	nop
	fxcsmadd	f4,  B3, A5, f4
	nop
	fxcpmadd	f8,  B4, A5, f8
	LFPDUX	B2, BO2, INC4
	fxcsmadd	f12, B4, A5, f12
	LFPDUX	B1,  BO, INC4

	fxcpmadd	f1,  B3, A2, f1
	nop
	fxcsmadd	f5,  B3, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B4, A2, f9
	LFPDUX	A3,  AO, INC4
	fxcsmadd	f13, B4, A2, f13
	nop

	fxcpmadd	f2,  B3, A6, f2
	nop
	fxcsmadd	f6,  B3, A6, f6
	nop
	fxcpmadd	f10, B4, A6, f10
	nop
	fxcsmadd	f14, B4, A6, f14
	nop

	fxcpmadd	f3,  B3, A4, f3
	nop
	fxcsmadd	f7,  B3, A4, f7
	LFPDUX	A2, AO2, INC4
	fxcpmadd	f11, B4, A4, f11
	LFPDUX	A5,  AO, INC4
	fxcsmadd	f15, B4, A4, f15
	nop

## 3 ##

	fxcpmadd	f0,  B5, A7, f0
	nop
	fxcsmadd	f4,  B5, A7, f4
	nop
	fxcpmadd	f8,  B2, A7, f8
	LFPDUX	B4, BO2, INC4
	fxcsmadd	f12, B2, A7, f12
	LFPDUX	B3,  BO, INC4

	fxcpmadd	f1,  B5, A2, f1
	nop
	fxcsmadd	f5,  B5, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B2, A2, f9
	LFPDUX	A6,  AO, INC4
	fxcsmadd	f13, B2, A2, f13
	nop

	fxcpmadd	f2,  B5, A8, f2
	nop
	fxcsmadd	f6,  B5, A8, f6
	nop
	fxcpmadd	f10, B2, A8, f10
	nop
	fxcsmadd	f14, B2, A8, f14
	nop

	fxcpmadd	f3,  B5, A4, f3
	nop
	fxcsmadd	f7,  B5, A4, f7
	LFPDUX	A2, AO2, INC4
	fxcpmadd	f11, B2, A4, f11
	LFPDUX	A7,  AO, INC4
	fxcsmadd	f15, B2, A4, f15
	nop

## 4 ##
	fxcpmadd	f0,  B6, A9, f0
	nop
	fxcsmadd	f4,  B6, A9, f4
	nop
	fxcpmadd	f8,  B4, A9, f8
	LFPDUX	B2, BO2, INC4
	fxcsmadd	f12, B4, A9, f12
	LFPDUX	B5,  BO, INC4

	fxcpmadd	f1,  B6, A2, f1
	nop
	fxcsmadd	f5,  B6, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B4, A2, f9
	LFPDUX	A8,  AO, INC4
	fxcsmadd	f13, B4, A2, f13
	nop

	fxcpmadd	f2,  B6, A10, f2
	nop
	fxcsmadd	f6,  B6, A10, f6
	nop
	fxcpmadd	f10, B4, A10, f10
	nop
	fxcsmadd	f14, B4, A10, f14
	nop

	fxcpmadd	f3,  B6, A4, f3
	LFPDUX	A2, AO2, INC4
	fxcsmadd	f7,  B6, A4, f7
	LFPDUX	A9,  AO, INC4
	fxcpmadd	f11, B4, A4, f11
	nop
	fxcsmadd	f15, B4, A4, f15
	bdnz+	.L12
	.align 4

.L13:
## 1 ##

	fxcpmadd	f0,  B1, A1, f0
	nop
	fxcsmadd	f4,  B1, A1, f4
	nop
	fxcpmadd	f8,  B2, A1, f8
	LFPDUX	B4, BO2, INC4
	fxcsmadd	f12, B2, A1, f12
	LFPDUX	B6,  BO, INC4

	fxcpmadd	f1,  B1, A2, f1
	nop
	fxcsmadd	f5,  B1, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B2, A2, f9
	LFPDUX	A10, AO, INC4
	fxcsmadd	f13, B2, A2, f13
	nop

	fxcpmadd	f2,  B1, A3, f2
	nop
	fxcsmadd	f6,  B1, A3, f6
	nop
	fxcpmadd	f10, B2, A3, f10
	nop
	fxcsmadd	f14, B2, A3, f14
	nop

	fxcpmadd	f3,  B1, A4, f3
	nop
	fxcsmadd	f7,  B1, A4, f7
	LFPDUX	A2, AO2, INC4
	fxcpmadd	f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFPDUX	A1, CO1, INC2
#else
	nop
#endif
	fxcsmadd	f15, B2, A4, f15
	nop

## 2 ##

	fxcpmadd	f0,  B3, A5, f0
	nop
	fxcsmadd	f4,  B3, A5, f4
	nop
	fxcpmadd	f8,  B4, A5, f8
	LFPDUX	B2, BO2, INC4
	fxcsmadd	f12, B4, A5, f12
#ifndef TRMMKERNEL
	LFPDUX	B1, CO1, INC4
#else
	nop
#endif

	fxcpmadd	f1,  B3, A2, f1
	nop
	fxcsmadd	f5,  B3, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B4, A2, f9
#ifndef TRMMKERNEL
	LFPDUX	A3, CO2, INC2
#else
	nop
#endif
	fxcsmadd	f13, B4, A2, f13
	nop

	fxcpmadd	f2,  B3, A6, f2
	nop
	fxcsmadd	f6,  B3, A6, f6
	nop
	fxcpmadd	f10, B4, A6, f10
	nop
	fxcsmadd	f14, B4, A6, f14
	nop

	fxcpmadd	f3,  B3, A4, f3
	nop
	fxcsmadd	f7,  B3, A4, f7
	LFPDUX	A2, AO2, INC4
	fxcpmadd	f11, B4, A4, f11
#ifndef TRMMKERNEL
	LFPDUX	A5, CO2, INC4
#else
	nop
#endif
	fxcsmadd	f15, B4, A4, f15
	nop

## 3 ##

	fxcpmadd	f0,  B5, A7, f0
	nop
	fxcsmadd	f4,  B5, A7, f4
	nop
	fxcpmadd	f8,  B2, A7, f8
	LFPDUX	B4, BO2, INC4
	fxcsmadd	f12, B2, A7, f12
#ifndef TRMMKERNEL
	LFPDUX	B3, CO3, INC2
#else
	nop
#endif

	fxcpmadd	f1,  B5, A2, f1
	nop
	fxcsmadd	f5,  B5, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B2, A2, f9
#ifndef TRMMKERNEL
	LFPDUX	A6, CO3, INC4
#else
	nop
#endif
	fxcsmadd	f13, B2, A2, f13
	nop

	fxcpmadd	f2,  B5, A8, f2
	nop
	fxcsmadd	f6,  B5, A8, f6
	nop
	fxcpmadd	f10, B2, A8, f10
	nop
	fxcsmadd	f14, B2, A8, f14
	nop

	fxcpmadd	f3,  B5, A4, f3
	nop
	fxcsmadd	f7,  B5, A4, f7
	LFPDUX	A2, AO2, INC4
	fxcpmadd	f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFPDUX	A7, CO4, INC2
#else
	nop
#endif
	fxcsmadd	f15, B2, A4, f15
	nop

## 4 ##

	fxcpmadd	f0,  B6, A9, f0
	nop
	fxcsmadd	f4,  B6, A9, f4
	nop
	fxcpmadd	f8,  B4, A9, f8
	nop
	fxcsmadd	f12, B4, A9, f12
#ifndef TRMMKERNEL
	LFPDUX	B2, CO4, INC4
#else
	nop
#endif

	fxcpmadd	f1,  B6, A2, f1
	nop
	fxcsmadd	f5,  B6, A2, f5
	LFPDUX	A4, AO2, INC4
	fxcpmadd	f9,  B4, A2, f9
#ifndef TRMMKERNEL
	LFPDUX	B5, CO1, INCM3
#else
	nop
#endif
	fxcsmadd	f13, B4, A2, f13
	nop

	fxcpmadd	f2,  B6, A10, f2
	nop
	fxcsmadd	f6,  B6, A10, f6
	nop
	fxcpmadd	f10, B4, A10, f10
	nop
	fxcsmadd	f14, B4, A10, f14
#ifndef TRMMKERNEL
	LFPDUX	A8, CO1, INC4
#else
	nop
#endif

	fxcpmadd	f3,  B6, A4, f3
	nop
	fxcsmadd	f7,  B6, A4, f7
	nop
	fxcpmadd	f11, B4, A4, f11
	nop
	fxcsmadd	f15, B4, A4, f15
#ifndef TRMMKERNEL
	LFPDUX	A9, CO2, INCM3
#else
	nop
#endif
	.align 4

.L14:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 8
#else
	addi	TEMP, KK, 4
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble+	.L18

	cmpwi	cr0, TEMP, 3
	bgt+	.L15
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
	ble+	.L18

	cmpwi	cr0, K, 3
	bgt+	.L15
#endif

#ifndef TRMMKERNEL
	LFPDUX	A1, CO1, INC2
	fpmr	f5,  f0
	LFPDUX	B1, CO1, INC4
	fpmr	f9,  f0
	LFPDUX	A3, CO2, INC2
	fpmr	f13, f0
	LFPDUX	A5, CO2, INC4
	fpmr	f2,  f0

	LFPDUX	B3, CO3, INC2
	fpmr	f6,  f0
	LFPDUX	A6, CO3, INC4
	fpmr	f10, f0
	LFPDUX	A7,  CO4, INC2
	fpmr	f14, f0
	LFPDUX	B2,  CO4, INC4
	fpmr	f3,  f0

	LFPDUX	B5, CO1, INCM3
	fpmr	f7,  f0
	LFPDUX	A8, CO1, INC4
	fpmr	f11, f0
	LFPDUX	A9, CO2, INCM3
	fpmr	f15, f0
#else
	fpmr	f5,  f0
	fpmr	f9,  f0
	fpmr	f13, f0
	fpmr	f2,  f0

	fpmr	f6,  f0
	fpmr	f10, f0
	fpmr	f14, f0
	fpmr	f3,  f0

	fpmr	f7,  f0
	fpmr	f11, f0
	fpmr	f15, f0
	nop
#endif
	.align 4

.L15:
	LFPDUX	A2,  AO,  INC4
	LFPDUX	A4,  AO2, INC4
	LFPDUX	A10, BO,  INC4
	LFPDUX	B4,  BO2, INC4
	bdz-	.L17
	.align 4

.L16:
	fxcpmadd	f0,  A10, A2, f0
	fxcsmadd	f4,  A10, A2, f4
	fxcpmadd	f8,  B4, A2, f8
	fxcsmadd	f12, B4, A2, f12
	LFPDUX	A2, AO,  INC4

	fxcpmadd	f1,  A10, A4, f1
	fxcsmadd	f5,  A10, A4, f5
	fxcpmadd	f9,  B4, A4, f9
	fxcsmadd	f13, B4, A4, f13
	LFPDUX	A4, AO2, INC4

	fxcpmadd	f2,  A10, A2, f2
	fxcsmadd	f6,  A10, A2, f6
	fxcpmadd	f10, B4, A2, f10
	fxcsmadd	f14, B4, A2, f14
	LFPDUX	A2, AO,  INC4

	fxcpmadd	f3,  A10, A4, f3
	fxcsmadd	f7,  A10, A4, f7
	LFPDUX	A10, BO,  INC4
	fxcpmadd	f11, B4, A4, f11
	fxcsmadd	f15, B4, A4, f15
	LFPDUX	A4, AO2, INC4
	LFPDUX	B4, BO2, INC4
	bdnz+	.L16
	.align 4

.L17:
	fxcpmadd	f0,  A10, A2, f0
	fxcsmadd	f4,  A10, A2, f4
	fxcpmadd	f8,  B4, A2, f8
	fxcsmadd	f12, B4, A2, f12
	LFPDUX	A2, AO,  INC4

	fxcpmadd	f1,  A10, A4, f1
	fxcsmadd	f5,  A10, A4, f5
	fxcpmadd	f9,  B4, A4, f9
	fxcsmadd	f13, B4, A4, f13
	LFPDUX	A4, AO2, INC4

	fxcpmadd	f2,  A10, A2, f2
	fxcsmadd	f6,  A10, A2, f6
	fxcpmadd	f10, B4, A2, f10
	fxcsmadd	f14, B4, A2, f14

	fxcpmadd	f3,  A10, A4, f3
	fxcsmadd	f7,  A10, A4, f7
	fxcpmadd	f11, B4, A4, f11
	fxcsmadd	f15, B4, A4, f15
	.align 4

.L18:
#ifndef TRMMKERNEL
	fxcpmadd	f0,  AP, f0,  A1
	LFPDUX	B4, CO2, INC4
	fxcpmadd	f1,  AP, f1,  B5
	LFPDUX	A2, CO3, INCM3

	fxcpmadd	f2,  AP, f2,  B1
	LFPDUX	A4, CO3, INC4
	fxcpmadd	f3,  AP, f3,  A8
	LFPDUX	A10,  CO4, INCM3

	fxcpmadd	f4,  AP, f4,  A3
	LFPDUX	A1,  CO4, INC4
	fxcpmadd	f5,  AP, f5,  A9
	STFPDUX	f0,  CO1, INCM7

	fxcpmadd	f6,  AP, f6,  A5
	STFPDUX	f1,  CO1, INC2
	fxcpmadd	f7,  AP, f7,  B4
	STFPDUX	f2,  CO1, INC2

	fxcpmadd	f8,  AP, f8,  B3
	STFPDUX	f3,  CO1, INC2
	fxcpmadd	f9,  AP, f9,  A2
	STFPDUX	f4,  CO2, INCM7

	fxcpmadd	f10, AP, f10, A6
	STFPDUX	f5,  CO2, INC2
	fxcpmadd	f11, AP, f11, A4
	STFPDUX	f6,  CO2, INC2

	fxcpmadd	f12, AP, f12, A7
	STFPDUX	f7,  CO2, INC2
	fxcpmadd	f13, AP, f13, A10
	STFPDUX	f8,  CO3, INCM7

	fxcpmadd	f14, AP, f14, B2
	STFPDUX	f9,  CO3, INC2
	fxcpmadd	f15, AP, f15, A1
	STFPDUX	f10, CO3, INC2

	STFPDUX	f11, CO3, INC2
	STFPDUX	f12, CO4, INCM7
	STFPDUX	f13, CO4, INC2
	STFPDUX	f14, CO4, INC2
	STFPDUX	f15, CO4, INC2
#else
	fpmul	f0,  AP, f0
	fpmul	f1,  AP, f1
	fpmul	f2,  AP, f2
	fpmul	f3,  AP, f3

	fpmul	f4,  AP, f4
	fpmul	f5,  AP, f5
	STFPDUX	f0,  CO1, INC2

	fpmul	f6,  AP, f6
	STFPDUX	f1,  CO1, INC2
	fpmul	f7,  AP, f7
	STFPDUX	f2,  CO1, INC2

	fpmul	f8,  AP, f8
	STFPDUX	f3,  CO1, INC2
	fpmul	f9,  AP, f9
	STFPDUX	f4,  CO2, INC2

	fpmul	f10, AP, f10
	STFPDUX	f5,  CO2, INC2
	fpmul	f11, AP, f11
	STFPDUX	f6,  CO2, INC2

	fpmul	f12, AP, f12
	STFPDUX	f7,  CO2, INC2
	fpmul	f13, AP, f13
	STFPDUX	f8,  CO3, INC2

	fpmul	f14, AP, f14
	STFPDUX	f9,  CO3, INC2
	fpmul	f15, AP, f15
	STFPDUX	f10, CO3, INC2

	STFPDUX	f11, CO3, INC2
	STFPDUX	f12, CO4, INC2
	STFPDUX	f13, CO4, INC2
	STFPDUX	f14, CO4, INC2
	STFPDUX	f15, CO4, INC2
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -8
#else
	addi	TEMP, TEMP, -4
#endif
	slwi	r0,   TEMP, 3 + BASE_SHIFT
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 8
#endif
#endif

	addic.	I, I, -1
	li	r0, FZERO

	lfpsx	f0, SP, r0
	bgt+	.L11
	.align 4

.L20:
 | 
						|
	andi.	I, M,  4
 | 
						|
	beq	.L30
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 2 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  BO,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, BO,    2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
 | 
						|
	srawi.	TEMP,  TEMP,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
	fpmr	f13, f0
 | 
						|
	ble	.L24
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f13, f0
 | 
						|
	ble	.L24
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,   AO, INC4
 | 
						|
	LFPDUX	B1,   BO, INC4
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	LFPDUX	A3,   AO, INC4
 | 
						|
	LFPDUX	B3,   BO, INC4
 | 
						|
	LFPDUX	A4,  AO2, INC4
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	LFPDUX	A5,   AO, INC4
 | 
						|
	LFPDUX	B5,   BO, INC4
 | 
						|
	LFPDUX	A6,  AO2, INC4
 | 
						|
	LFPDUX	B6,  BO2, INC4
 | 
						|
	LFPDUX	A7,   AO, INC4
 | 
						|
	LFPDUX	A9,   BO, INC4
 | 
						|
	LFPDUX	A10, BO2, INC4
 | 
						|
	bdz-	.L23
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L22:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A8,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
	LFPDUX	A1,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	B1,   BO, INC4
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A3, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B3, A3, f4
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B4, A3, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, B4, A3, f12
 | 
						|
	LFPDUX	A3,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B3, A4, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B3, A4, f5
 | 
						|
	LFPDUX	B3,   BO, INC4
 | 
						|
	fxcpmadd	f9,  B4, A4, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, B4, A4, f13
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A5, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B5, A5, f4
 | 
						|
	LFPDUX	A4,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B6, A5, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, B6, A5, f12
 | 
						|
	LFPDUX	A5,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B5, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B5, A6, f5
 | 
						|
	LFPDUX	B5,   BO, INC4
 | 
						|
	fxcpmadd	f9,  B6, A6, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, B6, A6, f13
 | 
						|
	LFPDUX	B6,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A9,  A7, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  A9,  A7, f4
 | 
						|
	LFPDUX	A6,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  A10, A7, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, A10, A7, f12
 | 
						|
	LFPDUX	A7,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  A9,  A8, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  A9,  A8, f5
 | 
						|
	LFPDUX	A9,   BO, INC4
 | 
						|
	fxcpmadd	f9,  A10, A8, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, A10, A8, f13
 | 
						|
	LFPDUX	A10, BO2, INC4
 | 
						|
	bdnz+	.L22
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L23:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A8,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A3, f0
 | 
						|
	fxcsmadd	f4,  B3, A3, f4
 | 
						|
	fxcpmadd	f8,  B4, A3, f8
 | 
						|
	fxcsmadd	f12, B4, A3, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  B3, A4, f1
 | 
						|
	fxcsmadd	f5,  B3, A4, f5
 | 
						|
	fxcpmadd	f9,  B4, A4, f9
 | 
						|
	fxcsmadd	f13, B4, A4, f13
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A5, f0
 | 
						|
	fxcsmadd	f4,  B5, A5, f4
 | 
						|
	fxcpmadd	f8,  B6, A5, f8
 | 
						|
	fxcsmadd	f12, B6, A5, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  B5, A6, f1
 | 
						|
	fxcsmadd	f5,  B5, A6, f5
 | 
						|
	fxcpmadd	f9,  B6, A6, f9
 | 
						|
	fxcsmadd	f13, B6, A6, f13
 | 
						|
 | 
						|
	fxcpmadd	f0,  A9, A7, f0
 | 
						|
	fxcsmadd	f4,  A9, A7, f4
 | 
						|
	fxcpmadd	f8,  A10, A7, f8
 | 
						|
	fxcsmadd	f12, A10, A7, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  A9, A8, f1
 | 
						|
	fxcsmadd	f5,  A9, A8, f5
 | 
						|
	fxcpmadd	f9,  A10, A8, f9
 | 
						|
	fxcsmadd	f13, A10, A8, f13
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L24:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  3
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L28
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC4
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	bdz-	.L27
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L26:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
	LFPDUX	A1,  AO,  INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	bdnz+	.L26
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L27:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L28:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A1, CO1, INC2
 | 
						|
	LFPDUX	B1, CO1, INC2
 | 
						|
	LFPDUX	B3, CO2, INC2
 | 
						|
	LFPDUX	A6, CO2, INC2
 | 
						|
 | 
						|
	LFPDUX	B5, CO3, INC2
 | 
						|
	LFPDUX	A8, CO3, INC2
 | 
						|
	LFPDUX	A2, CO4, INC2
 | 
						|
	LFPDUX	A4, CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	fxcpmadd	f1,  AP, f1,  B1
 | 
						|
	fxcpmadd	f4,  AP, f4,  B3
 | 
						|
	fxcpmadd	f5,  AP, f5,  A6
 | 
						|
 | 
						|
	fxcpmadd	f8,  AP, f8,  B5
 | 
						|
	fxcpmadd	f9,  AP, f9,  A8
 | 
						|
	STFPDUX	f0,  CO1, INCM3
 | 
						|
	fxcpmadd	f12, AP, f12, A2
 | 
						|
 	STFPDUX	f1,  CO1, INC2
 | 
						|
	fxcpmadd	f13, AP, f13, A4
 | 
						|
	STFPDUX	f4,  CO2, INCM3
 | 
						|
 | 
						|
	STFPDUX	f5,  CO2, INC2
 | 
						|
	STFPDUX	f8,  CO3, INCM3
 | 
						|
	STFPDUX	f9,  CO3, INC2
 | 
						|
	STFPDUX	f12, CO4, INCM3
 | 
						|
	STFPDUX	f13, CO4, INC2
 | 
						|
#else
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
	fpmul	f4,  AP, f4
 | 
						|
	fpmul	f5,  AP, f5
 | 
						|
 | 
						|
	fpmul	f8,  AP, f8
 | 
						|
	fpmul	f9,  AP, f9
 | 
						|
	STFPDUX	f0,  CO1, INC2
 | 
						|
	fpmul	f12, AP, f12
 | 
						|
 	STFPDUX	f1,  CO1, INC2
 | 
						|
	fpmul	f13, AP, f13
 | 
						|
	STFPDUX	f4,  CO2, INC2
 | 
						|
 | 
						|
	STFPDUX	f5,  CO2, INC2
 | 
						|
	STFPDUX	f8,  CO3, INC2
 | 
						|
	STFPDUX	f9,  CO3, INC2
 | 
						|
	STFPDUX	f12, CO4, INC2
 | 
						|
	STFPDUX	f13, CO4, INC2
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 2 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 4
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L30:
 | 
						|
	andi.	I, M,  2
 | 
						|
	beq	.L40
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 1 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  BO, - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, BO,   2 * SIZE
 | 
						|
	fpmr	f3, f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L34
 | 
						|
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L34
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
	LFPDUX	A5,  BO, INC4
 | 
						|
	LFPDUX	A6, BO2, INC4
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	LFPDUX	A7,  BO, INC4
 | 
						|
	LFPDUX	A8, BO2, INC4
 | 
						|
	bdz-	.L33
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L32:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
	fxcpmadd	f2,  B2, A1, f2
 | 
						|
	fxcsmadd	f3,  B2, A1, f3
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A2, f0
 | 
						|
	fxcsmadd	f1,  B3, A2, f1
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
	fxcpmadd	f2,  B4, A2, f2
 | 
						|
	fxcsmadd	f3,  B4, A2, f3
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A5, A3, f0
 | 
						|
	fxcsmadd	f1,  A5, A3, f1
 | 
						|
	LFPDUX	A5,  BO, INC4
 | 
						|
	fxcpmadd	f2,  A6, A3, f2
 | 
						|
	fxcsmadd	f3,  A6, A3, f3
 | 
						|
	LFPDUX	A6, BO2, INC4
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A7, A4, f0
 | 
						|
	fxcsmadd	f1,  A7, A4, f1
 | 
						|
	LFPDUX	A7,  BO, INC4
 | 
						|
	fxcpmadd	f2,  A8, A4, f2
 | 
						|
	fxcsmadd	f3,  A8, A4, f3
 | 
						|
	LFPDUX	A8, BO2, INC4
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	bdnz+	.L32
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L33:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	fxcpmadd	f2,  B2, A1, f2
 | 
						|
	fxcsmadd	f3,  B2, A1, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A2, f0
 | 
						|
	fxcsmadd	f1,  B3, A2, f1
 | 
						|
	fxcpmadd	f2,  B4, A2, f2
 | 
						|
	fxcsmadd	f3,  B4, A2, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  A5, A3, f0
 | 
						|
	fxcsmadd	f1,  A5, A3, f1
 | 
						|
	fxcpmadd	f2,  A6, A3, f2
 | 
						|
	fxcsmadd	f3,  A6, A3, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  A7, A4, f0
 | 
						|
	fxcsmadd	f1,  A7, A4, f1
 | 
						|
	fxcpmadd	f2,  A8, A4, f2
 | 
						|
	fxcsmadd	f3,  A8, A4, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L34:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  3
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L38
 | 
						|
 | 
						|
	LFPDX	A1,  AO,  INC4
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	add	AO, AO, INC2
 | 
						|
	bdz-	.L37
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L36:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f2,  B2, A1, f2
 | 
						|
	fxcsmadd	f3,  B2, A1, f3
 | 
						|
	LFPDX	A1,  AO,  INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	add	AO, AO, INC2
 | 
						|
	bdnz+	.L36
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L37:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	fxcpmadd	f2,  B2, A1, f2
 | 
						|
	fxcsmadd	f3,  B2, A1, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L38:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDX	A1, CO1, INC2
 | 
						|
	LFPDX	A2, CO2, INC2
 | 
						|
	LFPDX	A3, CO3, INC2
 | 
						|
	LFPDX	A4, CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0, AP, f0, A1
 | 
						|
	fxcpmadd	f1, AP, f1, A2
 | 
						|
	fxcpmadd	f2, AP, f2, A3
 | 
						|
	fxcpmadd	f3, AP, f3, A4
 | 
						|
#else
 | 
						|
	fpmul	f0, AP, f0
 | 
						|
	fpmul	f1, AP, f1
 | 
						|
	fpmul	f2, AP, f2
 | 
						|
	fpmul	f3, AP, f3
 | 
						|
#endif
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INC2
 | 
						|
	STFPDUX	f1,  CO2, INC2
 | 
						|
	STFPDUX	f2,  CO3, INC2
 | 
						|
	STFPDUX	f3,  CO4, INC2
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 1 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 2
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L40:
 | 
						|
	andi.	I, M,  1
 | 
						|
	beq	.L49
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 0 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  BO, - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, BO,   2 * SIZE
 | 
						|
	fpmr	f3,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L44
 | 
						|
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L44
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC4
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	LFPDUX	A2, AO2,  INC4
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	LFPDUX	A3,  AO,  INC4
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	LFPDUX	A4, AO2,  INC4
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
	bdz-	.L43
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L42:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A1, B2, f1
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A1, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A1, B4, f3
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
	LFPDUX	A1,  AO,  INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A2, A5, f0
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A2, A6, f1
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A2, A7, f2
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A2, A8, f3
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
	LFPDUX	A2, AO2,  INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A3, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A3, B2, f1
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A3, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A3, B4, f3
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
	LFPDUX	A3,  AO,  INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A4, A5, f0
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A4, A6, f1
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A4, A7, f2
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A4, A8, f3
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
	LFPDUX	A4, AO2,  INC4
 | 
						|
	bdnz+	.L42
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L43:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A1, B2, f1
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A1, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A1, B4, f3
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A2, A5, f0
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A2, A6, f1
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A2, A7, f2
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A2, A8, f3
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A3, B1, f0
 | 
						|
	fxcpmadd	f1,  A3, B2, f1
 | 
						|
	fxcsmadd	f2,  A3, B3, f2
 | 
						|
	fxcsmadd	f3,  A3, B4, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  A4, A5, f0
 | 
						|
	fxcpmadd	f1,  A4, A6, f1
 | 
						|
	fxcsmadd	f2,  A4, A7, f2
 | 
						|
	fxcsmadd	f3,  A4, A8, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L44:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  7
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L48
 | 
						|
 | 
						|
	LFDX	A1,  AO,  INC4
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	add	AO, AO, INC
 | 
						|
	bdz-	.L47
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L46:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A1, B2, f1
 | 
						|
	LFDX	A1,  AO,  INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	add	AO, AO, INC
 | 
						|
	bdnz+	.L46
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L47:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	fxcpmadd	f1,  A1, B2, f1
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L48:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDX	A1, CO1, INC2
 | 
						|
	LFDX	A2, CO2, INC2
 | 
						|
	LFDX	A3, CO3, INC2
 | 
						|
	LFDX	A4, CO4, INC2
 | 
						|
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fsmfp	A1, A2
 | 
						|
	fsmfp	A3, A4
 | 
						|
	
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	fxcpmadd	f1,  AP, f1,  A3
 | 
						|
#else
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
#endif
 | 
						|
 | 
						|
	STFDX	f0,  CO1, INC2
 | 
						|
	STFSDX	f0,  CO2, INC2
 | 
						|
	STFDX	f1,  CO3, INC2
 | 
						|
	STFSDX	f1,  CO4, INC2
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -1
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 0 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 1
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L49:
 | 
						|
#if defined(TRMMKERNEL) && !defined(LEFT)
 | 
						|
	addi	KK, KK, 4
 | 
						|
#endif
 | 
						|
 | 
						|
	addi	B,  BO, 4 * SIZE
 | 
						|
 | 
						|
	addic.	J, J, -1
 | 
						|
	bgt+	.L10
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L50:
 | 
						|
	andi.	J, N,  2
 | 
						|
	beq	.L90
 | 
						|
 | 
						|
	mr	CO1, C
 | 
						|
	add	CO2, C,   LDC
 | 
						|
	add	C,   CO2, LDC
 | 
						|
 | 
						|
#if defined(TRMMKERNEL) &&  defined(LEFT)
 | 
						|
	mr	KK, OFFSET
 | 
						|
#endif
 | 
						|
 | 
						|
	addi	AO, A, -2 * SIZE
 | 
						|
	
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
 | 
						|
	srawi.	I, M,  3
 | 
						|
	ble	.L60
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L51:
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f6,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 3 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f6,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 8
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
	fpmr	f3,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f7,  f0
 | 
						|
	ble	.L54
 | 
						|
#else
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f6,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
	fpmr	f3,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f7,  f0
 | 
						|
	ble	.L54
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	bdz-	.L53
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L52:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A5, f0
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B2, A5, f4
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B2, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B2, A6, f5
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B2, A7, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B2, A7, f6
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B2, A8, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B2, A8, f7
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A1, f0
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B3, A1, f4
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B3, A2, f5
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B3, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B3, A3, f6
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B3, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B3, A4, f7
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A5, f0
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B4, A5, f4
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B4, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B4, A6, f5
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B4, A7, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B4, A7, f6
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B4, A8, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B4, A8, f7
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	bdnz+	.L52
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L53:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A5, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B2, A5, f4
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B2, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B2, A6, f5
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B2, A7, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B2, A7, f6
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B2, A8, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B2, A8, f7
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A1, f0
 | 
						|
	fxcsmadd	f4,  B3, A1, f4
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	fxcsmadd	f5,  B3, A2, f5
 | 
						|
 | 
						|
	fxcpmadd	f2,  B3, A3, f2
 | 
						|
	fxcsmadd	f6,  B3, A3, f6
 | 
						|
	fxcpmadd	f3,  B3, A4, f3
 | 
						|
	fxcsmadd	f7,  B3, A4, f7
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A5, f0
 | 
						|
	fxcsmadd	f4,  B4, A5, f4
 | 
						|
	fxcpmadd	f1,  B4, A6, f1
 | 
						|
	fxcsmadd	f5,  B4, A6, f5
 | 
						|
 | 
						|
	fxcpmadd	f2,  B4, A7, f2
 | 
						|
	fxcsmadd	f6,  B4, A7, f6
 | 
						|
	fxcpmadd	f3,  B4, A8, f3
 | 
						|
	fxcsmadd	f7,  B4, A8, f7
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L54:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 8
#else
	addi	TEMP, KK, 2
#endif
	andi.	TEMP,  TEMP,  3
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L58

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2
	bdz-	.L57
	.align 4

.L56:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f4,  B1, A1, f4
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f5,  B1, A2, f5
	LFPDUX	A2,  AO,  INC2

	fxcpmadd	f2,  B1, A3, f2
	fxcsmadd	f6,  B1, A3, f6
	LFPDUX	A3,  AO,  INC2
	fxcpmadd	f3,  B1, A4, f3
	fxcsmadd	f7,  B1, A4, f7
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	bdnz+	.L56
	.align 4

.L57:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f4,  B1, A1, f4
	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f5,  B1, A2, f5

	fxcpmadd	f2,  B1, A3, f2
	fxcsmadd	f6,  B1, A3, f6
	fxcpmadd	f3,  B1, A4, f3
	fxcsmadd	f7,  B1, A4, f7
	.align 4

.L58:
#ifndef TRMMKERNEL
	LFPDUX	A1, CO1, INC2
	LFPDUX	B1, CO1, INC2
	LFPDUX	A3, CO1, INC2
	LFPDUX	A5, CO1, INC2

	LFPDUX	B3, CO2, INC2
	LFPDUX	A6, CO2, INC2
	LFPDUX	A7, CO2, INC2
	LFPDUX	B2, CO2, INC2

	fxcpmadd	f0,  AP, f0,  A1
	fxcpmadd	f1,  AP, f1,  B1
	fxcpmadd	f2,  AP, f2,  A3
	fxcpmadd	f3,  AP, f3,  A5

	fxcpmadd	f4,  AP, f4,  B3
	fxcpmadd	f5,  AP, f5,  A6
	STFPDUX	f0,  CO1, INCM7
	fxcpmadd	f6,  AP, f6,  A7
	STFPDUX	f1,  CO1, INC2
	fxcpmadd	f7,  AP, f7,  B2
	STFPDUX	f2,  CO1, INC2
	STFPDUX	f3,  CO1, INC2
	STFPDUX	f4,  CO2, INCM7

	STFPDUX	f5,  CO2, INC2
	STFPDUX	f6,  CO2, INC2
	STFPDUX	f7,  CO2, INC2
#else
	fpmul	f0,  AP, f0
	fpmul	f1,  AP, f1
	fpmul	f2,  AP, f2
	fpmul	f3,  AP, f3

	fpmul	f4,  AP, f4
	fpmul	f5,  AP, f5
	STFPDUX	f0,  CO1, INC2
	fpmul	f6,  AP, f6
	STFPDUX	f1,  CO1, INC2
	fpmul	f7,  AP, f7
	STFPDUX	f2,  CO1, INC2
	STFPDUX	f3,  CO1, INC2
	STFPDUX	f4,  CO2, INC2

	STFPDUX	f5,  CO2, INC2
	STFPDUX	f6,  CO2, INC2
	STFPDUX	f7,  CO2, INC2
#endif


#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -8
#else
	addi	TEMP, TEMP, -2
#endif
	slwi	r0,   TEMP, 3 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 8
#endif
#endif

	addic.	I, I, -1
	li	r0, FZERO

	lfpsx	f0, SP, r0
	bgt+	.L51
	.align 4

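## .L60: M & 4 leftover rows of the current two-column panel (4x2 tile, accumulators f0-f3) ##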
.L60:
	andi.	I, M,  4
	beq	.L70

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
#else
	slwi	TEMP, KK, 2 + BASE_SHIFT
	slwi	r0,   KK, 1 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 2
#endif
	fpmr	f2,  f0
	srawi.	r0,  TEMP,  2
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L64
#else
	srawi.	r0,  K,  2
	fpmr	f1,  f0
	addi	BO,  B,  - 2 * SIZE
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L64
#endif

	LFPDUX	B1,  BO, INC2
	LFPDUX	A1,  AO, INC2
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2
	LFPDUX	A3,  AO, INC2
	LFPDUX	A4,  AO, INC2

	LFPDUX	B3,  BO, INC2
	LFPDUX	A5,  AO, INC2
	LFPDUX	A6,  AO, INC2
	LFPDUX	B4,  BO, INC2
	LFPDUX	A7,  AO, INC2
	LFPDUX	A8,  AO, INC2
	bdz-	.L63
	.align 4

.L62:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f2,  B1, A1, f2
	LFPDUX	A1,  AO, INC2
	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f3,  B1, A2, f3
	LFPDUX	A2,  AO, INC2
	LFPDUX	B1,  BO, INC2

	fxcpmadd	f0,  B2, A3, f0
	fxcsmadd	f2,  B2, A3, f2
	LFPDUX	A3,  AO, INC2
	fxcpmadd	f1,  B2, A4, f1
	fxcsmadd	f3,  B2, A4, f3
	LFPDUX	A4,  AO, INC2
	LFPDUX	B2,  BO, INC2

	fxcpmadd	f0,  B3, A5, f0
	fxcsmadd	f2,  B3, A5, f2
	LFPDUX	A5,  AO, INC2
	fxcpmadd	f1,  B3, A6, f1
	fxcsmadd	f3,  B3, A6, f3
	LFPDUX	A6,  AO, INC2
	LFPDUX	B3,  BO, INC2

	fxcpmadd	f0,  B4, A7, f0
	fxcsmadd	f2,  B4, A7, f2
	LFPDUX	A7,  AO, INC2
	fxcpmadd	f1,  B4, A8, f1
	fxcsmadd	f3,  B4, A8, f3
	LFPDUX	A8,  AO, INC2
	LFPDUX	B4,  BO, INC2
	bdnz+	.L62
	.align 4

.L63:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f2,  B1, A1, f2
	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f3,  B1, A2, f3

	fxcpmadd	f0,  B2, A3, f0
	fxcsmadd	f2,  B2, A3, f2
	fxcpmadd	f1,  B2, A4, f1
	fxcsmadd	f3,  B2, A4, f3

	fxcpmadd	f0,  B3, A5, f0
	fxcsmadd	f2,  B3, A5, f2
	fxcpmadd	f1,  B3, A6, f1
	fxcsmadd	f3,  B3, A6, f3

	fxcpmadd	f0,  B4, A7, f0
	fxcsmadd	f2,  B4, A7, f2
	fxcpmadd	f1,  B4, A8, f1
	fxcsmadd	f3,  B4, A8, f3
	.align 4

.L64:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 2
#endif
	andi.	TEMP,  TEMP,  3
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L68

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	bdz-	.L67
	.align 4

.L66:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f2,  B1, A1, f2
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f3,  B1, A2, f3
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	bdnz+	.L66
	.align 4

.L67:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f2,  B1, A1, f2
	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f3,  B1, A2, f3
	.align 4

.L68:
#ifndef TRMMKERNEL
	LFPDUX	A1, CO1, INC2
	LFPDUX	A2, CO1, INC2
	LFPDUX	A3, CO2, INC2
	LFPDUX	A4, CO2, INC2

	fxcpmadd	f0,  AP, f0,  A1
	fxcpmadd	f1,  AP, f1,  A2
	fxcpmadd	f2,  AP, f2,  A3
	fxcpmadd	f3,  AP, f3,  A4

	STFPDUX	f0,  CO1, INCM3
	STFPDUX	f1,  CO1, INC2
	STFPDUX	f2,  CO2, INCM3
	STFPDUX	f3,  CO2, INC2
#else
	fpmul	f0,  AP, f0
	fpmul	f1,  AP, f1
	fpmul	f2,  AP, f2
	fpmul	f3,  AP, f3

	STFPDUX	f0,  CO1, INC2
	STFPDUX	f1,  CO1, INC2
	STFPDUX	f2,  CO2, INC2
	STFPDUX	f3,  CO2, INC2
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -4
#else
	addi	TEMP, TEMP, -2
#endif
	slwi	r0,   TEMP, 2 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 4
#endif
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4

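## .L70: M & 2 leftover rows (2x2 tile), followed by .L80 for the final M & 1 row ##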
.L70:
 | 
						|
	andi.	I, M,  2
 | 
						|
	beq	.L80
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 1 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	fpmr	f2,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f3, f0
 | 
						|
	ble	.L74
 | 
						|
#else
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	fpmr	f2,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f3, f0
 | 
						|
	ble	.L74
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
	LFPDUX	A1,  AO, INC2
 | 
						|
	LFPDUX	B1,  BO, INC2
 | 
						|
	LFPDUX	A2,  AO, INC2
 | 
						|
	LFPDUX	B2,  BO, INC2
 | 
						|
	LFPDUX	A3,  AO, INC2
 | 
						|
	LFPDUX	B3,  BO, INC2
 | 
						|
	LFPDUX	A4,  AO, INC2
 | 
						|
	LFPDUX	B4,  BO, INC2
 | 
						|
 | 
						|
	LFPDUX	A5,  AO, INC2
 | 
						|
	LFPDUX	B5,  BO, INC2
 | 
						|
	LFPDUX	A6,  AO, INC2
 | 
						|
	LFPDUX	B6,  BO, INC2
 | 
						|
	LFPDUX	A7,  AO, INC2
 | 
						|
	LFPDUX	A9,  BO, INC2
 | 
						|
	LFPDUX	A8,  AO, INC2
 | 
						|
	LFPDUX	A10, BO, INC2
 | 
						|
	bdz-	.L73
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L72:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	LFPDUX	A1,  AO, INC2
 | 
						|
	LFPDUX	B1,  BO, INC2
 | 
						|
	fxcpmadd	f2,  B2, A2, f2
 | 
						|
	fxcsmadd	f3,  B2, A2, f3
 | 
						|
	LFPDUX	A2,  AO, INC2
 | 
						|
	LFPDUX	B2,  BO, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A3, f0
 | 
						|
	fxcsmadd	f1,  B3, A3, f1
 | 
						|
	LFPDUX	A3,  AO, INC2
 | 
						|
	LFPDUX	B3,  BO, INC2
 | 
						|
	fxcpmadd	f2,  B4, A4, f2
 | 
						|
	fxcsmadd	f3,  B4, A4, f3
 | 
						|
	LFPDUX	A4,  AO, INC2
 | 
						|
	LFPDUX	B4,  BO, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A5, f0
 | 
						|
	fxcsmadd	f1,  B5, A5, f1
 | 
						|
	LFPDUX	A5,  AO, INC2
 | 
						|
	LFPDUX	B5,  BO, INC2
 | 
						|
	fxcpmadd	f2,  B6, A6, f2
 | 
						|
	fxcsmadd	f3,  B6, A6, f3
 | 
						|
	LFPDUX	A6,  AO, INC2
 | 
						|
	LFPDUX	B6,  BO, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  A9,  A7, f0
 | 
						|
	fxcsmadd	f1,  A9,  A7, f1
 | 
						|
	LFPDUX	A7,  AO, INC2
 | 
						|
	LFPDUX	A9,  BO, INC2
 | 
						|
	fxcpmadd	f2,  A10, A8, f2
 | 
						|
	fxcsmadd	f3,  A10, A8, f3
 | 
						|
	LFPDUX	A8,  AO, INC2
 | 
						|
	LFPDUX	A10, BO, INC2
 | 
						|
	bdnz+	.L72
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L73:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	fxcpmadd	f2,  B2, A2, f2
 | 
						|
	fxcsmadd	f3,  B2, A2, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A3, f0
 | 
						|
	fxcsmadd	f1,  B3, A3, f1
 | 
						|
	fxcpmadd	f2,  B4, A4, f2
 | 
						|
	fxcsmadd	f3,  B4, A4, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A5, f0
 | 
						|
	fxcsmadd	f1,  B5, A5, f1
 | 
						|
	fxcpmadd	f2,  B6, A6, f2
 | 
						|
	fxcsmadd	f3,  B6, A6, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  A9,  A7, f0
 | 
						|
	fxcsmadd	f1,  A9,  A7, f1
 | 
						|
	fxcpmadd	f2,  A10, A8, f2
 | 
						|
	fxcsmadd	f3,  A10, A8, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L74:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  7
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L78
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	bdz-	.L77
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L76:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	bdnz+	.L76
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L77:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L78:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDX	A1, CO1, INC2
 | 
						|
	LFPDX	B3, CO2, INC2
 | 
						|
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	fxcpmadd	f1,  AP, f1,  B3
 | 
						|
#else
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
#endif
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INC2
 | 
						|
	STFPDUX	f1,  CO2, INC2
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 1 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 2
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L80:
 | 
						|
	andi.	I, M,  1
 | 
						|
	beq	.L89
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 0 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L84
 | 
						|
#else
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L84
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	bdz-	.L83
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L82:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	fxcsmadd	f1,  A1, B2, f1
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f2,  A2, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	fxcsmadd	f3,  A2, B4, f3
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  A3, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	fxcsmadd	f1,  A3, B2, f1
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f2,  A4, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	fxcsmadd	f3,  A4, B4, f3
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	bdnz+	.L82
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L83:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	fxcsmadd	f1,  A1, B2, f1
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	fxcpmadd	f2,  A2, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	fxcsmadd	f3,  A2, B4, f3
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  A3, B1, f0
 | 
						|
	fxcsmadd	f1,  A3, B2, f1
 | 
						|
	fxcpmadd	f2,  A4, B3, f2
 | 
						|
	fxcsmadd	f3,  A4, B4, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L84:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  7
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L88
 | 
						|
 | 
						|
	LFDX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	add	AO, AO, INC
 | 
						|
	bdz-	.L87
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L86:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFDX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	add	AO, AO, INC
 | 
						|
	bdnz+	.L86
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L87:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L88:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDX	A1, CO1, INC2
 | 
						|
	LFDX	A2, CO2, INC2
 | 
						|
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
	fpadd	f2, f2, f3
 | 
						|
	fsmfp	A1, A2
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
#else
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
	fpadd	f2, f2, f3
 | 
						|
	fsmfp	A1, A2
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
#endif
 | 
						|
 | 
						|
	STFDX	f0,  CO1, INC2
 | 
						|
	STFSDX	f0,  CO2, INC2
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -1
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 0 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 1
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L89:
 | 
						|
#if defined(TRMMKERNEL) && !defined(LEFT)
 | 
						|
	addi	KK, KK, 2
 | 
						|
#endif
 | 
						|
 | 
						|
	addi	B,  BO, 2 * SIZE
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L90:
 | 
						|
	andi.	J, N,  1
 | 
						|
	beq	.L999
 | 
						|
 | 
						|
#if defined(TRMMKERNEL) &&  defined(LEFT)
 | 
						|
	mr	KK, OFFSET
 | 
						|
#endif
 | 
						|
 | 
						|
	mr	CO1, C
 | 
						|
	addi	AO, A, -2 * SIZE
 | 
						|
	
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
 | 
						|
	srawi.	I, M,  3
 | 
						|
	ble	.L100
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L91:
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 3 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 0 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 8
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	fpmr	f2,  f0
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
	fpmr	f3,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L94
 | 
						|
 | 
						|
#else
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L94
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	bdz-	.L93
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L92:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcsmadd	f0,  B1, A5, f0
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcsmadd	f1,  B1, A6, f1
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B1, A7, f2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B1, A8, f3
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B2, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	fxcpmadd	f2,  B2, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B2, A4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcsmadd	f0,  B2, A5, f0
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcsmadd	f1,  B2, A6, f1
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B2, A7, f2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B2, A8, f3
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	bdnz+	.L92
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L93:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcsmadd	f0,  B1, A5, f0
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcsmadd	f1,  B1, A6, f1
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B1, A7, f2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B1, A8, f3
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A1, f0
 | 
						|
	fxcpmadd	f1,  B2, A2, f1
 | 
						|
	fxcpmadd	f2,  B2, A3, f2
 | 
						|
	fxcpmadd	f3,  B2, A4, f3
 | 
						|
 | 
						|
	fxcsmadd	f0,  B2, A5, f0
 | 
						|
	fxcsmadd	f1,  B2, A6, f1
 | 
						|
	fxcsmadd	f2,  B2, A7, f2
 | 
						|
	fxcsmadd	f3,  B2, A8, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L94:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 8
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  3
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L98
 | 
						|
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdz-	.L97
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L96:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdnz+	.L96
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L97:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L98:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A1, CO1, INC2
 | 
						|
	LFPDUX	B1, CO1, INC2
 | 
						|
	LFPDUX	A3, CO1, INC2
 | 
						|
   	LFPDUX	A5, CO1, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	fxcpmadd	f1,  AP, f1,  B1
 | 
						|
	fxcpmadd	f2,  AP, f2,  A3
 | 
						|
	fxcpmadd	f3,  AP, f3,  A5
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INCM7
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
	STFPDUX	f2,  CO1, INC2
 | 
						|
	STFPDUX	f3,  CO1, INC2
 | 
						|
#else
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
	fpmul	f2,  AP, f2
 | 
						|
	fpmul	f3,  AP, f3
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INC2
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
	STFPDUX	f2,  CO1, INC2
 | 
						|
	STFPDUX	f3,  CO1, INC2
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -8
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -1
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 3 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 0 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 8
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	addic.	I, I, -1
 | 
						|
	li	r0, FZERO
 | 
						|
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	bgt+	.L91
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L100:
 | 
						|
	andi.	I, M,  4
 | 
						|
	beq	.L110
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 2 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 0 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3, f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L104
 | 
						|
#else
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L104
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
 | 
						|
	bdz-	.L103
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L102:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B1, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B1, A4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A5, f0
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B2, A6, f1
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B2, A7, f2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B2, A8, f3
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B3, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B3, A4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A5, f0
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B4, A6, f1
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B4, A7, f2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B4, A8, f3
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	bdnz+	.L102
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L103:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B1, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B1, A4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A5, f0
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B2, A6, f1
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	fxcsmadd	f2,  B2, A7, f2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B2, A8, f3
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A1, f0
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	fxcsmadd	f2,  B3, A3, f2
 | 
						|
	fxcsmadd	f3,  B3, A4, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A5, f0
 | 
						|
	fxcpmadd	f1,  B4, A6, f1
 | 
						|
	fxcsmadd	f2,  B4, A7, f2
 | 
						|
	fxcsmadd	f3,  B4, A8, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L104:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  7
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L108
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdz-	.L107
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L106:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdnz+	.L106
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L107:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L108:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A1, CO1, INC2
 | 
						|
	LFPDUX	B1, CO1, INC2
 | 
						|
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	fxcpmadd	f1,  AP, f1,  B1
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INCM3
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
#else
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INC2
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -1
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 2 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 0 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 4
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L110:
 | 
						|
	andi.	I, M,  2
 | 
						|
	beq	.L120
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 1 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 0 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L114
 | 
						|
#else
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L114
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	bdz-	.L113
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L112:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcsmadd	f1,  B1, A2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	fxcpmadd	f2,  B2, A3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B2, A4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	fxcpmadd	f0,  B3, A5, f0
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcsmadd	f1,  B3, A6, f1
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	fxcpmadd	f2,  B4, A7, f2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcsmadd	f3,  B4, A8, f3
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	bdnz+	.L112
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L113:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A2, f1
 | 
						|
	fxcpmadd	f2,  B2, A3, f2
 | 
						|
	fxcsmadd	f3,  B2, A4, f3
 | 
						|
	fxcpmadd	f0,  B3, A5, f0
 | 
						|
	fxcsmadd	f1,  B3, A6, f1
 | 
						|
	fxcpmadd	f2,  B4, A7, f2
 | 
						|
	fxcsmadd	f3,  B4, A8, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L114:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  7
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L118
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdz-	.L117
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L116:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdnz+	.L116
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L117:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L118:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDX	A1, CO1, INC2
 | 
						|
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
	fpadd	f2, f3, f2
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fxcpmadd	f1,  AP, f0,  A1
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
#else
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
	fpadd	f2, f3, f2
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpmul	f1,  AP, f0
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -1
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 1 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 0 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 2
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L120:
 | 
						|
	andi.	I, M,  1
 | 
						|
	beq	.L999
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 0 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 0 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L124
 | 
						|
#else
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L124
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	bdz-	.L123
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L122:
 | 
						|
	fpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	fpmadd	f1,  A2, B2, f1
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	fpmadd	f2,  A3, B3, f2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	fpmadd	f3,  A4, B4, f3
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	bdnz+	.L122
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L123:
 | 
						|
	fpmadd	f0,  A1, B1, f0
 | 
						|
	fpmadd	f1,  A2, B2, f1
 | 
						|
	fpmadd	f2,  A3, B3, f2
 | 
						|
	fpmadd	f3,  A4, B4, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L124:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  7
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L128
 | 
						|
 | 
						|
	LFDX	A1,  AO,  INC2
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	add	AO, AO, INC
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdz-	.L127
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L126:
 | 
						|
	fmadd	f0,  A1, B1, f0
 | 
						|
	LFDX	A1,  AO,  INC2
 | 
						|
	LFDX	B1,  BO,  INC2
 | 
						|
	add	AO, AO, INC
 | 
						|
	add	BO, BO, INC
 | 
						|
	bdnz+	.L126
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L127:
 | 
						|
	fmadd	f0,  A1, B1, f0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L128:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDX	A1, CO1, INC2
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
	fpadd	f2, f2, f3
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fsmtp	f1, f0
 | 
						|
	fadd	f0, f0, f1
 | 
						|
	fmadd	f0,  AP, f0,  A1
 | 
						|
#else
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
	fpadd	f2, f2, f3
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fsmtp	f1, f0
 | 
						|
	fadd	f0, f0, f1
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
#endif
 | 
						|
	STFDUX	f0,  CO1, INC2
 | 
						|
	.align 4
 | 
						|
 | 
						|
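## .L999: epilogue - reload callee-saved GPRs r14-r31 and FPR pairs f14-f31 from the stack, then return ##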
.L999:
	addi	SP, SP, 12

	lwzu	r14,   4(SP)
	lwzu	r15,   4(SP)

	lwzu	r16,   4(SP)
	lwzu	r17,   4(SP)
	lwzu	r18,   4(SP)
	lwzu	r19,   4(SP)

	lwzu	r20,   4(SP)
	lwzu	r21,   4(SP)
	lwzu	r22,   4(SP)
	lwzu	r23,   4(SP)

	lwzu	r24,   4(SP)
	lwzu	r25,   4(SP)
	lwzu	r26,   4(SP)
	lwzu	r27,   4(SP)

	lwzu	r28,   4(SP)
	lwzu	r29,   4(SP)
	lwzu	r30,   4(SP)
	lwzu	r31,   4(SP)

	subi	SP, SP, 12
	li	r0, 16

	lfpdux	f31, SP, r0
	lfpdux	f30, SP, r0
	lfpdux	f29, SP, r0
	lfpdux	f28, SP, r0
	lfpdux	f27, SP, r0
	lfpdux	f26, SP, r0
	lfpdux	f25, SP, r0
	lfpdux	f24, SP, r0
	lfpdux	f23, SP, r0
	lfpdux	f22, SP, r0
	lfpdux	f21, SP, r0
	lfpdux	f20, SP, r0
	lfpdux	f19, SP, r0
	lfpdux	f18, SP, r0
	lfpdux	f17, SP, r0
	lfpdux	f16, SP, r0
	lfpdux	f15, SP, r0
	lfpdux	f14, SP, r0
	addi	SP, SP, 16
	blr
	.align 4

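## .L1000: alternate code path that accesses C with scalar LFDUX/LFSDUX loads and the negative INCM* offsets set up below ##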
.L1000:
 | 
						|
	li	INCM1, -1 * SIZE
 | 
						|
	li	INCM3, -3 * SIZE
 | 
						|
	li	INCM5, -5 * SIZE
 | 
						|
	li	INCM7, -7 * SIZE
 | 
						|
 | 
						|
	addi	C, C, - 1 * SIZE
 | 
						|
	srawi.	J, N,  2
 | 
						|
	ble	.L1050
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1010:
 | 
						|
	mr	CO1, C
 | 
						|
	add	CO2, C,   LDC
 | 
						|
	add	CO3, CO2, LDC
 | 
						|
	add	CO4, CO3, LDC
 | 
						|
	add	C,   CO4, LDC
 | 
						|
 | 
						|
#if defined(TRMMKERNEL) &&  defined(LEFT)
 | 
						|
	mr	KK, OFFSET
 | 
						|
#endif
 | 
						|
 | 
						|
	addi	AO, A, -4 * SIZE
 | 
						|
	
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
 | 
						|
	srawi.	I, M,  3
 | 
						|
	ble	.L1020
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1011:
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 3 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  BO, - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, BO,   2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 8
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
	srawi.	TEMP,  TEMP,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
	ble	.L1014
 | 
						|
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L1014
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
	fpmr	f5,  f0
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
	fpmr	f9,  f0
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
	fpmr	f13, f0
 | 
						|
 | 
						|
	LFPDUX	A5,  AO, INC4
 | 
						|
	fpmr	f2,  f0
 | 
						|
	LFPDUX	A6,  AO, INC4
 | 
						|
	fpmr	f6,  f0
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
	fpmr	f10, f0
 | 
						|
	LFPDUX	A7,  AO, INC4
 | 
						|
	fpmr	f14, f0
 | 
						|
 | 
						|
	LFPDUX	A8,  AO, INC4
 | 
						|
	fpmr	f3,  f0
 | 
						|
	LFPDUX	B5,  BO, INC4
 | 
						|
	fpmr	f7,  f0
 | 
						|
	LFPDUX	A9,  AO, INC4
 | 
						|
	fpmr	f11, f0
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fpmr	f15, f0
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	bdz-	.L1013
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1012:
 | 
						|
 | 
						|
## 1 ##
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
	LFPDUX	B6,  BO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	LFPDUX	A10, AO, INC4
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B2, A3, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B2, A3, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fxcpmadd	f11, B2, A4, f11
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
	fxcsmadd	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 2 ##
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A5, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B3, A5, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B4, A5, f8
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	fxcsmadd	f12, B4, A5, f12
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B3, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B4, A2, f9
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
	fxcsmadd	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B3, A6, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B3, A6, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B4, A6, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B4, A6, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f3,  B3, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B3, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fxcpmadd	f11, B4, A4, f11
 | 
						|
	LFPDUX	A5,  AO, INC4
 | 
						|
	fxcsmadd	f15, B4, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 3 ##
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A7, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B5, A7, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B2, A7, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	fxcsmadd	f12, B2, A7, f12
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B5, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B5, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	LFPDUX	A6,  AO, INC4
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B5, A8, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B5, A8, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B2, A8, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B2, A8, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f3,  B5, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B5, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fxcpmadd	f11, B2, A4, f11
 | 
						|
	LFPDUX	A7,  AO, INC4
 | 
						|
	fxcsmadd	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 4 ##
 | 
						|
	fxcpmadd	f0,  B6, A9, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B6, A9, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B4, A9, f8
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	fxcsmadd	f12, B4, A9, f12
 | 
						|
	LFPDUX	B5,  BO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B6, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B6, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B4, A2, f9
 | 
						|
	LFPDUX	A8,  AO, INC4
 | 
						|
	fxcsmadd	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B6, A10, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B6, A10, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B4, A10, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B4, A10, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f3,  B6, A4, f3
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fxcsmadd	f7,  B6, A4, f7
 | 
						|
	LFPDUX	A9,  AO, INC4
 | 
						|
	fxcpmadd	f11, B4, A4, f11
 | 
						|
	nop	
 | 
						|
	fxcsmadd	f15, B4, A4, f15
 | 
						|
	bdnz+	.L1012
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1013:
 | 
						|
## 1 ##
 | 
						|
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
	LFPDUX	B6,  BO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	LFPDUX	A10, AO, INC4
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B2, A3, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B2, A3, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fxcpmadd	f11, B2, A4, f11
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A1, CO1, INC
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	fxcsmadd	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 2 ##
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A5, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B3, A5, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B4, A5, f8
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	fxcsmadd	f12, B4, A5, f12
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	B1, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B3, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B4, A2, f9
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A3, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	fxcsmadd	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B3, A6, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B3, A6, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B4, A6, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B4, A6, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f3,  B3, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B3, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fxcpmadd	f11, B4, A4, f11
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
   	LFDUX	A5, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	fxcsmadd	f15, B4, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 3 ##
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A7, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B5, A7, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B2, A7, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	fxcsmadd	f12, B2, A7, f12
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFSDUX	A1, CO1, INCM5
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
 | 
						|
	fxcpmadd	f1,  B5, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B5, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFSDUX	B1, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B5, A8, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B5, A8, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B2, A8, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B2, A8, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f3,  B5, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B5, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fxcpmadd	f11, B2, A4, f11
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFSDUX	A3, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	fxcsmadd	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 4 ##
 | 
						|
 | 
						|
	fxcpmadd	f0,  B6, A9, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B6, A9, f4
 | 
						|
	nop
 | 
						|
	fxcpmadd	f8,  B4, A9, f8
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFSDUX	A5, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	fxcsmadd	f12, B4, A9, f12
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	B3, CO2, INC
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
 | 
						|
	fxcpmadd	f1,  B6, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B6, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	fxcpmadd	f9,  B4, A2, f9
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A6, CO2, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	fxcsmadd	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	fxcpmadd	f2,  B6, A10, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B6, A10, f6
 | 
						|
	nop
 | 
						|
	fxcpmadd	f10, B4, A10, f10
 | 
						|
	nop
 | 
						|
	fxcsmadd	f14, B4, A10, f14
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A7, CO2, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
 | 
						|
	fxcpmadd	f3,  B6, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B6, A4, f7
 | 
						|
	nop
 | 
						|
	fxcpmadd	f11, B4, A4, f11
 | 
						|
	nop
 | 
						|
	fxcsmadd	f15, B4, A4, f15
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	B2, CO2, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	.align 4

.L1014:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 8
#else
	addi	TEMP, KK, 4
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble+	.L1018

	cmpwi	cr0, TEMP, 3
	bgt+	.L1015
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
	ble+	.L1018

	cmpwi	cr0, K, 3
	bgt+	.L1015
#endif
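
/* When fewer than four k-iterations were available (TEMP/K <= 3), the block
   below appears to clear the remaining accumulator registers by copying f0
   and, for non-TRMMKERNEL builds, to preload the 8x4 block of C from
   CO1/CO2 ahead of the .L1018 write-back. */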
 | 
						|
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A1, CO1, INC
 | 
						|
	fpmr	f5,  f0
 | 
						|
	LFDUX	B1, CO1, INC2
 | 
						|
	fpmr	f9,  f0
 | 
						|
	LFDUX	A3, CO1, INC2
 | 
						|
	fpmr	f13, f0
 | 
						|
   	LFDUX	A5, CO1, INC2
 | 
						|
	fpmr	f2,  f0
 | 
						|
 | 
						|
	LFSDUX	A1, CO1, INCM5
 | 
						|
	fpmr	f6,  f0
 | 
						|
	LFSDUX	B1, CO1, INC2
 | 
						|
	fpmr	f10, f0
 | 
						|
	LFSDUX	A3, CO1, INC2
 | 
						|
	fpmr	f14, f0
 | 
						|
	LFSDUX	A5, CO1, INC2
 | 
						|
	fpmr	f3,  f0
 | 
						|
 | 
						|
	LFDUX	B3, CO2, INC
 | 
						|
	fpmr	f7,  f0
 | 
						|
	LFDUX	A6, CO2, INC2
 | 
						|
	fpmr	f11, f0
 | 
						|
	LFDUX	A7, CO2, INC2
 | 
						|
	fpmr	f15, f0
 | 
						|
	LFDUX	B2, CO2, INC2
 | 
						|
#else
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	fpmr	f13, f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
 | 
						|
	fpmr	f6,  f0
 | 
						|
	fpmr	f10, f0
 | 
						|
	fpmr	f14, f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
 | 
						|
	fpmr	f7,  f0
 | 
						|
	fpmr	f11, f0
 | 
						|
	fpmr	f15, f0
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1015:
	LFPDUX	A2,  AO,  INC4
	LFPDUX	A4,  AO2, INC4
	LFPDUX	A10, BO,  INC4
	LFPDUX	B4,  BO2, INC4
	bdz-	.L1017
	.align 4

.L1016:
	fxcpmadd	f0,  A10, A2, f0
	fxcsmadd	f4,  A10, A2, f4
	fxcpmadd	f8,  B4, A2, f8
	fxcsmadd	f12, B4, A2, f12
	LFPDUX	A2, AO,  INC4

	fxcpmadd	f1,  A10, A4, f1
	fxcsmadd	f5,  A10, A4, f5
	fxcpmadd	f9,  B4, A4, f9
	fxcsmadd	f13, B4, A4, f13
	LFPDUX	A4, AO2, INC4

	fxcpmadd	f2,  A10, A2, f2
	fxcsmadd	f6,  A10, A2, f6
	fxcpmadd	f10, B4, A2, f10
	fxcsmadd	f14, B4, A2, f14
	LFPDUX	A2, AO,  INC4

	fxcpmadd	f3,  A10, A4, f3
	fxcsmadd	f7,  A10, A4, f7
	LFPDUX	A10, BO,  INC4
	fxcpmadd	f11, B4, A4, f11
	fxcsmadd	f15, B4, A4, f15
	LFPDUX	A4, AO2, INC4
	LFPDUX	B4, BO2, INC4
	bdnz+	.L1016
	.align 4

.L1017:
	fxcpmadd	f0,  A10, A2, f0
	fxcsmadd	f4,  A10, A2, f4
	fxcpmadd	f8,  B4, A2, f8
	fxcsmadd	f12, B4, A2, f12
	LFPDUX	A2, AO,  INC4

	fxcpmadd	f1,  A10, A4, f1
	fxcsmadd	f5,  A10, A4, f5
	fxcpmadd	f9,  B4, A4, f9
	fxcsmadd	f13, B4, A4, f13
	LFPDUX	A4, AO2, INC4

	fxcpmadd	f2,  A10, A2, f2
	fxcsmadd	f6,  A10, A2, f6
	fxcpmadd	f10, B4, A2, f10
	fxcsmadd	f14, B4, A2, f14

	fxcpmadd	f3,  A10, A4, f3
	fxcsmadd	f7,  A10, A4, f7
	fxcpmadd	f11, B4, A4, f11
	fxcsmadd	f15, B4, A4, f15
	.align 4
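
/* .L1018 below appears to be the write-back for the 8x4 tile.  Roughly (a
   sketch, not taken from the source), the non-TRMMKERNEL path computes

       for (j = 0; j < 4; j++)
           for (i = 0; i < 8; i++)
               C[j][i] = alpha * acc[j][i] + C[j][i];

   using fxcpmadd with AP (alpha) against the previously loaded C values,
   while the TRMMKERNEL path scales with fpmul and stores alpha * acc only.
   The STFDUX/STFSDUX pairs then write the paired doubles back through
   CO1..CO4, and the trailing TRMMKERNEL block adjusts AO/BO/KK. */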
 | 
						|
 | 
						|
.L1018:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFSDUX	B3, CO2, INCM5
 | 
						|
	LFSDUX	A6, CO2, INC2
 | 
						|
	LFSDUX	A7, CO2, INC2
 | 
						|
	LFSDUX	B2, CO2, INC2
 | 
						|
 | 
						|
	LFDUX	B5, CO3, INC
 | 
						|
	LFDUX	A8, CO3, INC2
 | 
						|
	LFDUX	A9, CO3, INC2
 | 
						|
 	LFDUX	B4, CO3, INC2
 | 
						|
 | 
						|
	LFSDUX	B5, CO3, INCM5
 | 
						|
	LFSDUX	A8, CO3, INC2
 | 
						|
	LFSDUX	A9, CO3, INC2
 | 
						|
	LFSDUX	B4, CO3, INC2
 | 
						|
 | 
						|
	LFDUX	A2,  CO4, INC
 | 
						|
	LFDUX	A4,  CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	LFDUX	A10, CO4, INC2
 | 
						|
	LFDUX	A1,  CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f1,  AP, f1,  B1
 | 
						|
	LFSDUX	A2,  CO4, INCM5
 | 
						|
	LFSDUX	A4,  CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  AP, f2,  A3
 | 
						|
	LFSDUX	A10, CO4, INC2
 | 
						|
	LFSDUX	A1,  CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f3,  AP, f3,  A5
 | 
						|
	STFDUX	f0,  CO1, INCM7
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f4,  AP, f4,  B3
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f5,  AP, f5,  A6
 | 
						|
	STFDUX	f2,  CO1, INC
 | 
						|
	STFSDUX	f2,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f6,  AP, f6,  A7
 | 
						|
	STFDUX	f3,  CO1, INC
 | 
						|
	STFSDUX	f3,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f7,  AP, f7,  B2
 | 
						|
	STFDUX	f4,  CO2, INCM7
 | 
						|
	STFSDUX	f4,  CO2, INC
 | 
						|
 | 
						|
	fxcpmadd	f8,  AP, f8,  B5
 | 
						|
	STFDUX	f5,  CO2, INC
 | 
						|
	STFSDUX	f5,  CO2, INC
 | 
						|
 | 
						|
	fxcpmadd	f9,  AP, f9,  A8
 | 
						|
	STFDUX	f6,  CO2, INC
 | 
						|
	STFSDUX	f6,  CO2, INC
 | 
						|
 | 
						|
	fxcpmadd	f10, AP, f10, A9
 | 
						|
	STFDUX	f7,  CO2, INC
 | 
						|
	STFSDUX	f7,  CO2, INC
 | 
						|
 | 
						|
	fxcpmadd	f11, AP, f11, B4
 | 
						|
	STFDUX	f8,  CO3, INCM7
 | 
						|
	STFSDUX	f8,  CO3, INC
 | 
						|
 | 
						|
	fxcpmadd	f12, AP, f12, A2
 | 
						|
	STFDUX	f9,  CO3, INC
 | 
						|
	STFSDUX	f9,  CO3, INC
 | 
						|
 | 
						|
	fxcpmadd	f13, AP, f13, A4
 | 
						|
	STFDUX	f10, CO3, INC
 | 
						|
	STFSDUX	f10, CO3, INC
 | 
						|
 | 
						|
	fxcpmadd	f14, AP, f14, A10
 | 
						|
	STFDUX	f11, CO3, INC
 | 
						|
	STFSDUX	f11, CO3, INC
 | 
						|
 | 
						|
	fxcpmadd	f15, AP, f15, A1
 | 
						|
	STFDUX	f12, CO4, INCM7
 | 
						|
#else
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
	fpmul	f2,  AP, f2
 | 
						|
	fpmul	f3,  AP, f3
 | 
						|
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f4,  AP, f4
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f5,  AP, f5
 | 
						|
	STFDUX	f2,  CO1, INC
 | 
						|
	STFSDUX	f2,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f6,  AP, f6
 | 
						|
	STFDUX	f3,  CO1, INC
 | 
						|
	STFSDUX	f3,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f7,  AP, f7
 | 
						|
	STFDUX	f4,  CO2, INC
 | 
						|
	STFSDUX	f4,  CO2, INC
 | 
						|
 | 
						|
	fpmul	f8,  AP, f8
 | 
						|
	STFDUX	f5,  CO2, INC
 | 
						|
	STFSDUX	f5,  CO2, INC
 | 
						|
 | 
						|
	fpmul	f9,  AP, f9
 | 
						|
	STFDUX	f6,  CO2, INC
 | 
						|
	STFSDUX	f6,  CO2, INC
 | 
						|
 | 
						|
	fpmul	f10, AP, f10
 | 
						|
	STFDUX	f7,  CO2, INC
 | 
						|
	STFSDUX	f7,  CO2, INC
 | 
						|
 | 
						|
	fpmul	f11, AP, f11
 | 
						|
	STFDUX	f8,  CO3, INC
 | 
						|
	STFSDUX	f8,  CO3, INC
 | 
						|
 | 
						|
	fpmul	f12, AP, f12
 | 
						|
	STFDUX	f9,  CO3, INC
 | 
						|
	STFSDUX	f9,  CO3, INC
 | 
						|
 | 
						|
	fpmul	f13, AP, f13
 | 
						|
	STFDUX	f10, CO3, INC
 | 
						|
	STFSDUX	f10, CO3, INC
 | 
						|
 | 
						|
	fpmul	f14, AP, f14
 | 
						|
	STFDUX	f11, CO3, INC
 | 
						|
	STFSDUX	f11, CO3, INC
 | 
						|
 | 
						|
	fpmul	f15, AP, f15
 | 
						|
	STFDUX	f12, CO4, INC
 | 
						|
#endif
 | 
						|
 | 
						|
	STFSDUX	f12, CO4, INC
 | 
						|
	STFDUX	f13, CO4, INC
 | 
						|
	STFSDUX	f13, CO4, INC
 | 
						|
	STFDUX	f14, CO4, INC
 | 
						|
	STFSDUX	f14, CO4, INC
 | 
						|
	STFDUX	f15, CO4, INC
 | 
						|
	STFSDUX	f15, CO4, INC
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -8
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 3 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 8
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	addic.	I, I, -1
 | 
						|
	li	r0, FZERO
 | 
						|
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	bgt+	.L1011
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1020:
 | 
						|
	andi.	I, M,  4
 | 
						|
	beq	.L1030
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 2 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  BO,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, BO,    2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
 | 
						|
	srawi.	TEMP,  TEMP,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
	fpmr	f13, f0
 | 
						|
	ble	.L1024
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f13, f0
 | 
						|
	ble	.L1024
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,   AO, INC4
 | 
						|
	LFPDUX	B1,   BO, INC4
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	LFPDUX	A3,   AO, INC4
 | 
						|
	LFPDUX	B3,   BO, INC4
 | 
						|
	LFPDUX	A4,  AO2, INC4
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	LFPDUX	A5,   AO, INC4
 | 
						|
	LFPDUX	B5,   BO, INC4
 | 
						|
	LFPDUX	A6,  AO2, INC4
 | 
						|
	LFPDUX	B6,  BO2, INC4
 | 
						|
	LFPDUX	A7,   AO, INC4
 | 
						|
	LFPDUX	A9,   BO, INC4
 | 
						|
	LFPDUX	A10, BO2, INC4
 | 
						|
	bdz-	.L1023
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1022:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A8,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
	LFPDUX	A1,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	B1,   BO, INC4
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A3, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B3, A3, f4
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B4, A3, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, B4, A3, f12
 | 
						|
	LFPDUX	A3,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B3, A4, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B3, A4, f5
 | 
						|
	LFPDUX	B3,   BO, INC4
 | 
						|
	fxcpmadd	f9,  B4, A4, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, B4, A4, f13
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A5, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B5, A5, f4
 | 
						|
	LFPDUX	A4,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B6, A5, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, B6, A5, f12
 | 
						|
	LFPDUX	A5,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  B5, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B5, A6, f5
 | 
						|
	LFPDUX	B5,   BO, INC4
 | 
						|
	fxcpmadd	f9,  B6, A6, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, B6, A6, f13
 | 
						|
	LFPDUX	B6,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A9,  A7, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  A9,  A7, f4
 | 
						|
	LFPDUX	A6,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  A10, A7, f8
 | 
						|
	nop
 | 
						|
	fxcsmadd	f12, A10, A7, f12
 | 
						|
	LFPDUX	A7,   AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f1,  A9,  A8, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  A9,  A8, f5
 | 
						|
	LFPDUX	A9,   BO, INC4
 | 
						|
	fxcpmadd	f9,  A10, A8, f9
 | 
						|
	nop
 | 
						|
	fxcsmadd	f13, A10, A8, f13
 | 
						|
	LFPDUX	A10, BO2, INC4
 | 
						|
	bdnz+	.L1022
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1023:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A8,  AO2, INC4
 | 
						|
	fxcpmadd	f8,  B2, A1, f8
 | 
						|
	fxcsmadd	f12, B2, A1, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	fxcpmadd	f9,  B2, A2, f9
 | 
						|
	fxcsmadd	f13, B2, A2, f13
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A3, f0
 | 
						|
	fxcsmadd	f4,  B3, A3, f4
 | 
						|
	fxcpmadd	f8,  B4, A3, f8
 | 
						|
	fxcsmadd	f12, B4, A3, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  B3, A4, f1
 | 
						|
	fxcsmadd	f5,  B3, A4, f5
 | 
						|
	fxcpmadd	f9,  B4, A4, f9
 | 
						|
	fxcsmadd	f13, B4, A4, f13
 | 
						|
 | 
						|
	fxcpmadd	f0,  B5, A5, f0
 | 
						|
	fxcsmadd	f4,  B5, A5, f4
 | 
						|
	fxcpmadd	f8,  B6, A5, f8
 | 
						|
	fxcsmadd	f12, B6, A5, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  B5, A6, f1
 | 
						|
	fxcsmadd	f5,  B5, A6, f5
 | 
						|
	fxcpmadd	f9,  B6, A6, f9
 | 
						|
	fxcsmadd	f13, B6, A6, f13
 | 
						|
 | 
						|
	fxcpmadd	f0,  A9, A7, f0
 | 
						|
	fxcsmadd	f4,  A9, A7, f4
 | 
						|
	fxcpmadd	f8,  A10, A7, f8
 | 
						|
	fxcsmadd	f12, A10, A7, f12
 | 
						|
 | 
						|
	fxcpmadd	f1,  A9, A8, f1
 | 
						|
	fxcsmadd	f5,  A9, A8, f5
 | 
						|
	fxcpmadd	f9,  A10, A8, f9
 | 
						|
	fxcsmadd	f13, A10, A8, f13
 | 
						|
	.align 4

.L1024:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 4
#endif
	andi.	TEMP,  TEMP,  3
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L1028

	LFPDUX	A1,  AO,  INC4
	LFPDUX	A2,  AO2, INC4
	LFPDUX	B1,  BO,  INC4
	LFPDUX	B2,  BO2, INC4
	bdz-	.L1027
	.align 4

.L1026:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f4,  B1, A1, f4
	fxcpmadd	f8,  B2, A1, f8
	fxcsmadd	f12, B2, A1, f12
	LFPDUX	A1,  AO,  INC4

	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f5,  B1, A2, f5
	LFPDUX	B1,  BO,  INC4
	fxcpmadd	f9,  B2, A2, f9
	fxcsmadd	f13, B2, A2, f13
	LFPDUX	A2,  AO2, INC4
	LFPDUX	B2,  BO2, INC4
	bdnz+	.L1026
	.align 4

.L1027:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f4,  B1, A1, f4
	fxcpmadd	f8,  B2, A1, f8
	fxcsmadd	f12, B2, A1, f12

	fxcpmadd	f1,  B1, A2, f1
	fxcsmadd	f5,  B1, A2, f5
	fxcpmadd	f9,  B2, A2, f9
	fxcsmadd	f13, B2, A2, f13
	.align 4
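
/* .L1028 below appears to be the corresponding write-back for the 4-row
   tile: load C (unless TRMMKERNEL), apply alpha with fxcpmadd/fpmul, store
   with STFDUX/STFSDUX, then update AO/BO/KK for the TRMM bookkeeping. */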
 | 
						|
 | 
						|
.L1028:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A1, CO1, INC
 | 
						|
	LFDUX	B1, CO1, INC2
 | 
						|
	LFDUX	B3, CO2, INC
 | 
						|
	LFDUX	A6, CO2, INC2
 | 
						|
 | 
						|
	LFSDUX	A1, CO1, INCM1
 | 
						|
	LFSDUX	B1, CO1, INC2
 | 
						|
	LFSDUX	B3, CO2, INCM1
 | 
						|
	LFSDUX	A6, CO2, INC2
 | 
						|
 | 
						|
	LFDUX	B5, CO3, INC
 | 
						|
	LFDUX	A8, CO3, INC2
 | 
						|
	LFDUX	A2, CO4, INC
 | 
						|
	LFDUX	A4, CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	LFSDUX	B5, CO3, INCM1
 | 
						|
	LFSDUX	A8, CO3, INC2
 | 
						|
 | 
						|
	fxcpmadd	f1,  AP, f1,  B1
 | 
						|
	LFSDUX	A2,  CO4, INCM1
 | 
						|
	LFSDUX	A4,  CO4, INC2
 | 
						|
 | 
						|
	fxcpmadd	f4,  AP, f4,  B3
 | 
						|
	STFDUX	f0,  CO1, INCM3
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f5,  AP, f5,  A6
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f8,  AP, f8,  B5
 | 
						|
	STFDUX	f4,  CO2, INCM3
 | 
						|
	STFSDUX	f4,  CO2, INC
 | 
						|
 | 
						|
	fxcpmadd	f9,  AP, f9,  A8
 | 
						|
	STFDUX	f5,  CO2, INC
 | 
						|
	STFSDUX	f5,  CO2, INC
 | 
						|
 | 
						|
	fxcpmadd	f12, AP, f12, A2
 | 
						|
	STFDUX	f8,  CO3, INCM3
 | 
						|
	STFSDUX	f8,  CO3, INC
 | 
						|
 | 
						|
	fxcpmadd	f13, AP, f13, A4
 | 
						|
	STFDUX	f9,  CO3, INC
 | 
						|
	STFSDUX	f9,  CO3, INC
 | 
						|
 | 
						|
	STFDUX	f12, CO4, INCM3
 | 
						|
	STFSDUX	f12, CO4, INC
 | 
						|
 | 
						|
	STFDUX	f13, CO4, INC
 | 
						|
	STFSDUX	f13, CO4, INC
 | 
						|
#else
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
 | 
						|
	fpmul	f4,  AP, f4
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f5,  AP, f5
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f8,  AP, f8
 | 
						|
	STFDUX	f4,  CO2, INC
 | 
						|
	STFSDUX	f4,  CO2, INC
 | 
						|
 | 
						|
	fpmul	f9,  AP, f9
 | 
						|
	STFDUX	f5,  CO2, INC
 | 
						|
	STFSDUX	f5,  CO2, INC
 | 
						|
 | 
						|
	fpmul	f12, AP, f12
 | 
						|
	STFDUX	f8,  CO3, INC
 | 
						|
	STFSDUX	f8,  CO3, INC
 | 
						|
 | 
						|
	fpmul	f13, AP, f13
 | 
						|
	STFDUX	f9,  CO3, INC
 | 
						|
	STFSDUX	f9,  CO3, INC
 | 
						|
 | 
						|
	STFDUX	f12, CO4, INC
 | 
						|
	STFSDUX	f12, CO4, INC
 | 
						|
 | 
						|
	STFDUX	f13, CO4, INC
 | 
						|
	STFSDUX	f13, CO4, INC
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 2 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 4
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1030:
 | 
						|
	andi.	I, M,  2
 | 
						|
	beq	.L1040
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 1 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  BO, - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, BO,   2 * SIZE
 | 
						|
	fpmr	f3, f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L1034
 | 
						|
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L1034
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
	LFPDUX	A5,  BO, INC4
 | 
						|
	LFPDUX	A6, BO2, INC4
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	LFPDUX	A7,  BO, INC4
 | 
						|
	LFPDUX	A8, BO2, INC4
 | 
						|
	bdz-	.L1033
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1032:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
	fxcpmadd	f2,  B2, A1, f2
 | 
						|
	fxcsmadd	f3,  B2, A1, f3
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A2, f0
 | 
						|
	fxcsmadd	f1,  B3, A2, f1
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
	fxcpmadd	f2,  B4, A2, f2
 | 
						|
	fxcsmadd	f3,  B4, A2, f3
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A5, A3, f0
 | 
						|
	fxcsmadd	f1,  A5, A3, f1
 | 
						|
	LFPDUX	A5,  BO, INC4
 | 
						|
	fxcpmadd	f2,  A6, A3, f2
 | 
						|
	fxcsmadd	f3,  A6, A3, f3
 | 
						|
	LFPDUX	A6, BO2, INC4
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A7, A4, f0
 | 
						|
	fxcsmadd	f1,  A7, A4, f1
 | 
						|
	LFPDUX	A7,  BO, INC4
 | 
						|
	fxcpmadd	f2,  A8, A4, f2
 | 
						|
	fxcsmadd	f3,  A8, A4, f3
 | 
						|
	LFPDUX	A8, BO2, INC4
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	bdnz+	.L1032
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1033:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f1,  B1, A1, f1
 | 
						|
	fxcpmadd	f2,  B2, A1, f2
 | 
						|
	fxcsmadd	f3,  B2, A1, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A2, f0
 | 
						|
	fxcsmadd	f1,  B3, A2, f1
 | 
						|
	fxcpmadd	f2,  B4, A2, f2
 | 
						|
	fxcsmadd	f3,  B4, A2, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  A5, A3, f0
 | 
						|
	fxcsmadd	f1,  A5, A3, f1
 | 
						|
	fxcpmadd	f2,  A6, A3, f2
 | 
						|
	fxcsmadd	f3,  A6, A3, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  A7, A4, f0
 | 
						|
	fxcsmadd	f1,  A7, A4, f1
 | 
						|
	fxcpmadd	f2,  A8, A4, f2
 | 
						|
	fxcsmadd	f3,  A8, A4, f3
 | 
						|
	.align 4

.L1034:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 4
#endif
	andi.	TEMP,  TEMP,  3
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L1038

	LFPDX	A1,  AO,  INC4
	LFPDUX	B1,  BO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC2
	bdz-	.L1037
	.align 4

.L1036:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f1,  B1, A1, f1
	LFPDUX	B1,  BO,  INC4
	fxcpmadd	f2,  B2, A1, f2
	fxcsmadd	f3,  B2, A1, f3
	LFPDX	A1,  AO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC2
	bdnz+	.L1036
	.align 4

.L1037:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f1,  B1, A1, f1
	fxcpmadd	f2,  B2, A1, f2
	fxcsmadd	f3,  B2, A1, f3
	.align 4
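
/* .L1038 below appears to be the write-back for the 2-row tile, following
   the same load-C / alpha-update / paired-store pattern as .L1018. */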
 | 
						|
 | 
						|
.L1038:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A1, CO1, INC
 | 
						|
	LFDUX	A2, CO2, INC
 | 
						|
	LFDUX	A3, CO3, INC
 | 
						|
	LFDUX	A4, CO4, INC
 | 
						|
 | 
						|
	LFSDUX	A1, CO1, INC
 | 
						|
	LFSDUX	A2, CO2, INC
 | 
						|
	LFSDUX	A3, CO3, INC
 | 
						|
	LFSDUX	A4, CO4, INC
 | 
						|
 | 
						|
	fxcpmadd	f0, AP, f0, A1
 | 
						|
	fxcpmadd	f1, AP, f1, A2
 | 
						|
	fxcpmadd	f2, AP, f2, A3
 | 
						|
	fxcpmadd	f3, AP, f3, A4
 | 
						|
 | 
						|
	STFDUX	f0,  CO1, INCM1
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	STFDUX	f1,  CO2, INCM1
 | 
						|
	STFSDUX	f1,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f2,  CO3, INCM1
 | 
						|
	STFSDUX	f2,  CO3, INC
 | 
						|
 | 
						|
	STFDUX	f3,  CO4, INCM1
 | 
						|
	STFSDUX	f3,  CO4, INC
 | 
						|
#else
 | 
						|
	fpmul	f0, AP, f0
 | 
						|
	fpmul	f1, AP, f1
 | 
						|
	fpmul	f2, AP, f2
 | 
						|
	fpmul	f3, AP, f3
 | 
						|
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	STFDUX	f1,  CO2, INC
 | 
						|
	STFSDUX	f1,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f2,  CO3, INC
 | 
						|
	STFSDUX	f2,  CO3, INC
 | 
						|
 | 
						|
	STFDUX	f3,  CO4, INC
 | 
						|
	STFSDUX	f3,  CO4, INC
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 1 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 2
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1040:
 | 
						|
	andi.	I, M,  1
 | 
						|
	beq	.L1049
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 0 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  BO, - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, BO,   2 * SIZE
 | 
						|
	fpmr	f3,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L1044
 | 
						|
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f3,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L1044
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC4
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	LFPDUX	A2, AO2,  INC4
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	LFPDUX	A3,  AO,  INC4
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	LFPDUX	A4, AO2,  INC4
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
	bdz-	.L1043
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1042:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A1, B2, f1
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A1, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A1, B4, f3
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
	LFPDUX	A1,  AO,  INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A2, A5, f0
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A2, A6, f1
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A2, A7, f2
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A2, A8, f3
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
	LFPDUX	A2, AO2,  INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A3, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A3, B2, f1
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A3, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A3, B4, f3
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
	LFPDUX	A3,  AO,  INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A4, A5, f0
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A4, A6, f1
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A4, A7, f2
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A4, A8, f3
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
	LFPDUX	A4, AO2,  INC4
 | 
						|
	bdnz+	.L1042
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1043:
 | 
						|
	fxcpmadd	f0,  A1, B1, f0
 | 
						|
	LFPDUX	B1,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A1, B2, f1
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A1, B3, f2
 | 
						|
	LFPDUX	B3,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A1, B4, f3
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A2, A5, f0
 | 
						|
	LFPDUX	A5,  BO,  INC4
 | 
						|
	fxcpmadd	f1,  A2, A6, f1
 | 
						|
	LFPDUX	A6,  BO2, INC4
 | 
						|
	fxcsmadd	f2,  A2, A7, f2
 | 
						|
	LFPDUX	A7,  BO,  INC4
 | 
						|
	fxcsmadd	f3,  A2, A8, f3
 | 
						|
	LFPDUX	A8,  BO2, INC4
 | 
						|
 | 
						|
	fxcpmadd	f0,  A3, B1, f0
 | 
						|
	fxcpmadd	f1,  A3, B2, f1
 | 
						|
	fxcsmadd	f2,  A3, B3, f2
 | 
						|
	fxcsmadd	f3,  A3, B4, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  A4, A5, f0
 | 
						|
	fxcpmadd	f1,  A4, A6, f1
 | 
						|
	fxcsmadd	f2,  A4, A7, f2
 | 
						|
	fxcsmadd	f3,  A4, A8, f3
 | 
						|
	.align 4

.L1044:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 4
#endif
	andi.	TEMP,  TEMP,  7
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  7
	mtspr	CTR, r0
#endif
	ble+	.L1048

	LFDX	A1,  AO,  INC4
	LFPDUX	B1,  BO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC
	bdz-	.L1047
	.align 4

.L1046:
	fxcpmadd	f0,  A1, B1, f0
	LFPDUX	B1,  BO,  INC4
	fxcpmadd	f1,  A1, B2, f1
	LFDX	A1,  AO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC
	bdnz+	.L1046
	.align 4

.L1047:
	fxcpmadd	f0,  A1, B1, f0
	fxcpmadd	f1,  A1, B2, f1
	.align 4
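
/* .L1048 below appears to handle the single remaining row: the two fpadd
   instructions fold the partial sums that were accumulated separately in
   f2/f3 back into f0/f1 before the alpha update and the store to CO1..CO4. */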
 | 
						|
 | 
						|
.L1048:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDX	A1, CO1, INC
 | 
						|
	LFDX	B3, CO3, INC
 | 
						|
	LFSDX	A1, CO2, INC
 | 
						|
	LFSDX	B3, CO4, INC
 | 
						|
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	fxcpmadd	f1,  AP, f1,  B3
 | 
						|
#else
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
#endif
 | 
						|
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO2, INC
 | 
						|
	STFDUX	f1,  CO3, INC
 | 
						|
	STFSDUX	f1,  CO4, INC
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -1
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 0 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 1
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
	.align 4

.L1049:
#if defined(TRMMKERNEL) && !defined(LEFT)
	addi	KK, KK, 4
#endif

	addi	B,  BO, 4 * SIZE

	addic.	J, J, -1
	bgt+	.L1010
	.align 4

.L1050:
	andi.	J, N,  2
	beq	.L1090

	mr	CO1, C
	add	CO2, C,   LDC
	add	C,   CO2, LDC

#if defined(TRMMKERNEL) &&  defined(LEFT)
	mr	KK, OFFSET
#endif

	addi	AO, A, -2 * SIZE

	li	r0, FZERO
	lfpsx	f0, SP, r0

	srawi.	I, M,  3
	ble	.L1060
	.align 4
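
/* .L1051 below appears to start the 8x2 kernel for the remaining pair of
   columns (N & 2): the same LFPDUX / fxcpmadd / fxcsmadd pattern as the 8x4
   case, but with a single pair of B values per k step. */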
 | 
						|
 | 
						|
.L1051:
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f6,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 3 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f6,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 8
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
	fpmr	f3,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f7,  f0
 | 
						|
	ble	.L1054
 | 
						|
#else
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
	fpmr	f6,  f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
	fpmr	f3,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f7,  f0
 | 
						|
	ble	.L1054
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	bdz-	.L1053
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1052:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A5, f0
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B2, A5, f4
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B2, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B2, A6, f5
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B2, A7, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B2, A7, f6
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B2, A8, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B2, A8, f7
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A1, f0
 | 
						|
	LFPDUX	B2,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B3, A1, f4
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B3, A2, f5
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B3, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B3, A3, f6
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B3, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B3, A4, f7
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A5, f0
 | 
						|
	LFPDUX	B3,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B4, A5, f4
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B4, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B4, A6, f5
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B4, A7, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B4, A7, f6
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B4, A8, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B4, A8, f7
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
	bdnz+	.L1052
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1053:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	LFPDUX	B4,  BO,  INC2
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A5, f0
 | 
						|
	nop
 | 
						|
	fxcsmadd	f4,  B2, A5, f4
 | 
						|
	LFPDUX	A5,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B2, A6, f1
 | 
						|
	nop
 | 
						|
	fxcsmadd	f5,  B2, A6, f5
 | 
						|
	LFPDUX	A6,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B2, A7, f2
 | 
						|
	nop
 | 
						|
	fxcsmadd	f6,  B2, A7, f6
 | 
						|
	LFPDUX	A7,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B2, A8, f3
 | 
						|
	nop
 | 
						|
	fxcsmadd	f7,  B2, A8, f7
 | 
						|
	LFPDUX	A8,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A1, f0
 | 
						|
	fxcsmadd	f4,  B3, A1, f4
 | 
						|
	fxcpmadd	f1,  B3, A2, f1
 | 
						|
	fxcsmadd	f5,  B3, A2, f5
 | 
						|
 | 
						|
	fxcpmadd	f2,  B3, A3, f2
 | 
						|
	fxcsmadd	f6,  B3, A3, f6
 | 
						|
	fxcpmadd	f3,  B3, A4, f3
 | 
						|
	fxcsmadd	f7,  B3, A4, f7
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A5, f0
 | 
						|
	fxcsmadd	f4,  B4, A5, f4
 | 
						|
	fxcpmadd	f1,  B4, A6, f1
 | 
						|
	fxcsmadd	f5,  B4, A6, f5
 | 
						|
 | 
						|
	fxcpmadd	f2,  B4, A7, f2
 | 
						|
	fxcsmadd	f6,  B4, A7, f6
 | 
						|
	fxcpmadd	f3,  B4, A8, f3
 | 
						|
	fxcsmadd	f7,  B4, A8, f7
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1054:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 8
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  3
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L1058
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	bdz-	.L1057
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1056:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	LFPDUX	A3,  AO,  INC2
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A4,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	bdnz+	.L1056
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1057:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f4,  B1, A1, f4
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f5,  B1, A2, f5
 | 
						|
 | 
						|
	fxcpmadd	f2,  B1, A3, f2
 | 
						|
	fxcsmadd	f6,  B1, A3, f6
 | 
						|
	fxcpmadd	f3,  B1, A4, f3
 | 
						|
	fxcsmadd	f7,  B1, A4, f7
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1058:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A1, CO1, INC
 | 
						|
	LFDUX	B1, CO1, INC2
 | 
						|
	LFDUX	A3, CO1, INC2
 | 
						|
   	LFDUX	A5, CO1, INC2
 | 
						|
 | 
						|
	LFSDUX	A1, CO1, INCM5
 | 
						|
	LFSDUX	B1, CO1, INC2
 | 
						|
	LFSDUX	A3, CO1, INC2
 | 
						|
	LFSDUX	A5, CO1, INC2
 | 
						|
 | 
						|
 	LFDUX	B3, CO2, INC
 | 
						|
	LFDUX	A6, CO2, INC2
 | 
						|
	LFDUX	A7, CO2, INC2
 | 
						|
	LFDUX	B2, CO2, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	LFSDUX	B3, CO2, INCM5
 | 
						|
	LFSDUX	A6, CO2, INC2
 | 
						|
	fxcpmadd	f1,  AP, f1,  B1
 | 
						|
	LFSDUX	A7, CO2, INC2
 | 
						|
	LFSDUX	B2, CO2, INC2
 | 
						|
 | 
						|
	fxcpmadd	f2,  AP, f2,  A3
 | 
						|
	STFDUX	f0,  CO1, INCM7
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f3,  AP, f3,  A5
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f4,  AP, f4,  B3
 | 
						|
	STFDUX	f2,  CO1, INC
 | 
						|
	STFSDUX	f2,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f5,  AP, f5,  A6
 | 
						|
	STFDUX	f3,  CO1, INC
 | 
						|
	STFSDUX	f3,  CO1, INC
 | 
						|
 | 
						|
	fxcpmadd	f6,  AP, f6,  A7
 | 
						|
	STFDUX	f4,  CO2, INCM7
 | 
						|
	STFSDUX	f4,  CO2, INC
 | 
						|
 | 
						|
	fxcpmadd	f7,  AP, f7,  B2
 | 
						|
	STFDUX	f5,  CO2, INC
 | 
						|
	STFSDUX	f5,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f6,  CO2, INC
 | 
						|
	STFSDUX	f6,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f7,  CO2, INC
 | 
						|
	STFSDUX	f7,  CO2, INC
 | 
						|
#else
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
 | 
						|
	fpmul	f2,  AP, f2
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f3,  AP, f3
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f4,  AP, f4
 | 
						|
	STFDUX	f2,  CO1, INC
 | 
						|
	STFSDUX	f2,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f5,  AP, f5
 | 
						|
	STFDUX	f3,  CO1, INC
 | 
						|
	STFSDUX	f3,  CO1, INC
 | 
						|
 | 
						|
	fpmul	f6,  AP, f6
 | 
						|
	STFDUX	f4,  CO2, INC
 | 
						|
	STFSDUX	f4,  CO2, INC
 | 
						|
 | 
						|
	fpmul	f7,  AP, f7
 | 
						|
	STFDUX	f5,  CO2, INC
 | 
						|
	STFSDUX	f5,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f6,  CO2, INC
 | 
						|
	STFSDUX	f6,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f7,  CO2, INC
 | 
						|
	STFSDUX	f7,  CO2, INC
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -8
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 3 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 8
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	addic.	I, I, -1
 | 
						|
	li	r0, FZERO
 | 
						|
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	bgt+	.L1051
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1060:
 | 
						|
	andi.	I, M,  4
 | 
						|
	beq	.L1070
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 2 + BASE_SHIFT
 | 
						|
	slwi	r0,   KK, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
 	fpmr	f1,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	fpmr	f2,  f0
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f3,  f0
 | 
						|
	ble	.L1064
 | 
						|
#else
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f2,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f3,  f0
 | 
						|
	ble	.L1064
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	B1,  BO, INC2
 | 
						|
	LFPDUX	A1,  AO, INC2
 | 
						|
	LFPDUX	A2,  AO, INC2
 | 
						|
	LFPDUX	B2,  BO, INC2
 | 
						|
	LFPDUX	A3,  AO, INC2
 | 
						|
	LFPDUX	A4,  AO, INC2
 | 
						|
 | 
						|
	LFPDUX	B3,  BO, INC2
 | 
						|
	LFPDUX	A5,  AO, INC2
 | 
						|
	LFPDUX	A6,  AO, INC2
 | 
						|
	LFPDUX	B4,  BO, INC2
 | 
						|
	LFPDUX	A7,  AO, INC2
 | 
						|
	LFPDUX	A8,  AO, INC2
 | 
						|
	bdz-	.L1063
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1062:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f2,  B1, A1, f2
 | 
						|
	LFPDUX	A1,  AO, INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f3,  B1, A2, f3
 | 
						|
	LFPDUX	A2,  AO, INC2
 | 
						|
	LFPDUX	B1,  BO, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A3, f0
 | 
						|
	fxcsmadd	f2,  B2, A3, f2
 | 
						|
	LFPDUX	A3,  AO, INC2
 | 
						|
	fxcpmadd	f1,  B2, A4, f1
 | 
						|
	fxcsmadd	f3,  B2, A4, f3
 | 
						|
	LFPDUX	A4,  AO, INC2
 | 
						|
	LFPDUX	B2,  BO, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A5, f0
 | 
						|
	fxcsmadd	f2,  B3, A5, f2
 | 
						|
	LFPDUX	A5,  AO, INC2
 | 
						|
	fxcpmadd	f1,  B3, A6, f1
 | 
						|
	fxcsmadd	f3,  B3, A6, f3
 | 
						|
	LFPDUX	A6,  AO, INC2
 | 
						|
	LFPDUX	B3,  BO, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A7, f0
 | 
						|
	fxcsmadd	f2,  B4, A7, f2
 | 
						|
	LFPDUX	A7,  AO, INC2
 | 
						|
	fxcpmadd	f1,  B4, A8, f1
 | 
						|
	fxcsmadd	f3,  B4, A8, f3
 | 
						|
	LFPDUX	A8,  AO, INC2
 | 
						|
	LFPDUX	B4,  BO, INC2
 | 
						|
	bdnz+	.L1062
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1063:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f2,  B1, A1, f2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f3,  B1, A2, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B2, A3, f0
 | 
						|
	fxcsmadd	f2,  B2, A3, f2
 | 
						|
	fxcpmadd	f1,  B2, A4, f1
 | 
						|
	fxcsmadd	f3,  B2, A4, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B3, A5, f0
 | 
						|
	fxcsmadd	f2,  B3, A5, f2
 | 
						|
	fxcpmadd	f1,  B3, A6, f1
 | 
						|
	fxcsmadd	f3,  B3, A6, f3
 | 
						|
 | 
						|
	fxcpmadd	f0,  B4, A7, f0
 | 
						|
	fxcsmadd	f2,  B4, A7, f2
 | 
						|
	fxcpmadd	f1,  B4, A8, f1
 | 
						|
	fxcsmadd	f3,  B4, A8, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1064:
 | 
						|
	lfd	AP,  ALPHA(SP)
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
       fsmfp	AP, AP
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	andi.	TEMP,  TEMP,  3
 | 
						|
	mtspr	CTR, TEMP
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
	ble+	.L1068
 | 
						|
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	bdz-	.L1067
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1066:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f2,  B1, A1, f2
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f3,  B1, A2, f3
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	LFPDUX	A2,  AO,  INC2
 | 
						|
	bdnz+	.L1066
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1067:
 | 
						|
	fxcpmadd	f0,  B1, A1, f0
 | 
						|
	fxcsmadd	f2,  B1, A1, f2
 | 
						|
	fxcpmadd	f1,  B1, A2, f1
 | 
						|
	fxcsmadd	f3,  B1, A2, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1068:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A1, CO1, INC
 | 
						|
	LFDUX	A2, CO1, INC2
 | 
						|
	LFDUX	A3, CO2, INC
 | 
						|
	LFDUX	A4, CO2, INC2
 | 
						|
 | 
						|
	LFSDUX	A1, CO1, INCM1
 | 
						|
	LFSDUX	A2, CO1, INC2
 | 
						|
	LFSDUX	A3, CO2, INCM1
 | 
						|
	LFSDUX	A4, CO2, INC2
 | 
						|
 | 
						|
	fxcpmadd	f0,  AP, f0,  A1
 | 
						|
	fxcpmadd	f1,  AP, f1,  A2
 | 
						|
	fxcpmadd	f2,  AP, f2,  A3
 | 
						|
	STFDUX	f0,  CO1, INCM3
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
 	fxcpmadd	f3,  AP, f3,  A4
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	STFDUX	f2,  CO2, INCM3
 | 
						|
	STFSDUX	f2,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f3,  CO2, INC
 | 
						|
	STFSDUX	f3,  CO2, INC
 | 
						|
#else
 | 
						|
	fpmul	f0,  AP, f0
 | 
						|
	fpmul	f1,  AP, f1
 | 
						|
	fpmul	f2,  AP, f2
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
 | 
						|
 	fpmul	f3,  AP, f3
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	STFDUX	f2,  CO2, INC
 | 
						|
	STFSDUX	f2,  CO2, INC
 | 
						|
 | 
						|
	STFDUX	f3,  CO2, INC
 | 
						|
	STFSDUX	f3,  CO2, INC
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 2 + BASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 4
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1070:
	andi.	I, M,  2
	beq	.L1080

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
#else
	slwi	TEMP, KK, 1 + BASE_SHIFT
	slwi	r0,   KK, 1 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 2
#endif
	srawi.	r0,  TEMP,  3
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3, f0
	ble	.L1074
#else
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0

	srawi.	r0,  K,  3
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3, f0
	ble	.L1074
#endif


	LFPDUX	A1,  AO, INC2
	LFPDUX	B1,  BO, INC2
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2
	LFPDUX	A3,  AO, INC2
	LFPDUX	B3,  BO, INC2
	LFPDUX	A4,  AO, INC2
	LFPDUX	B4,  BO, INC2

	LFPDUX	A5,  AO, INC2
	LFPDUX	B5,  BO, INC2
	LFPDUX	A6,  AO, INC2
	LFPDUX	B6,  BO, INC2
	LFPDUX	A7,  AO, INC2
	LFPDUX	A9,  BO, INC2
	LFPDUX	A8,  AO, INC2
	LFPDUX	A10, BO, INC2
	bdz-	.L1073
	.align 4

.L1072:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f1,  B1, A1, f1
	LFPDUX	A1,  AO, INC2
	LFPDUX	B1,  BO, INC2
	fxcpmadd	f2,  B2, A2, f2
	fxcsmadd	f3,  B2, A2, f3
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2

	fxcpmadd	f0,  B3, A3, f0
	fxcsmadd	f1,  B3, A3, f1
	LFPDUX	A3,  AO, INC2
	LFPDUX	B3,  BO, INC2
	fxcpmadd	f2,  B4, A4, f2
	fxcsmadd	f3,  B4, A4, f3
	LFPDUX	A4,  AO, INC2
	LFPDUX	B4,  BO, INC2

	fxcpmadd	f0,  B5, A5, f0
	fxcsmadd	f1,  B5, A5, f1
	LFPDUX	A5,  AO, INC2
	LFPDUX	B5,  BO, INC2
	fxcpmadd	f2,  B6, A6, f2
	fxcsmadd	f3,  B6, A6, f3
	LFPDUX	A6,  AO, INC2
	LFPDUX	B6,  BO, INC2

	fxcpmadd	f0,  A9,  A7, f0
	fxcsmadd	f1,  A9,  A7, f1
	LFPDUX	A7,  AO, INC2
	LFPDUX	A9,  BO, INC2
	fxcpmadd	f2,  A10, A8, f2
	fxcsmadd	f3,  A10, A8, f3
	LFPDUX	A8,  AO, INC2
	LFPDUX	A10, BO, INC2
	bdnz+	.L1072
	.align 4

.L1073:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f1,  B1, A1, f1
	fxcpmadd	f2,  B2, A2, f2
	fxcsmadd	f3,  B2, A2, f3

	fxcpmadd	f0,  B3, A3, f0
	fxcsmadd	f1,  B3, A3, f1
	fxcpmadd	f2,  B4, A4, f2
	fxcsmadd	f3,  B4, A4, f3

	fxcpmadd	f0,  B5, A5, f0
	fxcsmadd	f1,  B5, A5, f1
	fxcpmadd	f2,  B6, A6, f2
	fxcsmadd	f3,  B6, A6, f3

	fxcpmadd	f0,  A9,  A7, f0
	fxcsmadd	f1,  A9,  A7, f1
	fxcpmadd	f2,  A10, A8, f2
	fxcsmadd	f3,  A10, A8, f3
	.align 4

.L1074:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 2
#endif
	andi.	TEMP,  TEMP,  7
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  7
	mtspr	CTR, r0
#endif
	ble+	.L1078

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	bdz-	.L1077
	.align 4

.L1076:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f1,  B1, A1, f1
	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	bdnz+	.L1076
	.align 4

.L1077:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f1,  B1, A1, f1
	.align 4

.L1078:
#ifndef TRMMKERNEL
	LFDUX	A1, CO1, INC
	LFDUX	B3, CO2, INC
	LFSDUX	A1, CO1, INC
	LFSDUX	B3, CO2, INC

	fpadd	f0, f0, f2
	fpadd	f1, f1, f3

	fxcpmadd	f0,  AP, f0,  A1
	fxcpmadd	f1,  AP, f1,  B3

	STFDUX	f0,  CO1, INCM1
	STFSDUX	f0,  CO1, INC
	STFDUX	f1,  CO2, INCM1
	STFSDUX	f1,  CO2, INC
#else
	fpadd	f0, f0, f2
	fpadd	f1, f1, f3

	fpmul	f0,  AP, f0
	fpmul	f1,  AP, f1

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC
	STFDUX	f1,  CO2, INC
	STFSDUX	f1,  CO2, INC
#endif


#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -2
#else
	addi	TEMP, TEMP, -2
#endif
	slwi	r0,   TEMP, 1 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 2
#endif
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4

.L1080:
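	/* last single row of the 2-column block (M & 1) */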
	andi.	I, M,  1
	beq	.L1089

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0
#else
	slwi	TEMP, KK, 0 + BASE_SHIFT
	slwi	r0,   KK, 1 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 2
#endif
	srawi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble	.L1084
#else
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0

	srawi.	r0,  K,  3
	mtspr	CTR, r0
	ble	.L1084
#endif

	LFPDUX	B1,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2

	LFPDUX	B2,  BO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2

	LFPDUX	B3,  BO,  INC2
	LFPDUX	B4,  BO,  INC2
	bdz-	.L1083
	.align 4

.L1082:
	fxcpmadd	f0,  A1, B1, f0
	LFPDUX	B1,  BO,  INC2
	fxcsmadd	f1,  A1, B2, f1
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f2,  A2, B3, f2
	LFPDUX	B3,  BO,  INC2
	fxcsmadd	f3,  A2, B4, f3
	LFPDUX	B4,  BO,  INC2
	LFPDUX	A2,  AO,  INC2

	fxcpmadd	f0,  A3, B1, f0
	LFPDUX	B1,  BO,  INC2
	fxcsmadd	f1,  A3, B2, f1
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A3,  AO,  INC2
	fxcpmadd	f2,  A4, B3, f2
	LFPDUX	B3,  BO,  INC2
	fxcsmadd	f3,  A4, B4, f3
	LFPDUX	B4,  BO,  INC2
	LFPDUX	A4,  AO,  INC2
	bdnz+	.L1082
	.align 4

.L1083:
	fxcpmadd	f0,  A1, B1, f0
	LFPDUX	B1,  BO,  INC2
	fxcsmadd	f1,  A1, B2, f1
	LFPDUX	B2,  BO,  INC2
	fxcpmadd	f2,  A2, B3, f2
	LFPDUX	B3,  BO,  INC2
	fxcsmadd	f3,  A2, B4, f3
	LFPDUX	B4,  BO,  INC2

	fxcpmadd	f0,  A3, B1, f0
	fxcsmadd	f1,  A3, B2, f1
	fxcpmadd	f2,  A4, B3, f2
	fxcsmadd	f3,  A4, B4, f3
	.align 4

.L1084:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 2
#endif
	andi.	TEMP,  TEMP,  7
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  7
	mtspr	CTR, r0
#endif
	ble+	.L1088

	LFDX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	add	AO, AO, INC
	bdz-	.L1087
	.align 4

.L1086:
	fxcpmadd	f0,  A1, B1, f0
	LFDX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	add	AO, AO, INC
	bdnz+	.L1086
	.align 4

.L1087:
	fxcpmadd	f0,  A1, B1, f0
	.align 4

.L1088:
#ifndef TRMMKERNEL
	LFDX	A1, CO1, INC
	LFDX	A2, CO2, INC

	fpadd	f0, f0, f1
	fpadd	f2, f2, f3
	fsmfp	A1, A2
	fpadd	f0, f0, f2
	fxcpmadd	f0,  AP, f0,  A1
#else
	fpadd	f0, f0, f1
	fpadd	f2, f2, f3
	fsmfp	A1, A2
	fpadd	f0, f0, f2
	fpmul	f0,  AP, f0
#endif

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO2, INC

#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -1
#else
	addi	TEMP, TEMP, -2
#endif
	slwi	r0,   TEMP, 0 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 1
#endif
#endif
	.align 4

.L1089:
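	/* 2-column block finished: bump KK for TRMM and advance B past the consumed panel */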
#if defined(TRMMKERNEL) && !defined(LEFT)
	addi	KK, KK, 2
#endif

	addi	B,  BO, 2 * SIZE
	.align 4

.L1090:
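	/* final single column, if N is odd */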
	andi.	J, N,  1
	beq	.L10999

#if defined(TRMMKERNEL) &&  defined(LEFT)
	mr	KK, OFFSET
#endif

	mr	CO1, C
	addi	AO, A, -2 * SIZE

	li	r0, FZERO
	lfpsx	f0, SP, r0

	srawi.	I, M,  3
	ble	.L10100
	.align 4

.L1091:
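	/* 8-row by 1-column blocks of the last column */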
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
#else
	slwi	TEMP, KK, 3 + BASE_SHIFT
	slwi	r0,   KK, 0 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 8
#else
	addi	TEMP, KK, 1
#endif
	fpmr	f2,  f0
	srawi.	r0,  TEMP,  2
	fpmr	f3,  f0
	mtspr	CTR, r0
	ble	.L1094

#else
	srawi.	r0,  K,  2
	fpmr	f1,  f0
	addi	BO,  B,  - 2 * SIZE
	fpmr	f2,  f0
	fpmr	f3,  f0
	mtspr	CTR, r0
	ble	.L1094
#endif

	LFPDUX	B1,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A5,  AO,  INC2
	LFPDUX	A6,  AO,  INC2
	LFPDUX	A7,  AO,  INC2
	LFPDUX	A8,  AO,  INC2
	bdz-	.L1093
	.align 4

.L1092:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	LFPDUX	A2,  AO,  INC2
	fxcpmadd	f2,  B1, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcpmadd	f3,  B1, A4, f3
	LFPDUX	A4,  AO,  INC2

	fxcsmadd	f0,  B1, A5, f0
	LFPDUX	A5,  AO,  INC2
	fxcsmadd	f1,  B1, A6, f1
	LFPDUX	A6,  AO,  INC2
	fxcsmadd	f2,  B1, A7, f2
	LFPDUX	A7,  AO,  INC2
	fxcsmadd	f3,  B1, A8, f3
	LFPDUX	A8,  AO,  INC2
	LFPDUX	B1,  BO,  INC2

	fxcpmadd	f0,  B2, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B2, A2, f1
	LFPDUX	A2,  AO,  INC2
	fxcpmadd	f2,  B2, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcpmadd	f3,  B2, A4, f3
	LFPDUX	A4,  AO,  INC2

	fxcsmadd	f0,  B2, A5, f0
	LFPDUX	A5,  AO,  INC2
	fxcsmadd	f1,  B2, A6, f1
	LFPDUX	A6,  AO,  INC2
	fxcsmadd	f2,  B2, A7, f2
	LFPDUX	A7,  AO,  INC2
	fxcsmadd	f3,  B2, A8, f3
	LFPDUX	A8,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	bdnz+	.L1092
	.align 4

.L1093:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	LFPDUX	A2,  AO,  INC2
	fxcpmadd	f2,  B1, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcpmadd	f3,  B1, A4, f3
	LFPDUX	A4,  AO,  INC2

	fxcsmadd	f0,  B1, A5, f0
	LFPDUX	A5,  AO,  INC2
	fxcsmadd	f1,  B1, A6, f1
	LFPDUX	A6,  AO,  INC2
	fxcsmadd	f2,  B1, A7, f2
	LFPDUX	A7,  AO,  INC2
	fxcsmadd	f3,  B1, A8, f3
	LFPDUX	A8,  AO,  INC2

	fxcpmadd	f0,  B2, A1, f0
	fxcpmadd	f1,  B2, A2, f1
	fxcpmadd	f2,  B2, A3, f2
	fxcpmadd	f3,  B2, A4, f3

	fxcsmadd	f0,  B2, A5, f0
	fxcsmadd	f1,  B2, A6, f1
	fxcsmadd	f2,  B2, A7, f2
	fxcsmadd	f3,  B2, A8, f3
	.align 4

.L1094:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 8
#else
	addi	TEMP, KK, 1
#endif
	andi.	TEMP,  TEMP,  3
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L1098

	LFDX	B1,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2
	add	BO, BO, INC
	bdz-	.L1097
	.align 4

.L1096:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	LFPDUX	A2,  AO,  INC2
	fxcpmadd	f2,  B1, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcpmadd	f3,  B1, A4, f3
	LFDX	B1,  BO,  INC2
	LFPDUX	A4,  AO,  INC2
	add	BO, BO, INC
	bdnz+	.L1096
	.align 4

.L1097:
	fxcpmadd	f0,  B1, A1, f0
	fxcpmadd	f1,  B1, A2, f1
	fxcpmadd	f2,  B1, A3, f2
	fxcpmadd	f3,  B1, A4, f3
	.align 4

.L1098:
#ifndef TRMMKERNEL
	LFDUX	A1, CO1, INC
	LFDUX	B1, CO1, INC2
	LFDUX	A3, CO1, INC2
	LFDUX	A5, CO1, INC2

	LFSDUX	A1, CO1, INCM5
	LFSDUX	B1, CO1, INC2
	LFSDUX	A3, CO1, INC2
	LFSDUX	A5, CO1, INC2

	fxcpmadd	f0,  AP, f0,  A1
	fxcpmadd	f1,  AP, f1,  B1
	fxcpmadd	f2,  AP, f2,  A3
	STFDUX	f0,  CO1, INCM7
	STFSDUX	f0,  CO1, INC

	fxcpmadd	f3,  AP, f3,  A5
#else
	fpmul	f0,  AP, f0
	fpmul	f1,  AP, f1
	fpmul	f2,  AP, f2
	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC

	fpmul	f3,  AP, f3
#endif

	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC

	STFDUX	f2,  CO1, INC
	STFSDUX	f2,  CO1, INC

	STFDUX	f3,  CO1, INC
	STFSDUX	f3,  CO1, INC

#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -8
#else
	addi	TEMP, TEMP, -1
#endif
	slwi	r0,   TEMP, 3 + BASE_SHIFT
	slwi	TEMP, TEMP, 0 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 8
#endif
#endif

	addic.	I, I, -1
	li	r0, FZERO

	lfpsx	f0, SP, r0
	bgt+	.L1091
	.align 4

.L10100:
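	/* 4-row remainder of the last column (M & 4) */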
	andi.	I, M,  4
	beq	.L10110

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3, f0
#else
	slwi	TEMP, KK, 2 + BASE_SHIFT
	slwi	r0,   KK, 0 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	fpmr	f1,  f0
	addi	BO,  BO,  - 2 * SIZE
	fpmr	f2,  f0
	fpmr	f3, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 1
#endif
	srawi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble	.L10104
#else
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3, f0

	srawi.	r0,  K,  3
	mtspr	CTR, r0
	ble	.L10104
#endif

	LFPDUX	B1,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A5,  AO,  INC2
	LFPDUX	A6,  AO,  INC2
	LFPDUX	A7,  AO,  INC2
	LFPDUX	A8,  AO,  INC2
	LFPDUX	B3,  BO,  INC2
	LFPDUX	B4,  BO,  INC2

	bdz-	.L10103
	.align 4

.L10102:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	LFPDUX	A2,  AO,  INC2
	fxcsmadd	f2,  B1, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcsmadd	f3,  B1, A4, f3
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B1,  BO,  INC2

	fxcpmadd	f0,  B2, A5, f0
	LFPDUX	A5,  AO,  INC2
	fxcpmadd	f1,  B2, A6, f1
	LFPDUX	A6,  AO,  INC2
	fxcsmadd	f2,  B2, A7, f2
	LFPDUX	A7,  AO,  INC2
	fxcsmadd	f3,  B2, A8, f3
	LFPDUX	A8,  AO,  INC2
	LFPDUX	B2,  BO,  INC2

	fxcpmadd	f0,  B3, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B3, A2, f1
	LFPDUX	A2,  AO,  INC2
	fxcsmadd	f2,  B3, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcsmadd	f3,  B3, A4, f3
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B3,  BO,  INC2

	fxcpmadd	f0,  B4, A5, f0
	LFPDUX	A5,  AO,  INC2
	fxcpmadd	f1,  B4, A6, f1
	LFPDUX	A6,  AO,  INC2
	fxcsmadd	f2,  B4, A7, f2
	LFPDUX	A7,  AO,  INC2
	fxcsmadd	f3,  B4, A8, f3
	LFPDUX	A8,  AO,  INC2
	LFPDUX	B4,  BO,  INC2
	bdnz+	.L10102
	.align 4

.L10103:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	LFPDUX	A2,  AO,  INC2
	fxcsmadd	f2,  B1, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcsmadd	f3,  B1, A4, f3
	LFPDUX	A4,  AO,  INC2

	fxcpmadd	f0,  B2, A5, f0
	LFPDUX	A5,  AO,  INC2
	fxcpmadd	f1,  B2, A6, f1
	LFPDUX	A6,  AO,  INC2
	fxcsmadd	f2,  B2, A7, f2
	LFPDUX	A7,  AO,  INC2
	fxcsmadd	f3,  B2, A8, f3
	LFPDUX	A8,  AO,  INC2

	fxcpmadd	f0,  B3, A1, f0
	fxcpmadd	f1,  B3, A2, f1
	fxcsmadd	f2,  B3, A3, f2
	fxcsmadd	f3,  B3, A4, f3

	fxcpmadd	f0,  B4, A5, f0
	fxcpmadd	f1,  B4, A6, f1
	fxcsmadd	f2,  B4, A7, f2
	fxcsmadd	f3,  B4, A8, f3
	.align 4

.L10104:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 1
#endif
	andi.	TEMP,  TEMP,  7
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  7
	mtspr	CTR, r0
#endif
	ble+	.L10108

	LFPDUX	A1,  AO,  INC2
	LFDX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	add	BO, BO, INC
	bdz-	.L10107
	.align 4

.L10106:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f1,  B1, A2, f1
	LFDX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	add	BO, BO, INC
	bdnz+	.L10106
	.align 4

.L10107:
	fxcpmadd	f0,  B1, A1, f0
	fxcpmadd	f1,  B1, A2, f1
	.align 4

.L10108:
#ifndef TRMMKERNEL
	LFDUX	A1, CO1, INC
	LFDUX	B1, CO1, INC2
	LFSDUX	A1, CO1, INCM1
	LFSDUX	B1, CO1, INC2

	fpadd	f0, f0, f2
	fpadd	f1, f1, f3

	fxcpmadd	f0,  AP, f0,  A1
	fxcpmadd	f1,  AP, f1,  B1

	STFDUX	f0,  CO1, INCM3
	STFSDUX	f0,  CO1, INC
#else
	fpadd	f0, f0, f2
	fpadd	f1, f1, f3

	fpmul	f0,  AP, f0
	fpmul	f1,  AP, f1

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC
#endif

	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC

#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -4
#else
	addi	TEMP, TEMP, -1
#endif
	slwi	r0,   TEMP, 2 + BASE_SHIFT
	slwi	TEMP, TEMP, 0 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 4
#endif
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4

.L10110:
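	/* 2-row remainder of the last column (M & 2) */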
	andi.	I, M,  2
	beq	.L10120

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0
#else
	slwi	TEMP, KK, 1 + BASE_SHIFT
	slwi	r0,   KK, 0 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	fpmr	f1,  f0
	addi	BO,  BO,  - 2 * SIZE
	fpmr	f2,  f0
	fpmr	f3,  f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 1
#endif
	srawi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble	.L10114
#else
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0

	srawi.	r0,  K,  3
	mtspr	CTR, r0
	ble	.L10114
#endif

	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	B1,  BO,  INC2

	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B2,  BO,  INC2

	LFPDUX	A5,  AO,  INC2
	LFPDUX	A6,  AO,  INC2
	LFPDUX	B3,  BO,  INC2

	LFPDUX	A7,  AO,  INC2
	LFPDUX	A8,  AO,  INC2
	LFPDUX	B4,  BO,  INC2
	bdz-	.L10113
	.align 4

.L10112:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	fxcsmadd	f1,  B1, A2, f1
	LFPDUX	A2,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	fxcpmadd	f2,  B2, A3, f2
	LFPDUX	A3,  AO,  INC2
	fxcsmadd	f3,  B2, A4, f3
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	fxcpmadd	f0,  B3, A5, f0
	LFPDUX	A5,  AO,  INC2
	fxcsmadd	f1,  B3, A6, f1
	LFPDUX	A6,  AO,  INC2
	LFPDUX	B3,  BO,  INC2
	fxcpmadd	f2,  B4, A7, f2
	LFPDUX	A7,  AO,  INC2
	fxcsmadd	f3,  B4, A8, f3
	LFPDUX	A8,  AO,  INC2
	LFPDUX	B4,  BO,  INC2
	bdnz+	.L10112
	.align 4

.L10113:
	fxcpmadd	f0,  B1, A1, f0
	fxcsmadd	f1,  B1, A2, f1
	fxcpmadd	f2,  B2, A3, f2
	fxcsmadd	f3,  B2, A4, f3
	fxcpmadd	f0,  B3, A5, f0
	fxcsmadd	f1,  B3, A6, f1
	fxcpmadd	f2,  B4, A7, f2
	fxcsmadd	f3,  B4, A8, f3
	.align 4

.L10114:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 1
#endif
	andi.	TEMP,  TEMP,  7
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  7
	mtspr	CTR, r0
#endif
	ble+	.L10118

	LFPDUX	A1,  AO,  INC2
	LFDX	B1,  BO,  INC2
	add	BO, BO, INC
	bdz-	.L10117
	.align 4

.L10116:
	fxcpmadd	f0,  B1, A1, f0
	LFPDUX	A1,  AO,  INC2
	LFDX	B1,  BO,  INC2
	add	BO, BO, INC
	bdnz+	.L10116
	.align 4

.L10117:
	fxcpmadd	f0,  B1, A1, f0
	.align 4

.L10118:
#ifndef TRMMKERNEL
	LFDUX	A1, CO1, INC
	LFDUX	A2, CO1, INC

	fpadd	f0, f0, f1
	fpadd	f2, f3, f2
	fsmfp	A1, A2
	fpadd	f0, f0, f2
	fxcpmadd	f1,  AP, f0,  A1

	li	r0, FZERO
	lfpsx	f0, SP, r0

	STFDUX	f1,  CO1, INCM1
	STFSDUX	f1,  CO1, INC
#else
	fpadd	f0, f0, f1
	fpadd	f2, f3, f2
	fsmfp	A1, A2
	fpadd	f0, f0, f2
	fpmul	f1,  AP, f0

	li	r0, FZERO
	lfpsx	f0, SP, r0

	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC
#endif


#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -2
#else
	addi	TEMP, TEMP, -1
#endif
	slwi	r0,   TEMP, 1 + BASE_SHIFT
	slwi	TEMP, TEMP, 0 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 2
#endif
#endif
	.align 4

.L10120:
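	/* final 1x1 element (M & 1 of the last column) */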
	andi.	I, M,  1
	beq	.L10999

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0
#else
	slwi	TEMP, KK, 0 + BASE_SHIFT
	slwi	r0,   KK, 0 + BASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	fpmr	f1,  f0
	addi	BO,  BO,  - 2 * SIZE
	fpmr	f2,  f0
	fpmr	f3,  f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 1
#endif
	srawi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble	.L10124
#else
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0

	srawi.	r0,  K,  3
	mtspr	CTR, r0
	ble	.L10124
#endif

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	B3,  BO,  INC2
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B4,  BO,  INC2
	bdz-	.L10123
	.align 4

.L10122:
	fpmadd	f0,  A1, B1, f0
	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	fpmadd	f1,  A2, B2, f1
	LFPDUX	A2,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	fpmadd	f2,  A3, B3, f2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	B3,  BO,  INC2
	fpmadd	f3,  A4, B4, f3
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B4,  BO,  INC2
	bdnz+	.L10122
	.align 4

.L10123:
	fpmadd	f0,  A1, B1, f0
	fpmadd	f1,  A2, B2, f1
	fpmadd	f2,  A3, B3, f2
	fpmadd	f3,  A4, B4, f3
	.align 4

.L10124:
	lfd	AP,  ALPHA(SP)
#ifdef TRMMKERNEL
	fsmfp	AP, AP
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 1
#endif
	andi.	TEMP,  TEMP,  7
	mtspr	CTR, TEMP
#else
	andi.	r0,  K,  7
	mtspr	CTR, r0
#endif
	ble+	.L10128

	LFDX	A1,  AO,  INC2
	LFDX	B1,  BO,  INC2
	add	AO, AO, INC
	add	BO, BO, INC
	bdz-	.L10127
	.align 4

.L10126:
	fmadd	f0,  A1, B1, f0
	LFDX	A1,  AO,  INC2
	LFDX	B1,  BO,  INC2
	add	AO, AO, INC
	add	BO, BO, INC
	bdnz+	.L10126
	.align 4

.L10127:
	fmadd	f0,  A1, B1, f0
	.align 4

.L10128:
#ifndef TRMMKERNEL
	LFDX	A1, CO1, INC
	fpadd	f0, f0, f1
	fpadd	f2, f2, f3
	fpadd	f0, f0, f2
	fsmtp	f1, f0
	fadd	f0, f0, f1
	fmadd	f0,  AP, f0,  A1
	STFDUX	f0,  CO1, INC
#else
	fpadd	f0, f0, f1
	fpadd	f2, f2, f3
	fpadd	f0, f0, f2
	fsmtp	f1, f0
	fadd	f0, f0, f1
	fmul	f0,  AP, f0
	STFDUX	f0,  CO1, INC
#endif
	.align 4

.L10999:
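	/* epilogue: restore callee-saved GPRs r14-r31 and FPR pairs f14-f31, then return */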
	addi	SP, SP, 12

	lwzu	r14,   4(SP)
	lwzu	r15,   4(SP)

	lwzu	r16,   4(SP)
	lwzu	r17,   4(SP)
	lwzu	r18,   4(SP)
	lwzu	r19,   4(SP)

	lwzu	r20,   4(SP)
	lwzu	r21,   4(SP)
	lwzu	r22,   4(SP)
	lwzu	r23,   4(SP)

	lwzu	r24,   4(SP)
	lwzu	r25,   4(SP)
	lwzu	r26,   4(SP)
	lwzu	r27,   4(SP)

	lwzu	r28,   4(SP)
	lwzu	r29,   4(SP)
	lwzu	r30,   4(SP)
	lwzu	r31,   4(SP)

	subi	SP, SP, 12
	li	r0, 16

	lfpdux	f31, SP, r0
	lfpdux	f30, SP, r0
	lfpdux	f29, SP, r0
	lfpdux	f28, SP, r0
	lfpdux	f27, SP, r0
	lfpdux	f26, SP, r0
	lfpdux	f25, SP, r0
	lfpdux	f24, SP, r0
	lfpdux	f23, SP, r0
	lfpdux	f22, SP, r0
	lfpdux	f21, SP, r0
	lfpdux	f20, SP, r0
	lfpdux	f19, SP, r0
	lfpdux	f18, SP, r0
	lfpdux	f17, SP, r0
	lfpdux	f16, SP, r0
	lfpdux	f15, SP, r0
	lfpdux	f14, SP, r0
	addi	SP, SP, 16
	blr


	EPILOGUE
#endif