/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/
#define ASSEMBLER
 | 
						|
#include "common.h"
 | 
						|
		
 | 
						|
#undef ZERO
 | 
						|
 | 
						|
#define ALPHA    0
 | 
						|
#define FZERO	16
 | 
						|
 | 
						|
#define	M	r3
 | 
						|
#define	N	r4
 | 
						|
#define	K	r5
 | 
						|
 | 
						|
#ifdef linux
 | 
						|
#define A	r6
 | 
						|
#define	B	r7
 | 
						|
#define	C	r8
 | 
						|
#define	LDC	r9
 | 
						|
#define OFFSET	r10
 | 
						|
#endif
 | 
						|
 | 
						|
#define TEMP	r11
 | 
						|
#define KK	r14
 | 
						|
#define INCM1	r15
 | 
						|
#define INCM3	r16
 | 
						|
#define INCM5	r17
 | 
						|
#define INCM7	r18
 | 
						|
#define INC2	r19
 | 
						|
#define INC	r20
 | 
						|
#define INC4	r21
 | 
						|
 | 
						|
#define	I	r22
 | 
						|
#define J	r23
 | 
						|
#define AO	r24
 | 
						|
#define BO	r25
 | 
						|
#define AO2	r26
 | 
						|
#define	BO2	r27
 | 
						|
	
 | 
						|
#define	CO1	r28
 | 
						|
#define CO2	r29
 | 
						|
#define	ZERO	r31
 | 
						|
 | 
						|
#ifndef NEEDPARAM
 | 
						|
 | 
						|
#define A1	f16
 | 
						|
#define A2	f17
 | 
						|
#define A3	f18
 | 
						|
#define A4	f19
 | 
						|
#define A5	f20
 | 
						|
#define A6	f21
 | 
						|
#define A7	f22
 | 
						|
#define A8	f23
 | 
						|
#define A9	f24
 | 
						|
#define A10	f25
 | 
						|
 | 
						|
#define B1	f26
 | 
						|
#define B2	f27
 | 
						|
#define B3	f28
 | 
						|
#define B4	f29
 | 
						|
#define B5	f30
 | 
						|
#define B6	f31
 | 
						|
 | 
						|
#define AP	B6
 | 
						|
 | 
						|
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
 | 
						|
    defined(NR) || defined(NC) || defined(TR) || defined(TC)
 | 
						|
#define FXCPMADD fxcpmadd
 | 
						|
#define FXCSMADD fxcxnpma
 | 
						|
#else
 | 
						|
#define FXCPMADD fxcpnsma
 | 
						|
#define FXCSMADD fxcxma
 | 
						|
#endif
 | 
						|
 | 
						|
	PROLOGUE
 | 
						|
	PROFCODE
 | 
						|
 | 
						|
	li	r0, -16
 | 
						|
 | 
						|
	stfpdux	f14, SP, r0
 | 
						|
	stfpdux	f15, SP, r0
 | 
						|
	stfpdux	f16, SP, r0
 | 
						|
	stfpdux	f17, SP, r0
 | 
						|
	stfpdux	f18, SP, r0
 | 
						|
	stfpdux	f19, SP, r0
 | 
						|
	stfpdux	f20, SP, r0
 | 
						|
	stfpdux	f21, SP, r0
 | 
						|
	stfpdux	f22, SP, r0
 | 
						|
	stfpdux	f23, SP, r0
 | 
						|
	stfpdux	f24, SP, r0
 | 
						|
	stfpdux	f25, SP, r0
 | 
						|
	stfpdux	f26, SP, r0
 | 
						|
	stfpdux	f27, SP, r0
 | 
						|
	stfpdux	f28, SP, r0
 | 
						|
	stfpdux	f29, SP, r0
 | 
						|
	stfpdux	f30, SP, r0
 | 
						|
	stfpdux	f31, SP, r0
 | 
						|
	
 | 
						|
	stwu	r31,  -4(SP)
 | 
						|
	stwu	r30,  -4(SP)
 | 
						|
	stwu	r29,  -4(SP)
 | 
						|
	stwu	r28,  -4(SP)
 | 
						|
 | 
						|
	stwu	r27,  -4(SP)
 | 
						|
	stwu	r26,  -4(SP)
 | 
						|
	stwu	r25,  -4(SP)
 | 
						|
	stwu	r24,  -4(SP)
 | 
						|
 | 
						|
	stwu	r23,  -4(SP)
 | 
						|
	stwu	r22,  -4(SP)
 | 
						|
	stwu	r21,  -4(SP)
 | 
						|
	stwu	r20,  -4(SP)
 | 
						|
 | 
						|
	stwu	r19,  -4(SP)
 | 
						|
	stwu	r18,  -4(SP)
 | 
						|
	stwu	r17,  -4(SP)
 | 
						|
	stwu	r16,  -4(SP)
 | 
						|
 | 
						|
	stwu	r15,  -4(SP)
 | 
						|
	stwu	r14,  -4(SP)
 | 
						|
 | 
						|
	li	r0,   0
 | 
						|
	stwu	r0,   -4(SP)
 | 
						|
	stwu	r0,   -4(SP)
 | 
						|
 | 
						|
	stfdu	f2,   -8(SP)
 | 
						|
	stfdu	f1,   -8(SP)
 | 
						|
 | 
						|
	slwi	LDC, LDC, ZBASE_SHIFT
 | 
						|
 | 
						|
	cmpwi	cr0, M, 0
 | 
						|
	ble	.L999
 | 
						|
	cmpwi	cr0, N, 0
 | 
						|
	ble	.L999
 | 
						|
	cmpwi	cr0, K, 0
 | 
						|
	ble	.L999
 | 
						|
 | 
						|
#if defined(TRMMKERNEL) && !defined(LEFT)
 | 
						|
	neg	KK, OFFSET
 | 
						|
#endif
 | 
						|
 | 
						|
	andi.	r0, C,   2 * SIZE - 1
 | 
						|
	bne	.L1000
 | 
						|
 | 
						|
	li	INC,    1 * SIZE
 | 
						|
	li	INC2,   2 * SIZE
 | 
						|
	li	INC4,   4 * SIZE
 | 
						|
	li	INCM1, -1 * SIZE
 | 
						|
	li	INCM3, -2 * SIZE
 | 
						|
	li	INCM5, -4 * SIZE
 | 
						|
	li	INCM7, -6 * SIZE
 | 
						|
 | 
						|
	addi	C, C, - 2 * SIZE
 | 
						|
	srawi.	J, N,  1
 | 
						|
	ble	.L50
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L10:
 | 
						|
	mr	CO1, C
 | 
						|
	add	CO2, C,   LDC
 | 
						|
	add	C,   CO2, LDC
 | 
						|
 | 
						|
#if defined(TRMMKERNEL) &&  defined(LEFT)
 | 
						|
	mr	KK, OFFSET
 | 
						|
#endif
 | 
						|
 | 
						|
	addi	AO, A, -4 * SIZE
 | 
						|
	
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
 | 
						|
	srawi.	I, M,  2
 | 
						|
	ble	.L20
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L11:
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 2 + ZBASE_SHIFT
 | 
						|
	slwi	r0,   KK, 1 + ZBASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  BO, - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, BO,   2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#endif
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L14
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble	.L14
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
	fpmr	f5,  f0
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
	fpmr	f9,  f0
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
	fpmr	f13, f0
 | 
						|
 | 
						|
	LFPDUX	A5,  AO, INC4
 | 
						|
	fpmr	f2,  f0
 | 
						|
	LFPDUX	A6,  AO, INC4
 | 
						|
	fpmr	f6,  f0
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
	fpmr	f10, f0
 | 
						|
	LFPDUX	A7,  AO, INC4
 | 
						|
	fpmr	f14, f0
 | 
						|
 | 
						|
	LFPDUX	A8,  AO, INC4
 | 
						|
	fpmr	f3,  f0
 | 
						|
	LFPDUX	B5,  BO, INC4
 | 
						|
	fpmr	f7,  f0
 | 
						|
	LFPDUX	A9,  AO, INC4
 | 
						|
	fpmr	f11, f0
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	fpmr	f15, f0
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	bdz-	.L13
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L12:
 | 
						|
 | 
						|
## 1 ##
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B1, A1, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B2, A1, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	FXCSMADD	f12, B2, A1, f12
 | 
						|
	LFPDUX	B6,  BO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B2, A2, f9
 | 
						|
	LFPDUX	A10, AO, INC4
 | 
						|
	FXCSMADD	f13, B2, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	FXCSMADD	f6,  B1, A3, f6
 | 
						|
	nop
 | 
						|
	FXCPMADD	f10, B2, A3, f10
 | 
						|
	nop
 | 
						|
	FXCSMADD	f14, B2, A3, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	FXCSMADD	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	FXCPMADD	f11, B2, A4, f11
 | 
						|
	LFPDUX	A1,  AO, INC4
 | 
						|
	FXCSMADD	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 2 ##
 | 
						|
 | 
						|
	FXCPMADD	f0,  B3, A5, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B3, A5, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B4, A5, f8
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	FXCSMADD	f12, B4, A5, f12
 | 
						|
	LFPDUX	B1,  BO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B3, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B3, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B4, A2, f9
 | 
						|
	LFPDUX	A3,  AO, INC4
 | 
						|
	FXCSMADD	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f2,  B3, A6, f2
 | 
						|
	nop
 | 
						|
	FXCSMADD	f6,  B3, A6, f6
 | 
						|
	nop
 | 
						|
	FXCPMADD	f10, B4, A6, f10
 | 
						|
	nop
 | 
						|
	FXCSMADD	f14, B4, A6, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f3,  B3, A4, f3
 | 
						|
	nop
 | 
						|
	FXCSMADD	f7,  B3, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	FXCPMADD	f11, B4, A4, f11
 | 
						|
	LFPDUX	A5,  AO, INC4
 | 
						|
	FXCSMADD	f15, B4, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 3 ##
 | 
						|
 | 
						|
	FXCPMADD	f0,  B5, A7, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B5, A7, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B2, A7, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	FXCSMADD	f12, B2, A7, f12
 | 
						|
	LFPDUX	B3,  BO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B5, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B5, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B2, A2, f9
 | 
						|
	LFPDUX	A6,  AO, INC4
 | 
						|
	FXCSMADD	f13, B2, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f2,  B5, A8, f2
 | 
						|
	nop
 | 
						|
	FXCSMADD	f6,  B5, A8, f6
 | 
						|
	nop
 | 
						|
	FXCPMADD	f10, B2, A8, f10
 | 
						|
	nop
 | 
						|
	FXCSMADD	f14, B2, A8, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f3,  B5, A4, f3
 | 
						|
	nop
 | 
						|
	FXCSMADD	f7,  B5, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	FXCPMADD	f11, B2, A4, f11
 | 
						|
	LFPDUX	A7,  AO, INC4
 | 
						|
	FXCSMADD	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 4 ##
 | 
						|
	FXCPMADD	f0,  B6, A9, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B6, A9, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B4, A9, f8
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	FXCSMADD	f12, B4, A9, f12
 | 
						|
	LFPDUX	B5,  BO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B6, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B6, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B4, A2, f9
 | 
						|
	LFPDUX	A8,  AO, INC4
 | 
						|
	FXCSMADD	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f2,  B6, A10, f2
 | 
						|
	nop
 | 
						|
	FXCSMADD	f6,  B6, A10, f6
 | 
						|
	nop
 | 
						|
	FXCPMADD	f10, B4, A10, f10
 | 
						|
	nop
 | 
						|
	FXCSMADD	f14, B4, A10, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f3,  B6, A4, f3
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	FXCSMADD	f7,  B6, A4, f7
 | 
						|
	LFPDUX	A9,  AO, INC4
 | 
						|
	FXCPMADD	f11, B4, A4, f11
 | 
						|
	nop
 | 
						|
	FXCSMADD	f15, B4, A4, f15
 | 
						|
	bdnz+	.L12
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L13:
 | 
						|
## 1 ##
 | 
						|
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B1, A1, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B2, A1, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	FXCSMADD	f12, B2, A1, f12
 | 
						|
	LFPDUX	B6,  BO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B1, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B2, A2, f9
 | 
						|
	LFPDUX	A10, AO, INC4
 | 
						|
	FXCSMADD	f13, B2, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f2,  B1, A3, f2
 | 
						|
	nop
 | 
						|
	FXCSMADD	f6,  B1, A3, f6
 | 
						|
	nop
 | 
						|
	FXCPMADD	f10, B2, A3, f10
 | 
						|
	nop
 | 
						|
	FXCSMADD	f14, B2, A3, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f3,  B1, A4, f3
 | 
						|
	nop
 | 
						|
	FXCSMADD	f7,  B1, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	FXCPMADD	f11, B2, A4, f11
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A1, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	FXCSMADD	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 2 ##
 | 
						|
 | 
						|
	FXCPMADD	f0,  B3, A5, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B3, A5, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B4, A5, f8
 | 
						|
	LFPDUX	B2, BO2, INC4
 | 
						|
	FXCSMADD	f12, B4, A5, f12
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	B1, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
 | 
						|
	FXCPMADD	f1,  B3, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B3, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B4, A2, f9
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A3, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	FXCSMADD	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f2,  B3, A6, f2
 | 
						|
	nop
 | 
						|
	FXCSMADD	f6,  B3, A6, f6
 | 
						|
	nop
 | 
						|
	FXCPMADD	f10, B4, A6, f10
 | 
						|
	nop
 | 
						|
	FXCSMADD	f14, B4, A6, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f3,  B3, A4, f3
 | 
						|
	nop
 | 
						|
	FXCSMADD	f7,  B3, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	FXCPMADD	f11, B4, A4, f11
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
   	LFPDUX	A5, CO1, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	FXCSMADD	f15, B4, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 3 ##
 | 
						|
 | 
						|
	FXCPMADD	f0,  B5, A7, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B5, A7, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B2, A7, f8
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	FXCSMADD	f12, B2, A7, f12
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	B3, CO2, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
 | 
						|
	FXCPMADD	f1,  B5, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B5, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B2, A2, f9
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A6, CO2, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	FXCSMADD	f13, B2, A2, f13
 | 
						|
 | 
						|
	FXCPMADD	f2,  B5, A8, f2
 | 
						|
	nop
 | 
						|
	FXCSMADD	f6,  B5, A8, f6
 | 
						|
	nop
 | 
						|
	FXCPMADD	f10, B2, A8, f10
 | 
						|
	nop
 | 
						|
	FXCSMADD	f14, B2, A8, f14
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f3,  B5, A4, f3
 | 
						|
	nop
 | 
						|
	FXCSMADD	f7,  B5, A4, f7
 | 
						|
	LFPDUX	A2, AO2, INC4
 | 
						|
	FXCPMADD	f11, B2, A4, f11
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A7, CO2, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	FXCSMADD	f15, B2, A4, f15
 | 
						|
	nop
 | 
						|
 | 
						|
## 4 ##
 | 
						|
 | 
						|
	FXCPMADD	f0,  B6, A9, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B6, A9, f4
 | 
						|
	nop
 | 
						|
	FXCPMADD	f8,  B4, A9, f8
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	B2, CO2, INC2
 | 
						|
#else
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
 | 
						|
	FXCSMADD	f12, B4, A9, f12
 | 
						|
 | 
						|
	FXCPMADD	f1,  B6, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B6, A2, f5
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	FXCPMADD	f9,  B4, A2, f9
 | 
						|
	nop
 | 
						|
	FXCSMADD	f13, B4, A2, f13
 | 
						|
	nop
 | 
						|
 | 
						|
	FXCPMADD	f2,  B6, A10, f2
 | 
						|
	FXCSMADD	f6,  B6, A10, f6
 | 
						|
	FXCPMADD	f10, B4, A10, f10
 | 
						|
	FXCSMADD	f14, B4, A10, f14
 | 
						|
 | 
						|
	FXCPMADD	f3,  B6, A4, f3
 | 
						|
	FXCSMADD	f7,  B6, A4, f7
 | 
						|
	FXCPMADD	f11, B4, A4, f11
 | 
						|
	FXCSMADD	f15, B4, A4, f15
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L14:
 | 
						|
	li	r0, ALPHA
 | 
						|
	lfpdx	AP,  SP, r0
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f30, SP, r0
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 4
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	andi.	r0,  TEMP,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble+	.L18
 | 
						|
 | 
						|
	cmpwi	cr0, TEMP, 3
 | 
						|
	bgt+	.L15
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  3
 | 
						|
	mtspr	CTR, r0
 | 
						|
	ble+	.L18
 | 
						|
 | 
						|
	cmpwi	cr0, K, 3
 | 
						|
	bgt+	.L15
 | 
						|
#endif
 | 
						|
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDUX	A1, CO1, INC2
 | 
						|
	fpmr	f5,  f0
 | 
						|
	LFPDUX	B1, CO1, INC2
 | 
						|
	fpmr	f9,  f0
 | 
						|
	LFPDUX	A3, CO1, INC2
 | 
						|
	fpmr	f13, f0
 | 
						|
   	LFPDUX	A5, CO1, INC2
 | 
						|
	fpmr	f2,  f0
 | 
						|
 | 
						|
	LFPDUX	B3, CO2, INC2
 | 
						|
	fpmr	f6,  f0
 | 
						|
	LFPDUX	A6, CO2, INC2
 | 
						|
	fpmr	f10, f0
 | 
						|
	LFPDUX	A7, CO2, INC2
 | 
						|
	fpmr	f14, f0
 | 
						|
	LFPDUX	B2, CO2, INC2
 | 
						|
	fpmr	f3,  f0
 | 
						|
#else
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	fpmr	f13, f0
 | 
						|
	fpmr	f2,  f0
 | 
						|
 | 
						|
	fpmr	f6,  f0
 | 
						|
	fpmr	f10, f0
 | 
						|
	fpmr	f14, f0
 | 
						|
	fpmr	f3,  f0
 | 
						|
#endif
 | 
						|
 | 
						|
	fpmr	f7,  f0
 | 
						|
	fpmr	f11, f0
 | 
						|
	fpmr	f15, f0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L15:
 | 
						|
	LFPDUX	A2,  AO,  INC4
 | 
						|
	LFPDUX	A4,  AO2, INC4
 | 
						|
	LFPDUX	A10, BO,  INC4
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
	bdz-	.L17
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L16:
 | 
						|
	FXCPMADD	f0,  A10, A2, f0
 | 
						|
	FXCSMADD	f4,  A10, A2, f4
 | 
						|
	FXCPMADD	f8,  B4, A2, f8
 | 
						|
	FXCSMADD	f12, B4, A2, f12
 | 
						|
	LFPDUX	A2, AO,  INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  A10, A4, f1
 | 
						|
	FXCSMADD	f5,  A10, A4, f5
 | 
						|
	FXCPMADD	f9,  B4, A4, f9
 | 
						|
	FXCSMADD	f13, B4, A4, f13
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
 | 
						|
	FXCPMADD	f2,  A10, A2, f2
 | 
						|
	FXCSMADD	f6,  A10, A2, f6
 | 
						|
	FXCPMADD	f10, B4, A2, f10
 | 
						|
	FXCSMADD	f14, B4, A2, f14
 | 
						|
	LFPDUX	A2, AO,  INC4
 | 
						|
 | 
						|
	FXCPMADD	f3,  A10, A4, f3
 | 
						|
	FXCSMADD	f7,  A10, A4, f7
 | 
						|
	LFPDUX	A10, BO,  INC4
 | 
						|
	FXCPMADD	f11, B4, A4, f11
 | 
						|
	FXCSMADD	f15, B4, A4, f15
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
	LFPDUX	B4, BO2, INC4
 | 
						|
	bdnz+	.L16
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L17:
 | 
						|
	FXCPMADD	f0,  A10, A2, f0
 | 
						|
	FXCSMADD	f4,  A10, A2, f4
 | 
						|
	FXCPMADD	f8,  B4, A2, f8
 | 
						|
	FXCSMADD	f12, B4, A2, f12
 | 
						|
	LFPDUX	A2, AO,  INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  A10, A4, f1
 | 
						|
	FXCSMADD	f5,  A10, A4, f5
 | 
						|
	FXCPMADD	f9,  B4, A4, f9
 | 
						|
	FXCSMADD	f13, B4, A4, f13
 | 
						|
	LFPDUX	A4, AO2, INC4
 | 
						|
 | 
						|
	FXCPMADD	f2,  A10, A2, f2
 | 
						|
	FXCSMADD	f6,  A10, A2, f6
 | 
						|
	FXCPMADD	f10, B4, A2, f10
 | 
						|
	FXCSMADD	f14, B4, A2, f14
 | 
						|
 | 
						|
	FXCPMADD	f3,  A10, A4, f3
 | 
						|
	FXCSMADD	f7,  A10, A4, f7
 | 
						|
	FXCPMADD	f11, B4, A4, f11
 | 
						|
	FXCSMADD	f15, B4, A4, f15
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L18:
 | 
						|
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
 | 
						|
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
 | 
						|
	fpadd	f0, f0, f4
 | 
						|
	fpadd	f8, f8, f12
 | 
						|
	fpadd	f1, f1, f5
 | 
						|
	fpadd	f9, f9, f13
 | 
						|
 | 
						|
	fpadd	f2,  f2,  f6
 | 
						|
	fpadd	f10, f10, f14
 | 
						|
	fpadd	f3,  f3,  f7
 | 
						|
	fpadd	f11, f11, f15
 | 
						|
#else
 | 
						|
	fpsub	f0, f0, f4
 | 
						|
	fpsub	f8, f8, f12
 | 
						|
	fpsub	f1, f1, f5
 | 
						|
	fpsub	f9, f9, f13
 | 
						|
 | 
						|
	fpsub	f2,  f2,  f6
 | 
						|
	fpsub	f10, f10, f14
 | 
						|
	fpsub	f3,  f3,  f7
 | 
						|
	fpsub	f11, f11, f15
 | 
						|
#endif
 | 
						|
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	fxcpmadd A1,  f0, AP,  A1
 | 
						|
	fxcpmadd B1,  f1, AP,  B1
 | 
						|
	fxcpmadd A3,  f2, AP,  A3
 | 
						|
	fxcpmadd A5,  f3, AP,  A5
 | 
						|
 | 
						|
	fxcxnpma f0,  f0, AP,  A1
 | 
						|
	fxcpmadd B3,  f8,  AP,  B3
 | 
						|
	fxcxnpma f1,  f1, AP,  B1
 | 
						|
	fxcpmadd A6,  f9,  AP,  A6
 | 
						|
	fxcxnpma f2,  f2, AP,  A3
 | 
						|
	fxcpmadd A7,  f10, AP,  A7
 | 
						|
 | 
						|
	fxcxnpma f3,  f3, AP,  A5
 | 
						|
	fxcpmadd B2,  f11, AP,  B2
 | 
						|
	fxcxnpma f8,  f8,  AP,  B3
 | 
						|
	STFPDUX	f0,  CO1, INCM7
 | 
						|
	fxcxnpma f9,  f9,  AP,  A6
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
	fxcxnpma f10, f10, AP,  A7
 | 
						|
	STFPDUX	f2,  CO1, INC2
 | 
						|
 | 
						|
	fxcxnpma f11, f11, AP,  B2
 | 
						|
	STFPDUX	f3,  CO1, INC2
 | 
						|
	STFPDUX	f8,  CO2, INCM7
 | 
						|
	STFPDUX	f9,  CO2, INC2
 | 
						|
	STFPDUX	f10, CO2, INC2
 | 
						|
	STFPDUX	f11, CO2, INC2
 | 
						|
#else
 | 
						|
	fxcpmadd f12, f0,  AP,  f30
 | 
						|
	fxcpmadd f13, f1,  AP,  f30
 | 
						|
	fxcpmadd f14, f2,  AP,  f30
 | 
						|
	fxcpmadd f15, f3,  AP,  f30
 | 
						|
 | 
						|
	fxcxnpma f0,  f0,  AP,  f12
 | 
						|
	fxcxnpma f1,  f1,  AP,  f13
 | 
						|
	fxcxnpma f2,  f2,  AP,  f14
 | 
						|
	fxcxnpma f3,  f3,  AP,  f15
 | 
						|
 | 
						|
	fxcpmadd f16, f8,  AP,  f30
 | 
						|
	fxcpmadd f17, f9,  AP,  f30
 | 
						|
	fxcpmadd f18, f10, AP,  f30
 | 
						|
	fxcpmadd f19, f11, AP,  f30
 | 
						|
 | 
						|
	fxcxnpma f8,  f8,  AP,  f16
 | 
						|
	fxcxnpma f9,  f9,  AP,  f17
 | 
						|
	fxcxnpma f10, f10, AP,  f18
 | 
						|
	fxcxnpma f11, f11, AP,  f19
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INC2
 | 
						|
	STFPDUX	f1,  CO1, INC2
 | 
						|
	STFPDUX	f2,  CO1, INC2
 | 
						|
	STFPDUX	f3,  CO1, INC2
 | 
						|
 | 
						|
	STFPDUX	f8,  CO2, INC2
 | 
						|
	STFPDUX	f9,  CO2, INC2
 | 
						|
	STFPDUX	f10, CO2, INC2
 | 
						|
	STFPDUX	f11, CO2, INC2
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -4
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 4
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	addic.	I, I, -1
 | 
						|
	li	r0, FZERO
 | 
						|
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	bgt+	.L11
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L20:
 | 
						|
	andi.	I, M,  2
 | 
						|
	beq	.L30
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 1 + ZBASE_SHIFT
 | 
						|
	slwi	r0,   KK, 1 + ZBASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  BO, - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, BO,   2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
 | 
						|
#endif
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 2
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f13, f0
 | 
						|
	ble	.L24
 | 
						|
#else
 | 
						|
	addi	AO2, AO,   2 * SIZE
 | 
						|
	fpmr	f4,  f0
 | 
						|
	addi	BO,  B,  - 4 * SIZE
 | 
						|
	fpmr	f8,  f0
 | 
						|
	addi	BO2, B,  - 2 * SIZE
 | 
						|
	fpmr	f12, f0
 | 
						|
 | 
						|
	srawi.	r0,  K,  2
 | 
						|
 	fpmr	f1,  f0
 | 
						|
	fpmr	f5,  f0
 | 
						|
	fpmr	f9,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f13, f0
 | 
						|
	ble	.L24
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,   AO, INC4
 | 
						|
	LFPDUX	B1,   BO, INC4
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
	LFPDUX	A3,   AO, INC4
 | 
						|
	LFPDUX	B3,   BO, INC4
 | 
						|
	LFPDUX	A4,  AO2, INC4
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	LFPDUX	A5,   AO, INC4
 | 
						|
	LFPDUX	B5,   BO, INC4
 | 
						|
	LFPDUX	A6,  AO2, INC4
 | 
						|
	LFPDUX	B6,  BO2, INC4
 | 
						|
	LFPDUX	A7,   AO, INC4
 | 
						|
	LFPDUX	A9,   BO, INC4
 | 
						|
	LFPDUX	A10, BO2, INC4
 | 
						|
	bdz-	.L23
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L22:
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A8,  AO2, INC4
 | 
						|
	FXCPMADD	f8,  B2, A1, f8
 | 
						|
	nop
 | 
						|
	FXCSMADD	f12, B2, A1, f12
 | 
						|
	LFPDUX	A1,   AO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B1, A2, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B1, A2, f5
 | 
						|
	LFPDUX	B1,   BO, INC4
 | 
						|
	FXCPMADD	f9,  B2, A2, f9
 | 
						|
	nop
 | 
						|
	FXCSMADD	f13, B2, A2, f13
 | 
						|
	LFPDUX	B2,  BO2, INC4
 | 
						|
 | 
						|
	FXCPMADD	f0,  B3, A3, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B3, A3, f4
 | 
						|
	LFPDUX	A2,  AO2, INC4
 | 
						|
	FXCPMADD	f8,  B4, A3, f8
 | 
						|
	nop
 | 
						|
	FXCSMADD	f12, B4, A3, f12
 | 
						|
	LFPDUX	A3,   AO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B3, A4, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B3, A4, f5
 | 
						|
	LFPDUX	B3,   BO, INC4
 | 
						|
	FXCPMADD	f9,  B4, A4, f9
 | 
						|
	nop
 | 
						|
	FXCSMADD	f13, B4, A4, f13
 | 
						|
	LFPDUX	B4,  BO2, INC4
 | 
						|
 | 
						|
	FXCPMADD	f0,  B5, A5, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  B5, A5, f4
 | 
						|
	LFPDUX	A4,  AO2, INC4
 | 
						|
	FXCPMADD	f8,  B6, A5, f8
 | 
						|
	nop
 | 
						|
	FXCSMADD	f12, B6, A5, f12
 | 
						|
	LFPDUX	A5,   AO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  B5, A6, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  B5, A6, f5
 | 
						|
	LFPDUX	B5,   BO, INC4
 | 
						|
	FXCPMADD	f9,  B6, A6, f9
 | 
						|
	nop
 | 
						|
	FXCSMADD	f13, B6, A6, f13
 | 
						|
	LFPDUX	B6,  BO2, INC4
 | 
						|
 | 
						|
	FXCPMADD	f0,  A9,  A7, f0
 | 
						|
	nop
 | 
						|
	FXCSMADD	f4,  A9,  A7, f4
 | 
						|
	LFPDUX	A6,  AO2, INC4
 | 
						|
	FXCPMADD	f8,  A10, A7, f8
 | 
						|
	nop
 | 
						|
	FXCSMADD	f12, A10, A7, f12
 | 
						|
	LFPDUX	A7,   AO, INC4
 | 
						|
 | 
						|
	FXCPMADD	f1,  A9,  A8, f1
 | 
						|
	nop
 | 
						|
	FXCSMADD	f5,  A9,  A8, f5
 | 
						|
	LFPDUX	A9,   BO, INC4
 | 
						|
	FXCPMADD	f9,  A10, A8, f9
 | 
						|
	nop
 | 
						|
	FXCSMADD	f13, A10, A8, f13
 | 
						|
	LFPDUX	A10, BO2, INC4
 | 
						|
	bdnz+	.L22
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L23:
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	FXCSMADD	f4,  B1, A1, f4
 | 
						|
	LFPDUX	A8,  AO2, INC4
 | 
						|
	FXCPMADD	f8,  B2, A1, f8
 | 
						|
	FXCSMADD	f12, B2, A1, f12
 | 
						|
 | 
						|
	FXCPMADD	f1,  B1, A2, f1
 | 
						|
	FXCSMADD	f5,  B1, A2, f5
 | 
						|
	FXCPMADD	f9,  B2, A2, f9
 | 
						|
	FXCSMADD	f13, B2, A2, f13
 | 
						|
 | 
						|
	FXCPMADD	f0,  B3, A3, f0
 | 
						|
	FXCSMADD	f4,  B3, A3, f4
 | 
						|
	FXCPMADD	f8,  B4, A3, f8
 | 
						|
	FXCSMADD	f12, B4, A3, f12
 | 
						|
 | 
						|
	FXCPMADD	f1,  B3, A4, f1
 | 
						|
	FXCSMADD	f5,  B3, A4, f5
 | 
						|
	FXCPMADD	f9,  B4, A4, f9
 | 
						|
	FXCSMADD	f13, B4, A4, f13
 | 
						|
 | 
						|
	FXCPMADD	f0,  B5, A5, f0
 | 
						|
	FXCSMADD	f4,  B5, A5, f4
 | 
						|
	FXCPMADD	f8,  B6, A5, f8
 | 
						|
	FXCSMADD	f12, B6, A5, f12
 | 
						|
 | 
						|
	FXCPMADD	f1,  B5, A6, f1
 | 
						|
	FXCSMADD	f5,  B5, A6, f5
 | 
						|
	FXCPMADD	f9,  B6, A6, f9
 | 
						|
	FXCSMADD	f13, B6, A6, f13
 | 
						|
 | 
						|
	FXCPMADD	f0,  A9, A7, f0
 | 
						|
	FXCSMADD	f4,  A9, A7, f4
 | 
						|
	FXCPMADD	f8,  A10, A7, f8
 | 
						|
	FXCSMADD	f12, A10, A7, f12
 | 
						|
 | 
						|
	FXCPMADD	f1,  A9, A8, f1
 | 
						|
	FXCSMADD	f5,  A9, A8, f5
 | 
						|
	FXCPMADD	f9,  A10, A8, f9
 | 
						|
	FXCSMADD	f13, A10, A8, f13
 | 
						|
	.align 4
 | 
						|
 | 
						|
/* .L24: K-remainder (K mod 4) loop setup for the 2x2 tile.
   Loads alpha into AP; for TRMM also loads a zero constant into f30
   (TRMM has no C input to accumulate into).  The remainder count goes
   into CTR; if it is zero we fall through to the store code at .L28. */
.L24:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 2
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L28

	/* Prime one K iteration worth of operands before entering the loop. */
	LFPDUX	A1,  AO,  INC4
	LFPDUX	A2,  AO2, INC4
	LFPDUX	B1,  BO,  INC4
	LFPDUX	B2,  BO2, INC4
	bdz-	.L27
	.align 4

/* .L26: remainder loop, one K step per iteration; loads for the next
   step are interleaved with the FMAs of the current one. */
.L26:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	FXCPMADD	f8,  B2, A1, f8
	FXCSMADD	f12, B2, A1, f12
	LFPDUX	A1,  AO,  INC4

	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	B1,  BO,  INC4
	FXCPMADD	f9,  B2, A2, f9
	FXCSMADD	f13, B2, A2, f13
	LFPDUX	A2,  AO2, INC4
	LFPDUX	B2,  BO2, INC4
	bdnz+	.L26
	.align 4

/* .L27: last remainder step — consume the already-loaded operands. */
.L27:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	FXCPMADD	f8,  B2, A1, f8
	FXCSMADD	f12, B2, A1, f12

	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5
	FXCPMADD	f9,  B2, A2, f9
	FXCSMADD	f13, B2, A2, f13
	.align 4
/* .L28: write-back for the 2x2 tile.
   Combine the paired accumulators (real/imag cross terms: fpadd for
   N*/T*-type cases, fpsub for the conjugated ones), scale by alpha
   (AP) with fxcpmadd/fxcxnpma complex-multiply sequence, and store.
   Non-TRMM path accumulates into C (loaded into A1..A4 here); TRMM
   path multiplies against the zero register f30 instead, then advances
   the AO/BO pointers and KK by the consumed panel sizes. */
.L28:
#ifndef TRMMKERNEL
	/* Load existing C values: two complex from column CO1, two from CO2. */
	LFPDUX	A1, CO1, INC2
	LFPDUX	A2, CO1, INC2
	LFPDUX	A3, CO2, INC2
	LFPDUX	A4, CO2, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f4
	fpadd	f8, f8, f12
	fpadd	f1, f1, f5
	fpadd	f9, f9, f13
#else
	fpsub	f0, f0, f4
	fpsub	f8, f8, f12
	fpsub	f1, f1, f5
	fpsub	f9, f9, f13
#endif

#ifndef TRMMKERNEL
	/* C += alpha * result: real part via fxcpmadd, imag via fxcxnpma. */
	fxcpmadd A1,  f0, AP,  A1
	fxcpmadd A2,  f1, AP,  A2
	fxcpmadd A3,  f8, AP,  A3
	fxcpmadd A4,  f9, AP,  A4

	fxcxnpma f0,  f0, AP,  A1
	fxcxnpma f1,  f1, AP,  A2
	fxcxnpma f8,  f8, AP,  A3
	fxcxnpma f9,  f9, AP,  A4

	/* INCM3 rewinds CO1/CO2 to the tile start before storing. */
	STFPDUX	f0,  CO1, INCM3
	STFPDUX	f1,  CO1, INC2

	STFPDUX	f8,  CO2, INCM3
	STFPDUX	f9,  CO2, INC2
#else
	/* TRMM: C = alpha * result (f30 == 0, so no prior C contribution). */
	fxcpmadd f12,  f0, AP,  f30
	fxcpmadd f13,  f1, AP,  f30
	fxcpmadd f14,  f8, AP,  f30
	fxcpmadd f15,  f9, AP,  f30

	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f1,  f1, AP,  f13
	fxcxnpma f8,  f8, AP,  f14
	fxcxnpma f9,  f9, AP,  f15

	STFPDUX	f0,  CO1, INC2
	STFPDUX	f1,  CO1, INC2

	STFPDUX	f8,  CO2, INC2
	STFPDUX	f9,  CO2, INC2
#endif


#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -2
#else
	addi	TEMP, TEMP, -2
#endif
	/* Advance A and B panel pointers past the unconsumed K range
	   (both shifted by 1 + ZBASE_SHIFT for this 2x2 tile). */
	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 2
#endif
#endif

	/* Reload f0 = 0.0 for the next tile's accumulators. */
	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4
/* .L30: M-remainder — handle one remaining row (M & 1) against two
   columns of B.  Accumulators f0..f3 are zeroed via fpmr copies of f0.
   TRMM paths additionally offset AO/BO by KK panels and compute the
   effective K range in TEMP; CTR gets K/4 for the unrolled loop. */
.L30:
	andi.	I, M,  1
	beq	.L49

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	AO2, AO,   2 * SIZE
	fpmr	f1,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f2,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f3, f0
#else
	slwi	TEMP, KK, 0 + ZBASE_SHIFT
	slwi	r0,   KK, 1 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	AO2, AO,   2 * SIZE
	fpmr	f1,  f0
	addi	BO,  BO,  - 4 * SIZE
	fpmr	f2,  f0
	addi	BO2, BO,    2 * SIZE
	fpmr	f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 2
#endif
	srawi.	r0,  TEMP,  2
	mtspr	CTR, r0
	ble	.L34
#else
	addi	AO2, AO,   2 * SIZE
	fpmr	f1,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f2,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f3, f0

	srawi.	r0,  K,  2
	mtspr	CTR, r0
	ble	.L34
#endif

	/* Preload four K iterations of A (A1..A4) and B (B1..B4, A5..A8). */
	LFPDUX	A1,  AO, INC4
	LFPDUX	B1,  BO, INC4
	LFPDUX	B2, BO2, INC4
	LFPDUX	A2, AO2, INC4
	LFPDUX	B3,  BO, INC4
	LFPDUX	B4, BO2, INC4

	LFPDUX	A3,  AO, INC4
	LFPDUX	A5,  BO, INC4
	LFPDUX	A6, BO2, INC4
	LFPDUX	A4, AO2, INC4
	LFPDUX	A7,  BO, INC4
	LFPDUX	A8, BO2, INC4
	bdz-	.L33
	.align 4

/* .L32: main 4x-unrolled K loop for the 1x2 tile; each quarter reloads
   the operand registers it just consumed. */
.L32:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	LFPDUX	B1,  BO, INC4
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3
	LFPDUX	B2, BO2, INC4
	LFPDUX	A1,  AO, INC4

	FXCPMADD	f0,  B3, A2, f0
	FXCSMADD	f1,  B3, A2, f1
	LFPDUX	B3,  BO, INC4
	FXCPMADD	f2,  B4, A2, f2
	FXCSMADD	f3,  B4, A2, f3
	LFPDUX	B4, BO2, INC4
	LFPDUX	A2, AO2, INC4

	FXCPMADD	f0,  A5, A3, f0
	FXCSMADD	f1,  A5, A3, f1
	LFPDUX	A5,  BO, INC4
	FXCPMADD	f2,  A6, A3, f2
	FXCSMADD	f3,  A6, A3, f3
	LFPDUX	A6, BO2, INC4
	LFPDUX	A3,  AO, INC4

	FXCPMADD	f0,  A7, A4, f0
	FXCSMADD	f1,  A7, A4, f1
	LFPDUX	A7,  BO, INC4
	FXCPMADD	f2,  A8, A4, f2
	FXCSMADD	f3,  A8, A4, f3
	LFPDUX	A8, BO2, INC4
	LFPDUX	A4, AO2, INC4
	bdnz+	.L32
	.align 4

/* .L33: final unrolled pass — operands already resident, no loads. */
.L33:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3

	FXCPMADD	f0,  B3, A2, f0
	FXCSMADD	f1,  B3, A2, f1
	FXCPMADD	f2,  B4, A2, f2
	FXCSMADD	f3,  B4, A2, f3

	FXCPMADD	f0,  A5, A3, f0
	FXCSMADD	f1,  A5, A3, f1
	FXCPMADD	f2,  A6, A3, f2
	FXCSMADD	f3,  A6, A3, f3

	FXCPMADD	f0,  A7, A4, f0
	FXCSMADD	f1,  A7, A4, f1
	FXCPMADD	f2,  A8, A4, f2
	FXCSMADD	f3,  A8, A4, f3
	.align 4
/* .L34: K-remainder (K mod 4) for the 1x2 tile; loads alpha (and the
   zero constant for TRMM), then runs the scalar remainder loop .L36. */
.L34:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 2
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L38

	/* A advances 2*SIZE per K here (single row), done as LFPDX + add. */
	LFPDX	A1,  AO,  INC4
	LFPDUX	B1,  BO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC2
	bdz-	.L37
	.align 4

/* .L36: remainder loop, one K step per iteration. */
.L36:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	LFPDUX	B1,  BO,  INC4
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3
	LFPDX	A1,  AO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC2
	bdnz+	.L36
	.align 4

/* .L37: last remainder step with preloaded operands. */
.L37:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3
	.align 4
/* .L38: write-back for the 1x2 tile (one complex element per column),
   then .L49 closes the N-loop iteration: TRMM KK bookkeeping, reset B
   past the consumed panel, decrement J and branch back to .L10. */
.L38:
#ifndef TRMMKERNEL
	LFPDX	A1, CO1, INC2
	LFPDX	A2, CO2, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f1
	fpadd	f2, f2, f3
#else
	fpsub	f0, f0, f1
	fpsub	f2, f2, f3
#endif

#ifndef TRMMKERNEL
	/* C += alpha * result (complex multiply by AP). */
	fxcpmadd A1,  f0, AP,  A1
	fxcpmadd A2,  f2, AP,  A2
	fxcxnpma f0,  f0, AP,  A1
	fxcxnpma f2,  f2, AP,  A2
#else
	/* TRMM: C = alpha * result (f30 == 0). */
	fxcpmadd f12, f0, AP,  f30
	fxcpmadd f13, f2, AP,  f30
	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f2,  f2, AP,  f13
#endif

	STFPDUX	f0,  CO1, INC2
	STFPDUX	f2,  CO2, INC2

#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -1
#else
	addi	TEMP, TEMP, -2
#endif
	/* Skip the unconsumed K range: A by 1 element/K, B by 2. */
	slwi	r0,   TEMP, 0 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 1
#endif
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4

/* .L49: end of one N=2 column-pair iteration. */
.L49:
#if defined(TRMMKERNEL) && !defined(LEFT)
	addi	KK, KK, 2
#endif

	/* B panel pointer for the next column pair (BO ends 4*SIZE behind). */
	addi	B,  BO, 4 * SIZE

	addic.	J, J, -1
	bgt+	.L10
	.align 4
/* .L50: N-remainder — handle the last single column of C (N & 1).
   Resets CO1 to the current C column, reloads the zero constant into
   f0, and enters the M/4 loop at .L51 (skip to .L60 if M < 4). */
.L50:
	andi.	J, N,  1
	beq	.L999

	mr	CO1, C

#if defined(TRMMKERNEL) &&  defined(LEFT)
	mr	KK, OFFSET
#endif

	/* AO is kept biased by -2*SIZE for the pre-increment LFPDUX loads. */
	addi	AO, A, -2 * SIZE

	li	r0, FZERO
	lfpsx	f0, SP, r0

	srawi.	I, M,  2
	ble	.L60
	.align 4
/* .L51: 4x1 tile — four rows of A against one column of B.
   Accumulators: f0..f3 (primary slots) paired with f4..f7 (secondary).
   TRMM paths offset AO/BO by KK panels and size the K loop from TEMP;
   the interleaved `nop`s in the loop bodies preserve the original
   dual-issue scheduling and must not be removed. */
.L51:
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	fpmr	f4,  f0
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f2,  f0
	fpmr	f6,  f0
#else
	slwi	TEMP, KK, 2 + ZBASE_SHIFT
	slwi	r0,   KK, 0 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	fpmr	f4,  f0
	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f2,  f0
	fpmr	f6,  f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 1
#endif
	srawi.	r0,  TEMP,  2
	fpmr	f3,  f0
	mtspr	CTR, r0
	fpmr	f7,  f0
	ble	.L54
#else
	srawi.	r0,  K,  2
	fpmr	f4,  f0
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f2,  f0
	fpmr	f6,  f0
	fpmr	f3,  f0
	mtspr	CTR, r0
	fpmr	f7,  f0
	ble	.L54
#endif

	/* Preload B1 and two K iterations of the four A rows. */
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2

	LFPDUX	B3,  BO,  INC2
	LFPDUX	A5,  AO,  INC2
	LFPDUX	A6,  AO,  INC2
	LFPDUX	A7,  AO,  INC2
	LFPDUX	A8,  AO,  INC2
	bdz-	.L53
	.align 4

/* .L52: main 4x-unrolled K loop for the 4x1 tile. */
.L52:
	FXCPMADD	f0,  B1, A1, f0
	LFPDUX	B4,  BO,  INC2
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	nop
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B1, A3, f2
	nop
	FXCSMADD	f6,  B1, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B1, A4, f3
	nop
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A4,  AO,  INC2

	FXCPMADD	f0,  B2, A5, f0
	LFPDUX	B1,  BO,  INC2
	FXCSMADD	f4,  B2, A5, f4
	LFPDUX	A5,  AO,  INC2
	FXCPMADD	f1,  B2, A6, f1
	nop
	FXCSMADD	f5,  B2, A6, f5
	LFPDUX	A6,  AO,  INC2

	FXCPMADD	f2,  B2, A7, f2
	nop
	FXCSMADD	f6,  B2, A7, f6
	LFPDUX	A7,  AO,  INC2
	FXCPMADD	f3,  B2, A8, f3
	nop
	FXCSMADD	f7,  B2, A8, f7
	LFPDUX	A8,  AO,  INC2

	FXCPMADD	f0,  B3, A1, f0
	LFPDUX	B2,  BO,  INC2
	FXCSMADD	f4,  B3, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B3, A2, f1
	nop
	FXCSMADD	f5,  B3, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B3, A3, f2
	nop
	FXCSMADD	f6,  B3, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B3, A4, f3
	nop
	FXCSMADD	f7,  B3, A4, f7
	LFPDUX	A4,  AO,  INC2

	FXCPMADD	f0,  B4, A5, f0
	LFPDUX	B3,  BO,  INC2
	FXCSMADD	f4,  B4, A5, f4
	LFPDUX	A5,  AO,  INC2
	FXCPMADD	f1,  B4, A6, f1
	nop
	FXCSMADD	f5,  B4, A6, f5
	LFPDUX	A6,  AO,  INC2

	FXCPMADD	f2,  B4, A7, f2
	nop
	FXCSMADD	f6,  B4, A7, f6
	LFPDUX	A7,  AO,  INC2
	FXCPMADD	f3,  B4, A8, f3
	nop
	FXCSMADD	f7,  B4, A8, f7
	LFPDUX	A8,  AO,  INC2
	bdnz+	.L52
	.align 4

/* .L53: drain pass — first half still reloads A1..A8, second half
   consumes resident operands only. */
.L53:
	FXCPMADD	f0,  B1, A1, f0
	LFPDUX	B4,  BO,  INC2
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	nop
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B1, A3, f2
	nop
	FXCSMADD	f6,  B1, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B1, A4, f3
	nop
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A4,  AO,  INC2

	FXCPMADD	f0,  B2, A5, f0
	nop
	FXCSMADD	f4,  B2, A5, f4
	LFPDUX	A5,  AO,  INC2
	FXCPMADD	f1,  B2, A6, f1
	nop
	FXCSMADD	f5,  B2, A6, f5
	LFPDUX	A6,  AO,  INC2

	FXCPMADD	f2,  B2, A7, f2
	nop
	FXCSMADD	f6,  B2, A7, f6
	LFPDUX	A7,  AO,  INC2
	FXCPMADD	f3,  B2, A8, f3
	nop
	FXCSMADD	f7,  B2, A8, f7
	LFPDUX	A8,  AO,  INC2

	FXCPMADD	f0,  B3, A1, f0
	FXCSMADD	f4,  B3, A1, f4
	FXCPMADD	f1,  B3, A2, f1
	FXCSMADD	f5,  B3, A2, f5

	FXCPMADD	f2,  B3, A3, f2
	FXCSMADD	f6,  B3, A3, f6
	FXCPMADD	f3,  B3, A4, f3
	FXCSMADD	f7,  B3, A4, f7

	FXCPMADD	f0,  B4, A5, f0
	FXCSMADD	f4,  B4, A5, f4
	FXCPMADD	f1,  B4, A6, f1
	FXCSMADD	f5,  B4, A6, f5

	FXCPMADD	f2,  B4, A7, f2
	FXCSMADD	f6,  B4, A7, f6
	FXCPMADD	f3,  B4, A8, f3
	FXCSMADD	f7,  B4, A8, f7
	.align 4
/* .L54: K-remainder (K mod 4) for the 4x1 tile; loads alpha (and the
   TRMM zero constant) then runs the scalar remainder loop .L56. */
.L54:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 1
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L58

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2
	bdz-	.L57
	.align 4

/* .L56: remainder loop, one K step (four rows) per iteration. */
.L56:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B1, A3, f2
	FXCSMADD	f6,  B1, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B1, A4, f3
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	bdnz+	.L56
	.align 4

/* .L57: last remainder step with preloaded operands. */
.L57:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5

	FXCPMADD	f2,  B1, A3, f2
	FXCSMADD	f6,  B1, A3, f6
	FXCPMADD	f3,  B1, A4, f3
	FXCSMADD	f7,  B1, A4, f7
	.align 4
/* .L58: write-back for the 4x1 tile: fold secondary accumulators into
   primaries (fpadd/fpsub by conjugation case), scale by alpha, store
   four complex elements into column CO1, then TRMM pointer/KK
   bookkeeping and loop back to .L51 while rows remain. */
.L58:
#ifndef TRMMKERNEL
	LFPDUX	A1, CO1, INC2
	LFPDUX	A2, CO1, INC2
	LFPDUX	A3, CO1, INC2
	LFPDUX	A4, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f4
	fpadd	f1, f1, f5
	fpadd	f2, f2, f6
	fpadd	f3, f3, f7
#else
	fpsub	f0, f0, f4
	fpsub	f1, f1, f5
	fpsub	f2, f2, f6
	fpsub	f3, f3, f7
#endif

#ifndef TRMMKERNEL
	/* C += alpha * result (complex multiply by AP). */
	fxcpmadd A1,  f0, AP,  A1
	fxcpmadd A2,  f1, AP,  A2
	fxcpmadd A3,  f2, AP,  A3
	fxcpmadd A4,  f3, AP,  A4

	fxcxnpma f0,  f0, AP,  A1
	fxcxnpma f1,  f1, AP,  A2
	fxcxnpma f2,  f2, AP,  A3
	fxcxnpma f3,  f3, AP,  A4

	/* INCM7 rewinds CO1 to the start of the four loaded elements. */
	STFPDUX	f0,  CO1, INCM7
	STFPDUX	f1,  CO1, INC2
	STFPDUX	f2,  CO1, INC2
	STFPDUX	f3,  CO1, INC2
#else
	/* TRMM: C = alpha * result (f30 == 0). */
	fxcpmadd f12, f0, AP,  f30
	fxcpmadd f13, f1, AP,  f30
	fxcpmadd f14, f2, AP,  f30
	fxcpmadd f15, f3, AP,  f30

	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f1,  f1, AP,  f13
	fxcxnpma f2,  f2, AP,  f14
	fxcxnpma f3,  f3, AP,  f15

	STFPDUX	f0,  CO1, INC2
	STFPDUX	f1,  CO1, INC2
	STFPDUX	f2,  CO1, INC2
	STFPDUX	f3,  CO1, INC2
#endif


#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -4
#else
	addi	TEMP, TEMP, -1
#endif
	/* Skip the unconsumed K range: A by 4 elements/K, B by 1. */
	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 4
#endif
#endif

	addic.	I, I, -1
	li	r0, FZERO

	lfpsx	f0, SP, r0
	bgt+	.L51
	.align 4
/* .L60: 2x1 tile — two remaining rows (M & 2) against one column of B.
   Accumulators f0/f1 (primary) paired with f2/f3 (secondary); TRMM
   paths offset AO/BO and size the K loop from TEMP. */
.L60:
	andi.	I, M,  2
	beq	.L70

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
#else
	slwi	TEMP, KK, 1 + ZBASE_SHIFT
	slwi	r0,   KK, 0 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 1
#endif

	srawi.	r0,  TEMP,  2
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L64

#else
	srawi.	r0,  K,  2
	fpmr	f1,  f0
	addi	BO,  B,  - 2 * SIZE
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L64
#endif

	/* Preload four K iterations: B1..B4 and the paired A rows A1..A8. */
	LFPDUX	B1,  BO, INC2
	LFPDUX	A1,  AO, INC2
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2
	LFPDUX	A3,  AO, INC2
	LFPDUX	A4,  AO, INC2

	LFPDUX	B3,  BO, INC2
	LFPDUX	A5,  AO, INC2
	LFPDUX	A6,  AO, INC2
	LFPDUX	B4,  BO, INC2
	LFPDUX	A7,  AO, INC2
	LFPDUX	A8,  AO, INC2
	bdz-	.L63
	.align 4

/* .L62: main 4x-unrolled K loop for the 2x1 tile. */
.L62:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	LFPDUX	A1,  AO, INC2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3
	LFPDUX	A2,  AO, INC2
	LFPDUX	B1,  BO, INC2

	FXCPMADD	f0,  B2, A3, f0
	FXCSMADD	f2,  B2, A3, f2
	LFPDUX	A3,  AO, INC2
	FXCPMADD	f1,  B2, A4, f1
	FXCSMADD	f3,  B2, A4, f3
	LFPDUX	A4,  AO, INC2
	LFPDUX	B2,  BO, INC2

	FXCPMADD	f0,  B3, A5, f0
	FXCSMADD	f2,  B3, A5, f2
	LFPDUX	A5,  AO, INC2
	FXCPMADD	f1,  B3, A6, f1
	FXCSMADD	f3,  B3, A6, f3
	LFPDUX	A6,  AO, INC2
	LFPDUX	B3,  BO, INC2

	FXCPMADD	f0,  B4, A7, f0
	FXCSMADD	f2,  B4, A7, f2
	LFPDUX	A7,  AO, INC2
	FXCPMADD	f1,  B4, A8, f1
	FXCSMADD	f3,  B4, A8, f3
	LFPDUX	A8,  AO, INC2
	LFPDUX	B4,  BO, INC2
	bdnz+	.L62
	.align 4

/* .L63: final unrolled pass — operands already resident, no loads. */
.L63:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3

	FXCPMADD	f0,  B2, A3, f0
	FXCSMADD	f2,  B2, A3, f2
	FXCPMADD	f1,  B2, A4, f1
	FXCSMADD	f3,  B2, A4, f3

	FXCPMADD	f0,  B3, A5, f0
	FXCSMADD	f2,  B3, A5, f2
	FXCPMADD	f1,  B3, A6, f1
	FXCSMADD	f3,  B3, A6, f3

	FXCPMADD	f0,  B4, A7, f0
	FXCSMADD	f2,  B4, A7, f2
	FXCPMADD	f1,  B4, A8, f1
	FXCSMADD	f3,  B4, A8, f3
	.align 4
/* .L64: K-remainder (K mod 4) for the 2x1 tile; loads alpha (and the
   TRMM zero constant), then runs the scalar remainder loop .L66. */
.L64:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 1
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L68

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	bdz-	.L67
	.align 4

/* .L66: remainder loop, one K step (two rows) per iteration. */
.L66:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	bdnz+	.L66
	.align 4

/* .L67: last remainder step with preloaded operands. */
.L67:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3
	.align 4
/* .L68: write-back for the 2x1 tile: fold secondary accumulators,
   scale by alpha, store two complex elements to CO1, then TRMM
   pointer/KK bookkeeping and accumulator reset. */
.L68:
#ifndef TRMMKERNEL
	LFPDUX	A1, CO1, INC2
	LFPDUX	A2, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f2
	fpadd	f1, f1, f3
#else
	fpsub	f0, f0, f2
	fpsub	f1, f1, f3
#endif

#ifndef TRMMKERNEL
	/* C += alpha * result (complex multiply by AP). */
	fxcpmadd A1,  f0, AP,  A1
	fxcpmadd A2,  f1, AP,  A2
	fxcxnpma f0,  f0, AP,  A1
	fxcxnpma f1,  f1, AP,  A2

	/* INCM3 rewinds CO1 to the start of the two loaded elements. */
	STFPDUX	f0,  CO1, INCM3
	STFPDUX	f1,  CO1, INC2
#else
	/* TRMM: C = alpha * result (f30 == 0). */
	fxcpmadd f12, f0, AP,  f30
	fxcpmadd f13, f1, AP,  f30
	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f1,  f1, AP,  f13

	STFPDUX	f0,  CO1, INC2
	STFPDUX	f1,  CO1, INC2
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -2
#else
	addi	TEMP, TEMP, -1
#endif
	/* Skip the unconsumed K range: A by 2 elements/K, B by 1. */
	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 2
#endif
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4
/* .L70: 1x1 tile — last row (M & 1) against the last column.  K loop is
   8x-unrolled (srawi by 3) into accumulator pairs f0/f1 and f2/f3,
   which are folded together at .L78. */
.L70:
	andi.	I, M,  1
	beq	.L89

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
#else
	slwi	TEMP, KK, 0 + ZBASE_SHIFT
	slwi	r0,   KK, 0 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 1
#endif
	srawi.	r0,  TEMP,  3
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L74
#else
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	srawi.	r0,  K,  3
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L74
#endif

	/* Preload eight K iterations of A and B (A9/A10 double as B regs). */
	LFPDUX	A1,  AO, INC2
	LFPDUX	B1,  BO, INC2
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2
	LFPDUX	A3,  AO, INC2
	LFPDUX	B3,  BO, INC2
	LFPDUX	A4,  AO, INC2
	LFPDUX	B4,  BO, INC2

	LFPDUX	A5,  AO, INC2
	LFPDUX	B5,  BO, INC2
	LFPDUX	A6,  AO, INC2
	LFPDUX	B6,  BO, INC2
	LFPDUX	A7,  AO, INC2
	LFPDUX	A9,  BO, INC2
	LFPDUX	A8,  AO, INC2
	LFPDUX	A10, BO, INC2
	bdz-	.L73
	.align 4

/* .L72: main 8x-unrolled K loop for the 1x1 tile. */
.L72:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	LFPDUX	A1,  AO, INC2
	LFPDUX	B1,  BO, INC2
	FXCPMADD	f2,  B2, A2, f2
	FXCSMADD	f3,  B2, A2, f3
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2

	FXCPMADD	f0,  B3, A3, f0
	FXCSMADD	f1,  B3, A3, f1
	LFPDUX	A3,  AO, INC2
	LFPDUX	B3,  BO, INC2
	FXCPMADD	f2,  B4, A4, f2
	FXCSMADD	f3,  B4, A4, f3
	LFPDUX	A4,  AO, INC2
	LFPDUX	B4,  BO, INC2

	FXCPMADD	f0,  B5, A5, f0
	FXCSMADD	f1,  B5, A5, f1
	LFPDUX	A5,  AO, INC2
	LFPDUX	B5,  BO, INC2
	FXCPMADD	f2,  B6, A6, f2
	FXCSMADD	f3,  B6, A6, f3
	LFPDUX	A6,  AO, INC2
	LFPDUX	B6,  BO, INC2

	FXCPMADD	f0,  A9,  A7, f0
	FXCSMADD	f1,  A9,  A7, f1
	LFPDUX	A7,  AO, INC2
	LFPDUX	A9,  BO, INC2
	FXCPMADD	f2,  A10, A8, f2
	FXCSMADD	f3,  A10, A8, f3
	LFPDUX	A8,  AO, INC2
	LFPDUX	A10, BO, INC2

	bdnz+	.L72
	.align 4

/* .L73: final unrolled pass — operands already resident, no loads. */
.L73:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	FXCPMADD	f2,  B2, A2, f2
	FXCSMADD	f3,  B2, A2, f3

	FXCPMADD	f0,  B3, A3, f0
	FXCSMADD	f1,  B3, A3, f1
	FXCPMADD	f2,  B4, A4, f2
	FXCSMADD	f3,  B4, A4, f3

	FXCPMADD	f0,  B5, A5, f0
	FXCSMADD	f1,  B5, A5, f1
	FXCPMADD	f2,  B6, A6, f2
	FXCSMADD	f3,  B6, A6, f3

	FXCPMADD	f0,  A9,  A7, f0
	FXCSMADD	f1,  A9,  A7, f1
	FXCPMADD	f2,  A10, A8, f2
	FXCSMADD	f3,  A10, A8, f3
	.align 4
/* .L74: K-remainder (K mod 8) for the 1x1 tile; loads alpha (and the
   TRMM zero constant), then runs the scalar remainder loop .L76. */
.L74:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 1
#endif
	andi.	r0,  TEMP,  7
	mtspr	CTR, r0
#else
	andi.	r0,  K,  7
	mtspr	CTR, r0
#endif
	ble+	.L78

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	bdz-	.L77
	.align 4

/* .L76: remainder loop, one K step per iteration. */
.L76:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	bdnz+	.L76
	.align 4

/* .L77: last remainder step with preloaded operands. */
.L77:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	.align 4
.L78:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFPDX	A1, CO1, INC2
 | 
						|
#endif
 | 
						|
 | 
						|
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
 | 
						|
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
#else
 | 
						|
	fpsub	f0, f0, f1
 | 
						|
#endif
 | 
						|
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	fxcpmadd A1,  f0, AP,  A1
 | 
						|
	fxcxnpma f0,  f0, AP,  A1
 | 
						|
#else
 | 
						|
	fxcpmadd f12, f0, AP,  f30
 | 
						|
	fxcxnpma f0,  f0, AP,  f12
 | 
						|
#endif
 | 
						|
 | 
						|
	STFPDUX	f0,  CO1, INC2
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L89:
 | 
						|
	addi	B,  BO, 2 * SIZE
 | 
						|
	.align 4
 | 
						|
 | 
						|
/* .L999: function epilogue.  Restores the callee-saved state spilled by the
 * prologue (outside this view): integer registers r14-r31 via lwzu walking up
 * the 4-byte save slots, then paired FP registers f31 down to f14 via lfpdux
 * with a 16-byte stride, then returns.  The addi/subi juggling repositions SP
 * between the two save areas; offsets (20, 12, 16) must mirror the prologue
 * layout -- do not change independently. */
.L999:
	addi	SP, SP, 20

	lwzu	r14,   4(SP)
	lwzu	r15,   4(SP)

	lwzu	r16,   4(SP)
	lwzu	r17,   4(SP)
	lwzu	r18,   4(SP)
	lwzu	r19,   4(SP)

	lwzu	r20,   4(SP)
	lwzu	r21,   4(SP)
	lwzu	r22,   4(SP)
	lwzu	r23,   4(SP)

	lwzu	r24,   4(SP)
	lwzu	r25,   4(SP)
	lwzu	r26,   4(SP)
	lwzu	r27,   4(SP)

	lwzu	r28,   4(SP)
	lwzu	r29,   4(SP)
	lwzu	r30,   4(SP)
	lwzu	r31,   4(SP)

	subi	SP, SP, 12
	li	r0, 16			/* 16-byte stride for paired-FP reloads */

	lfpdux	f31, SP, r0
	lfpdux	f30, SP, r0
	lfpdux	f29, SP, r0
	lfpdux	f28, SP, r0
	lfpdux	f27, SP, r0
	lfpdux	f26, SP, r0
	lfpdux	f25, SP, r0
	lfpdux	f24, SP, r0
	lfpdux	f23, SP, r0
	lfpdux	f22, SP, r0
	lfpdux	f21, SP, r0
	lfpdux	f20, SP, r0
	lfpdux	f19, SP, r0
	lfpdux	f18, SP, r0
	lfpdux	f17, SP, r0
	lfpdux	f16, SP, r0
	lfpdux	f15, SP, r0
	lfpdux	f14, SP, r0
	addi	SP, SP, 16		/* final stack-pointer restore */
	blr
	.align 4
 | 
						|
 | 
						|
 | 
						|
/* .L1000: entry of the alternate kernel path (reached when the earlier
 * alignment test branched here -- presumably the unaligned-C variant, which
 * stores element-wise with STFDUX/STFSDUX instead of paired stores).
 * Materialize the signed element-stride constants used by the indexed
 * load/store-with-update idioms below, bias C by one element, and start the
 * loop over column pairs (J = N >> 1). */
.L1000:
	li	INC,    1 * SIZE
	li	INC2,   2 * SIZE
	li	INC4,   4 * SIZE
	li	INCM1, -1 * SIZE	/* negative strides step back between */
	li	INCM3, -3 * SIZE	/* the real and imaginary store passes */
	li	INCM5, -5 * SIZE
	li	INCM7, -7 * SIZE

	addi	C, C, - 1 * SIZE	/* pre-bias C for the -with-update idiom */
	srawi.	J, N,  1		/* J = number of column pairs */
	ble	.L1050
	.align 4

/* .L1010: per-column-pair setup: CO1/CO2 point at the two output columns,
 * C advances by 2*LDC, KK is reset for TRMM-left, AO is pre-biased for the
 * INC4-update loads, and f0 is loaded with the paired zero constant used to
 * clear all accumulators.  I = M >> 4x counts 4-row tiles. */
.L1010:
	mr	CO1, C
	add	CO2, C,   LDC
	add	C,   CO2, LDC

#if defined(TRMMKERNEL) &&  defined(LEFT)
	mr	KK, OFFSET
#endif

	addi	AO, A, -4 * SIZE

	li	r0, FZERO
	lfpsx	f0, SP, r0

	srawi.	I, M,  2		/* I = number of 4-row tiles */
	ble	.L1020
	.align 4
 | 
						|
 | 
						|
/* .L1011: setup for one 4x2 tile.  AO2/BO2 are secondary pointers offset by
 * 2*SIZE so paired loads can fetch interleaved halves of the panels; BO/BO2
 * are pre-biased for the INC4-update idiom.  For TRMM the A/B pointers are
 * first advanced by KK tiles (shift amounts 2+ZBASE_SHIFT / 1+ZBASE_SHIFT =
 * bytes per 4-wide / 2-wide complex tile row) and the effective trip count
 * TEMP is derived from K, KK and the LEFT/TRANSA combination.  The sixteen
 * accumulators f0..f15 are cleared by copying the zero pair in f0 (fpmr),
 * interleaved with the first round of panel preloads to hide latency.
 * CTR = trip/4 for the 4x-unrolled loop at .L1012. */
.L1011:
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f12, f0
#else
	/* skip the first KK iterations of both panels */
	slwi	TEMP, KK, 2 + ZBASE_SHIFT
	slwi	r0,   KK, 1 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  BO, - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, BO,   2 * SIZE
	fpmr	f12, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 2
#endif
	srawi.	r0,  TEMP,  2
 	fpmr	f1,  f0
	mtspr	CTR, r0
	ble	.L1014
#else
	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f12, f0

	srawi.	r0,  K,  2
 	fpmr	f1,  f0
	mtspr	CTR, r0
	ble	.L1014
#endif

	/* Software-pipeline prologue: preload A/B panel pairs while clearing
	 * the remaining accumulators. */
	LFPDUX	A1,  AO, INC4
	fpmr	f5,  f0
	LFPDUX	A3,  AO, INC4
	fpmr	f9,  f0
	LFPDUX	B1,  BO, INC4
	fpmr	f13, f0

	LFPDUX	A5,  AO, INC4
	fpmr	f2,  f0
	LFPDUX	A6,  AO, INC4
	fpmr	f6,  f0
	LFPDUX	B3,  BO, INC4
	fpmr	f10, f0
	LFPDUX	A7,  AO, INC4
	fpmr	f14, f0

	LFPDUX	A8,  AO, INC4
	fpmr	f3,  f0
	LFPDUX	B5,  BO, INC4
	fpmr	f7,  f0
	LFPDUX	A9,  AO, INC4
	fpmr	f11, f0
	LFPDUX	A2, AO2, INC4
	fpmr	f15, f0
	LFPDUX	B2, BO2, INC4
	bdz-	.L1013			/* only one unrolled group left */
	.align 4
 | 
						|
 | 
						|
/* .L1012: steady-state inner loop of the 4x2 tile, unrolled 4x over K
 * (groups "## 1 ##" .. "## 4 ##").  Each group does the 16 paired
 * multiply-adds for one K step into accumulators f0..f15 while the LFPDUX
 * instructions dual-issue the operand loads for a *later* group (classic
 * software pipelining -- the register rotation A1..A10 / B1..B6 is what the
 * nop padding protects; the nops keep the FP/load issue pairing stable, so
 * do not "clean them up"). */
.L1012:

## 1 ##
	FXCPMADD	f0,  B1, A1, f0
	nop
	FXCSMADD	f4,  B1, A1, f4
	nop
	FXCPMADD	f8,  B2, A1, f8
	LFPDUX	B4, BO2, INC4
	FXCSMADD	f12, B2, A1, f12
	LFPDUX	B6,  BO, INC4

	FXCPMADD	f1,  B1, A2, f1
	nop
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B2, A2, f9
	LFPDUX	A10, AO, INC4
	FXCSMADD	f13, B2, A2, f13
	nop

	FXCPMADD	f2,  B1, A3, f2
	nop
	FXCSMADD	f6,  B1, A3, f6
	nop
	FXCPMADD	f10, B2, A3, f10
	nop
	FXCSMADD	f14, B2, A3, f14
	nop

	FXCPMADD	f3,  B1, A4, f3
	nop
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A2, AO2, INC4
	FXCPMADD	f11, B2, A4, f11
	LFPDUX	A1,  AO, INC4
	FXCSMADD	f15, B2, A4, f15
	nop

## 2 ##

	FXCPMADD	f0,  B3, A5, f0
	nop
	FXCSMADD	f4,  B3, A5, f4
	nop
	FXCPMADD	f8,  B4, A5, f8
	LFPDUX	B2, BO2, INC4
	FXCSMADD	f12, B4, A5, f12
	LFPDUX	B1,  BO, INC4

	FXCPMADD	f1,  B3, A2, f1
	nop
	FXCSMADD	f5,  B3, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B4, A2, f9
	LFPDUX	A3,  AO, INC4
	FXCSMADD	f13, B4, A2, f13
	nop

	FXCPMADD	f2,  B3, A6, f2
	nop
	FXCSMADD	f6,  B3, A6, f6
	nop
	FXCPMADD	f10, B4, A6, f10
	nop
	FXCSMADD	f14, B4, A6, f14
	nop

	FXCPMADD	f3,  B3, A4, f3
	nop
	FXCSMADD	f7,  B3, A4, f7
	LFPDUX	A2, AO2, INC4
	FXCPMADD	f11, B4, A4, f11
	LFPDUX	A5,  AO, INC4
	FXCSMADD	f15, B4, A4, f15
	nop

## 3 ##

	FXCPMADD	f0,  B5, A7, f0
	nop
	FXCSMADD	f4,  B5, A7, f4
	nop
	FXCPMADD	f8,  B2, A7, f8
	LFPDUX	B4, BO2, INC4
	FXCSMADD	f12, B2, A7, f12
	LFPDUX	B3,  BO, INC4

	FXCPMADD	f1,  B5, A2, f1
	nop
	FXCSMADD	f5,  B5, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B2, A2, f9
	LFPDUX	A6,  AO, INC4
	FXCSMADD	f13, B2, A2, f13
	nop

	FXCPMADD	f2,  B5, A8, f2
	nop
	FXCSMADD	f6,  B5, A8, f6
	nop
	FXCPMADD	f10, B2, A8, f10
	nop
	FXCSMADD	f14, B2, A8, f14
	nop

	FXCPMADD	f3,  B5, A4, f3
	nop
	FXCSMADD	f7,  B5, A4, f7
	LFPDUX	A2, AO2, INC4
	FXCPMADD	f11, B2, A4, f11
	LFPDUX	A7,  AO, INC4
	FXCSMADD	f15, B2, A4, f15
	nop

## 4 ##
	FXCPMADD	f0,  B6, A9, f0
	nop
	FXCSMADD	f4,  B6, A9, f4
	nop
	FXCPMADD	f8,  B4, A9, f8
	LFPDUX	B2, BO2, INC4
	FXCSMADD	f12, B4, A9, f12
	LFPDUX	B5,  BO, INC4

	FXCPMADD	f1,  B6, A2, f1
	nop
	FXCSMADD	f5,  B6, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B4, A2, f9
	LFPDUX	A8,  AO, INC4
	FXCSMADD	f13, B4, A2, f13
	nop

	FXCPMADD	f2,  B6, A10, f2
	nop
	FXCSMADD	f6,  B6, A10, f6
	nop
	FXCPMADD	f10, B4, A10, f10
	nop
	FXCSMADD	f14, B4, A10, f14
	nop

	FXCPMADD	f3,  B6, A4, f3
	LFPDUX	A2, AO2, INC4
	FXCSMADD	f7,  B6, A4, f7
	LFPDUX	A9,  AO, INC4
	FXCPMADD	f11, B4, A4, f11
	nop
	FXCSMADD	f15, B4, A4, f15
	bdnz+	.L1012
	.align 4
 | 
						|
 | 
						|
/* .L1013: epilogue iteration of the 4x2 inner loop -- same four K steps as
 * .L1012, but instead of prefetching the *next* A/B panel group, the spare
 * load slots are used (non-TRMM only) to pull the existing C values for both
 * output columns into A1/B1/A3/A5 (column CO1) and B3/A6/A7/B2 (column CO2).
 * LFDUX/LFSDUX presumably load the primary/secondary half of a paired
 * register from scalar C storage; the INCM5 back-step rewinds CO1 between the
 * primary and secondary passes (verify against the store offsets in .L1018).
 * In the TRMM build those slots are plain nops to preserve issue pairing. */
.L1013:

## 1 ##

	FXCPMADD	f0,  B1, A1, f0
	nop
	FXCSMADD	f4,  B1, A1, f4
	nop
	FXCPMADD	f8,  B2, A1, f8
	LFPDUX	B4, BO2, INC4
	FXCSMADD	f12, B2, A1, f12
	LFPDUX	B6,  BO, INC4

	FXCPMADD	f1,  B1, A2, f1
	nop
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B2, A2, f9
	LFPDUX	A10, AO, INC4
	FXCSMADD	f13, B2, A2, f13
	nop

	FXCPMADD	f2,  B1, A3, f2
	nop
	FXCSMADD	f6,  B1, A3, f6
	nop
	FXCPMADD	f10, B2, A3, f10
	nop
	FXCSMADD	f14, B2, A3, f14
	nop

	FXCPMADD	f3,  B1, A4, f3
	nop
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A2, AO2, INC4
	FXCPMADD	f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFDUX	A1, CO1, INC		/* begin loading old C, column 1 */
#else
	nop
#endif
	FXCSMADD	f15, B2, A4, f15
	nop

## 2 ##

	FXCPMADD	f0,  B3, A5, f0
	nop
	FXCSMADD	f4,  B3, A5, f4
	nop
	FXCPMADD	f8,  B4, A5, f8
	LFPDUX	B2, BO2, INC4
	FXCSMADD	f12, B4, A5, f12
#ifndef TRMMKERNEL
	LFDUX	B1, CO1, INC2
#else
	nop
#endif

	FXCPMADD	f1,  B3, A2, f1
	nop
	FXCSMADD	f5,  B3, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B4, A2, f9
#ifndef TRMMKERNEL
	LFDUX	A3, CO1, INC2
#else
	nop
#endif
	FXCSMADD	f13, B4, A2, f13
	nop

	FXCPMADD	f2,  B3, A6, f2
	nop
	FXCSMADD	f6,  B3, A6, f6
	nop
	FXCPMADD	f10, B4, A6, f10
	nop
	FXCSMADD	f14, B4, A6, f14
	nop

	FXCPMADD	f3,  B3, A4, f3
	nop
	FXCSMADD	f7,  B3, A4, f7
	LFPDUX	A2, AO2, INC4
	FXCPMADD	f11, B4, A4, f11
#ifndef TRMMKERNEL
   	LFDUX	A5, CO1, INC2
#else
	nop
#endif
	FXCSMADD	f15, B4, A4, f15
	nop

## 3 ##

	FXCPMADD	f0,  B5, A7, f0
	nop
	FXCSMADD	f4,  B5, A7, f4
	nop
	FXCPMADD	f8,  B2, A7, f8
	LFPDUX	B4, BO2, INC4
	FXCSMADD	f12, B2, A7, f12
#ifndef TRMMKERNEL
	LFSDUX	A1, CO1, INCM5		/* rewind, load secondary halves */
#else
	nop
#endif

	FXCPMADD	f1,  B5, A2, f1
	nop
	FXCSMADD	f5,  B5, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B2, A2, f9
#ifndef TRMMKERNEL
	LFSDUX	B1, CO1, INC2
#else
	nop
#endif
	FXCSMADD	f13, B2, A2, f13
	nop
	
	FXCPMADD	f2,  B5, A8, f2
	nop
	FXCSMADD	f6,  B5, A8, f6
	nop
	FXCPMADD	f10, B2, A8, f10
	nop
	FXCSMADD	f14, B2, A8, f14
	nop

	FXCPMADD	f3,  B5, A4, f3
	nop
	FXCSMADD	f7,  B5, A4, f7
	LFPDUX	A2, AO2, INC4
	FXCPMADD	f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFSDUX	A3, CO1, INC2
#else
	nop
#endif
	FXCSMADD	f15, B2, A4, f15
	nop

## 4 ##

	FXCPMADD	f0,  B6, A9, f0
	nop
	FXCSMADD	f4,  B6, A9, f4
	nop
	FXCPMADD	f8,  B4, A9, f8
#ifndef TRMMKERNEL
	LFSDUX	A5, CO1, INC2
#else
	nop
#endif
	FXCSMADD	f12, B4, A9, f12
#ifndef TRMMKERNEL
	LFDUX	B3, CO2, INC		/* begin loading old C, column 2 */
#else
	nop
#endif

	FXCPMADD	f1,  B6, A2, f1
	nop
	FXCSMADD	f5,  B6, A2, f5
	LFPDUX	A4, AO2, INC4
	FXCPMADD	f9,  B4, A2, f9
#ifndef TRMMKERNEL
	LFDUX	A6, CO2, INC2
#else
	nop
#endif
	FXCSMADD	f13, B4, A2, f13
	nop

	FXCPMADD	f2,  B6, A10, f2
	nop
	FXCSMADD	f6,  B6, A10, f6
	nop
	FXCPMADD	f10, B4, A10, f10
	nop
	FXCSMADD	f14, B4, A10, f14
#ifndef TRMMKERNEL
	LFDUX	A7, CO2, INC2
#else
	nop
#endif

	FXCPMADD	f3,  B6, A4, f3
	nop
	FXCSMADD	f7,  B6, A4, f7
	nop
	FXCPMADD	f11, B4, A4, f11
	nop
	FXCSMADD	f15, B4, A4, f15
#ifndef TRMMKERNEL
	LFDUX	B2, CO2, INC2
#else
	nop
#endif
	.align 4
 | 
						|
 | 
						|
/* .L1014: after the unrolled loop -- load alpha (and, for TRMM, the zero
 * constant into f30), then set CTR = (effective K) mod 4 for the remainder
 * loop.  If the whole K fit in the unrolled loop (remainder 0) fall through
 * to the store code at .L1018.  The "cmpwi ...,3 / bgt .L1015" pair
 * distinguishes the case where the unrolled loop never ran (K < 4): only
 * then must the old-C loads and accumulator clears below be done here,
 * because .L1013 (which normally does them) was skipped. */
.L1014:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 2
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble+	.L1018

	cmpwi	cr0, TEMP, 3
	bgt+	.L1015			/* unrolled loop ran; state is ready */
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
	ble+	.L1018

	cmpwi	cr0, K, 3
	bgt+	.L1015
#endif

#ifndef TRMMKERNEL
	/* K < 4 path: load the old C values (both columns) and clear the
	 * secondary accumulators, mirroring what .L1013 would have done. */
	LFDUX	A1, CO1, INC
	fpmr	f5,  f0
	LFDUX	B1, CO1, INC2
	fpmr	f9,  f0
	LFDUX	A3, CO1, INC2
	fpmr	f13, f0
   	LFDUX	A5, CO1, INC2
	fpmr	f2,  f0

	LFSDUX	A1, CO1, INCM5
	fpmr	f6,  f0
	LFSDUX	B1, CO1, INC2
	fpmr	f10, f0
	LFSDUX	A3, CO1, INC2
	fpmr	f14, f0
	LFSDUX	A5, CO1, INC2
	fpmr	f3,  f0

	LFDUX	B3, CO2, INC
	fpmr	f7,  f0
	LFDUX	A6, CO2, INC2
	fpmr	f11, f0
	LFDUX	A7, CO2, INC2
	fpmr	f15, f0
	LFDUX	B2, CO2, INC2
#else
	fpmr	f5,  f0
	fpmr	f9,  f0
	fpmr	f13, f0
	fpmr	f2,  f0

	fpmr	f6,  f0
	fpmr	f10, f0
	fpmr	f14, f0
	fpmr	f3,  f0

	fpmr	f7,  f0
	fpmr	f11, f0
	fpmr	f15, f0
#endif
	.align 4

/* .L1015: prime the K-remainder loop with one A/B group. */
.L1015:
	LFPDUX	A2,  AO,  INC4
	LFPDUX	A4,  AO2, INC4
	LFPDUX	A10, BO,  INC4
	LFPDUX	B4,  BO2, INC4
	bdz-	.L1017
	.align 4

/* .L1016: remainder loop -- one K step of the 4x2 tile per iteration.
 * Note A2 is reloaded mid-group and reused for rows 3/4. */
.L1016:
	FXCPMADD	f0,  A10, A2, f0
	FXCSMADD	f4,  A10, A2, f4
	FXCPMADD	f8,  B4, A2, f8
	FXCSMADD	f12, B4, A2, f12
	LFPDUX	A2, AO,  INC4

	FXCPMADD	f1,  A10, A4, f1
	FXCSMADD	f5,  A10, A4, f5
	FXCPMADD	f9,  B4, A4, f9
	FXCSMADD	f13, B4, A4, f13
	LFPDUX	A4, AO2, INC4

	FXCPMADD	f2,  A10, A2, f2
	FXCSMADD	f6,  A10, A2, f6
	FXCPMADD	f10, B4, A2, f10
	FXCSMADD	f14, B4, A2, f14
	LFPDUX	A2, AO,  INC4

	FXCPMADD	f3,  A10, A4, f3
	FXCSMADD	f7,  A10, A4, f7
	LFPDUX	A10, BO,  INC4
	FXCPMADD	f11, B4, A4, f11
	FXCSMADD	f15, B4, A4, f15
	LFPDUX	A4, AO2, INC4
	LFPDUX	B4, BO2, INC4
	bdnz+	.L1016
	.align 4

/* .L1017: final remainder step (no trailing loads -- avoids over-read). */
.L1017:
	FXCPMADD	f0,  A10, A2, f0
	FXCSMADD	f4,  A10, A2, f4
	FXCPMADD	f8,  B4, A2, f8
	FXCSMADD	f12, B4, A2, f12
	LFPDUX	A2, AO,  INC4

	FXCPMADD	f1,  A10, A4, f1
	FXCSMADD	f5,  A10, A4, f5
	FXCPMADD	f9,  B4, A4, f9
	FXCSMADD	f13, B4, A4, f13
	LFPDUX	A4, AO2, INC4

	FXCPMADD	f2,  A10, A2, f2
	FXCSMADD	f6,  A10, A2, f6
	FXCPMADD	f10, B4, A2, f10
	FXCSMADD	f14, B4, A2, f14

	FXCPMADD	f3,  A10, A4, f3
	FXCSMADD	f7,  A10, A4, f7
	FXCPMADD	f11, B4, A4, f11
	FXCSMADD	f15, B4, A4, f15
	.align 4
 | 
						|
 | 
						|
/* .L1018: finish the 4x2 tile.  First fold the cross-term accumulators
 * (fpadd for the non-conjugated NN/NT/.../CT classes, fpsub otherwise),
 * then scale by alpha and store.  Non-TRMM path: C += alpha*acc using the
 * previously loaded C values (A1,B1,A3,A5 = column 1; B3,A6,A7,B2 = column 2,
 * with the column-2 secondary halves loaded here via LFSDUX), storing
 * element-wise with STFDUX/STFSDUX; INCM7 rewinds CO1/CO2 to the row start.
 * TRMM path: acc *= alpha starting from the zero register f30 (uses f12-f19
 * as temporaries -- legal because the accumulators are dead after this).
 * Finally, for TRMM, advance AO/BO past the unconsumed part of the panels
 * and bump KK; then decrement the row-tile counter I and loop to .L1011. */
.L1018:
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f4
	fpadd	f8, f8, f12
	fpadd	f1, f1, f5
	fpadd	f9, f9, f13

	fpadd	f2,  f2,  f6
	fpadd	f10, f10, f14
	fpadd	f3,  f3,  f7
	fpadd	f11, f11, f15
#else
	fpsub	f0, f0, f4
	fpsub	f8, f8, f12
	fpsub	f1, f1, f5
	fpsub	f9, f9, f13

	fpsub	f2,  f2,  f6
	fpsub	f10, f10, f14
	fpsub	f3,  f3,  f7
	fpsub	f11, f11, f15
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1,  f0, AP,  A1
	LFSDUX	B3, CO2, INCM5		/* column-2 secondary halves */
	fxcpmadd B1,  f1, AP,  B1
	LFSDUX	A6, CO2, INC2
	fxcpmadd A3,  f2, AP,  A3
	LFSDUX	A7, CO2, INC2
	fxcpmadd A5,  f3, AP,  A5
	LFSDUX	B2, CO2, INC2

	fxcxnpma f0,  f0, AP,  A1
	fxcpmadd B3,  f8,  AP,  B3
	fxcxnpma f1,  f1, AP,  B1
	fxcpmadd A6,  f9,  AP,  A6
	fxcxnpma f2,  f2, AP,  A3
	fxcpmadd A7,  f10, AP,  A7

	fxcxnpma f3,  f3, AP,  A5
	STFDUX	f0,  CO1, INCM7		/* rewind CO1, store column 1 */
	fxcpmadd B2,  f11, AP,  B2
	STFSDUX	f0,  CO1, INC
	fxcxnpma f8,  f8,  AP,  B3
	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC
	fxcxnpma f9,  f9,  AP,  A6
	STFDUX	f2,  CO1, INC
	STFSDUX	f2,  CO1, INC
	fxcxnpma f10, f10, AP,  A7
	STFDUX	f3,  CO1, INC
	STFSDUX	f3,  CO1, INC

	fxcxnpma f11, f11, AP,  B2
	STFDUX	f8,  CO2, INCM7		/* rewind CO2, store column 2 */
#else
	fxcpmadd f12, f0, AP,  f30
	fxcpmadd f13, f1, AP,  f30
	fxcpmadd f14, f2, AP,  f30
	fxcpmadd f15, f3, AP,  f30

	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f1,  f1, AP,  f13
	fxcxnpma f2,  f2, AP,  f14
	fxcxnpma f3,  f3, AP,  f15

	fxcpmadd f16, f8,  AP, f30
	fxcpmadd f17, f9,  AP, f30
	fxcpmadd f18, f10, AP, f30
	fxcpmadd f19, f11, AP, f30

	fxcxnpma f8,  f8,  AP, f16
	fxcxnpma f9,  f9,  AP, f17
	fxcxnpma f10, f10, AP, f18
	fxcxnpma f11, f11, AP, f19

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC
	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC
	STFDUX	f2,  CO1, INC
	STFSDUX	f2,  CO1, INC
	STFDUX	f3,  CO1, INC
	STFSDUX	f3,  CO1, INC
	STFDUX	f8,  CO2, INC
#endif
	STFSDUX	f8,  CO2, INC
	STFDUX	f9,  CO2, INC
	STFSDUX	f9,  CO2, INC
	STFDUX	f10, CO2, INC
	STFSDUX	f10, CO2, INC

	STFDUX	f11, CO2, INC
	STFSDUX	f11, CO2, INC



#ifdef TRMMKERNEL
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -4
#else
	addi	TEMP, TEMP, -2
#endif
	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 4		/* consumed 4 rows of the triangle */
#endif
#endif

	addic.	I, I, -1
	li	r0, FZERO

	lfpsx	f0, SP, r0		/* re-zero f0 for the next tile */
	bgt+	.L1011
	.align 4
 | 
						|
 | 
						|
/* .L1020: M-remainder, 2-row case of the column-pair loop.  Same structure
 * as .L1011 but for a 2x2 tile: TRMM pointer offsets use the 2-wide tile
 * stride (1 + ZBASE_SHIFT) for both panels, eight accumulators
 * (f0,f1,f4,f5,f8,f9,f12,f13) are cleared, and CTR = (effective K) / 4 for
 * the 4x-unrolled loop at .L1022, with a deep preload of both panels. */
.L1020:
	andi.	I, M,  2
	beq	.L1030

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f12, f0
#else
	slwi	TEMP, KK, 1 + ZBASE_SHIFT
	slwi	r0,   KK, 1 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  BO, - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, BO,   2 * SIZE
	fpmr	f12, f0

#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 2
#endif
	srawi.	r0,  TEMP,  2
 	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f9,  f0
	mtspr	CTR, r0
	fpmr	f13, f0
	ble	.L1024
#else
	addi	AO2, AO,   2 * SIZE
	fpmr	f4,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f8,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f12, f0

	srawi.	r0,  K,  2
 	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f9,  f0
	mtspr	CTR, r0
	fpmr	f13, f0
	ble	.L1024
#endif

	/* Preload four K steps of both panels (A8 is fetched inside the loop). */
	LFPDUX	A1,   AO, INC4
	LFPDUX	B1,   BO, INC4
	LFPDUX	A2,  AO2, INC4
	LFPDUX	B2,  BO2, INC4
	LFPDUX	A3,   AO, INC4
	LFPDUX	B3,   BO, INC4
	LFPDUX	A4,  AO2, INC4
	LFPDUX	B4,  BO2, INC4

	LFPDUX	A5,   AO, INC4
	LFPDUX	B5,   BO, INC4
	LFPDUX	A6,  AO2, INC4
	LFPDUX	B6,  BO2, INC4
	LFPDUX	A7,   AO, INC4
	LFPDUX	A9,   BO, INC4
	LFPDUX	A10, BO2, INC4
	bdz-	.L1023
	.align 4
 | 
						|
 | 
						|
/* .L1022: steady-state 4x-unrolled inner loop for the 2x2 tile.  Eight
 * multiply-adds per K step into f0/f4/f8/f12 (row 1) and f1/f5/f9/f13
 * (row 2), with LFPDUX reloads rotating through the preloaded registers;
 * nops preserve the FP/load issue pairing -- keep them. */
.L1022:
	FXCPMADD	f0,  B1, A1, f0
	nop
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A8,  AO2, INC4
	FXCPMADD	f8,  B2, A1, f8
	nop
	FXCSMADD	f12, B2, A1, f12
	LFPDUX	A1,   AO, INC4

	FXCPMADD	f1,  B1, A2, f1
	nop
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	B1,   BO, INC4
	FXCPMADD	f9,  B2, A2, f9
	nop
	FXCSMADD	f13, B2, A2, f13
	LFPDUX	B2,  BO2, INC4

	FXCPMADD	f0,  B3, A3, f0
	nop
	FXCSMADD	f4,  B3, A3, f4
	LFPDUX	A2,  AO2, INC4
	FXCPMADD	f8,  B4, A3, f8
	nop
	FXCSMADD	f12, B4, A3, f12
	LFPDUX	A3,   AO, INC4

	FXCPMADD	f1,  B3, A4, f1
	nop
	FXCSMADD	f5,  B3, A4, f5
	LFPDUX	B3,   BO, INC4
	FXCPMADD	f9,  B4, A4, f9
	nop
	FXCSMADD	f13, B4, A4, f13
	LFPDUX	B4,  BO2, INC4

	FXCPMADD	f0,  B5, A5, f0
	nop
	FXCSMADD	f4,  B5, A5, f4
	LFPDUX	A4,  AO2, INC4
	FXCPMADD	f8,  B6, A5, f8
	nop
	FXCSMADD	f12, B6, A5, f12
	LFPDUX	A5,   AO, INC4

	FXCPMADD	f1,  B5, A6, f1
	nop
	FXCSMADD	f5,  B5, A6, f5
	LFPDUX	B5,   BO, INC4
	FXCPMADD	f9,  B6, A6, f9
	nop
	FXCSMADD	f13, B6, A6, f13
	LFPDUX	B6,  BO2, INC4

	/* fourth K step uses A9/A10 as the B-panel carriers */
	FXCPMADD	f0,  A9,  A7, f0
	nop
	FXCSMADD	f4,  A9,  A7, f4
	LFPDUX	A6,  AO2, INC4
	FXCPMADD	f8,  A10, A7, f8
	nop
	FXCSMADD	f12, A10, A7, f12
	LFPDUX	A7,   AO, INC4

	FXCPMADD	f1,  A9,  A8, f1
	nop
	FXCSMADD	f5,  A9,  A8, f5
	LFPDUX	A9,   BO, INC4
	FXCPMADD	f9,  A10, A8, f9
	nop
	FXCSMADD	f13, A10, A8, f13
	LFPDUX	A10, BO2, INC4
	bdnz+	.L1022
	.align 4

/* .L1023: epilogue iteration of the 2x2 loop -- same math, only the one
 * load (A8) still outstanding; no further prefetch, so no over-read. */
.L1023:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A8,  AO2, INC4
	FXCPMADD	f8,  B2, A1, f8
	FXCSMADD	f12, B2, A1, f12

	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5
	FXCPMADD	f9,  B2, A2, f9
	FXCSMADD	f13, B2, A2, f13

	FXCPMADD	f0,  B3, A3, f0
	FXCSMADD	f4,  B3, A3, f4
	FXCPMADD	f8,  B4, A3, f8
	FXCSMADD	f12, B4, A3, f12

	FXCPMADD	f1,  B3, A4, f1
	FXCSMADD	f5,  B3, A4, f5
	FXCPMADD	f9,  B4, A4, f9
	FXCSMADD	f13, B4, A4, f13

	FXCPMADD	f0,  B5, A5, f0
	FXCSMADD	f4,  B5, A5, f4
	FXCPMADD	f8,  B6, A5, f8
	FXCSMADD	f12, B6, A5, f12

	FXCPMADD	f1,  B5, A6, f1
	FXCSMADD	f5,  B5, A6, f5
	FXCPMADD	f9,  B6, A6, f9
	FXCSMADD	f13, B6, A6, f13

	FXCPMADD	f0,  A9, A7, f0
	FXCSMADD	f4,  A9, A7, f4
	FXCPMADD	f8,  A10, A7, f8
	FXCSMADD	f12, A10, A7, f12

	FXCPMADD	f1,  A9, A8, f1
	FXCSMADD	f5,  A9, A8, f5
	FXCPMADD	f9,  A10, A8, f9
	FXCSMADD	f13, A10, A8, f13
	.align 4
 | 
						|
 | 
						|
/* .L1024: K-remainder for the 2x2 tile.  Load alpha (and the TRMM zero
 * constant into f30), set CTR = (effective K) mod 4, and skip to the store
 * code at .L1028 when there is no remainder. */
.L1024:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 2
#endif
	andi.	r0,  TEMP,  3
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L1028

	/* prime the remainder loop with one K step of both panels */
	LFPDUX	A1,  AO,  INC4
	LFPDUX	A2,  AO2, INC4
	LFPDUX	B1,  BO,  INC4
	LFPDUX	B2,  BO2, INC4
	bdz-	.L1027
	.align 4

/* .L1026: remainder loop -- one 2x2 K step per iteration. */
.L1026:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	FXCPMADD	f8,  B2, A1, f8
	FXCSMADD	f12, B2, A1, f12
	LFPDUX	A1,  AO,  INC4

	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	B1,  BO,  INC4
	FXCPMADD	f9,  B2, A2, f9
	FXCSMADD	f13, B2, A2, f13
	LFPDUX	A2,  AO2, INC4
	LFPDUX	B2,  BO2, INC4
	bdnz+	.L1026
	.align 4

/* .L1027: final remainder step, operands already loaded. */
.L1027:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	FXCPMADD	f8,  B2, A1, f8
	FXCSMADD	f12, B2, A1, f12

	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5
	FXCPMADD	f9,  B2, A2, f9
	FXCSMADD	f13, B2, A2, f13
	.align 4
 | 
						|
 | 
						|
.L1028:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	LFDUX	A1, CO1, INC
 | 
						|
	LFDUX	A2, CO1, INC2
 | 
						|
	LFDUX	A3, CO2, INC
 | 
						|
	LFDUX	A4, CO2, INC2
 | 
						|
 | 
						|
	LFSDUX	A1, CO1, INCM1
 | 
						|
	LFSDUX	A2, CO1, INC2
 | 
						|
	LFSDUX	A3, CO2, INCM1
 | 
						|
	LFSDUX	A4, CO2, INC2
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
 | 
						|
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
 | 
						|
	fpadd	f0, f0, f4
 | 
						|
	fpadd	f8, f8, f12
 | 
						|
	fpadd	f1, f1, f5
 | 
						|
	fpadd	f9, f9, f13
 | 
						|
#else
 | 
						|
	fpsub	f0, f0, f4
 | 
						|
	fpsub	f8, f8, f12
 | 
						|
	fpsub	f1, f1, f5
 | 
						|
	fpsub	f9, f9, f13
 | 
						|
#endif
 | 
						|
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
	fxcpmadd A1,  f0, AP,  A1
 | 
						|
	fxcpmadd A2,  f1, AP,  A2
 | 
						|
	fxcpmadd A3,  f8, AP,  A3
 | 
						|
	fxcpmadd A4,  f9, AP,  A4
 | 
						|
 | 
						|
	fxcxnpma f0,  f0, AP,  A1
 | 
						|
	fxcxnpma f1,  f1, AP,  A2
 | 
						|
	fxcxnpma f8,  f8, AP,  A3
 | 
						|
	fxcxnpma f9,  f9, AP,  A4
 | 
						|
 | 
						|
	STFDUX	f0,  CO1, INCM3
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	STFDUX	f8,  CO2, INCM3
 | 
						|
	STFSDUX	f8,  CO2, INC
 | 
						|
	STFDUX	f9,  CO2, INC
 | 
						|
	STFSDUX	f9,  CO2, INC
 | 
						|
#else
 | 
						|
	fxcpmadd f12, f0, AP,  f30
 | 
						|
	fxcpmadd f13, f1, AP,  f30
 | 
						|
	fxcpmadd f14, f8, AP,  f30
 | 
						|
	fxcpmadd f15, f9, AP,  f30
 | 
						|
 | 
						|
	fxcxnpma f0,  f0, AP,  f12
 | 
						|
	fxcxnpma f1,  f1, AP,  f13
 | 
						|
	fxcxnpma f8,  f8, AP,  f14
 | 
						|
	fxcxnpma f9,  f9, AP,  f15
 | 
						|
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
	STFDUX	f1,  CO1, INC
 | 
						|
	STFSDUX	f1,  CO1, INC
 | 
						|
 | 
						|
	STFDUX	f8,  CO2, INC
 | 
						|
	STFSDUX	f8,  CO2, INC
 | 
						|
	STFDUX	f9,  CO2, INC
 | 
						|
	STFSDUX	f9,  CO2, INC
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
#if ( defined(LEFT) &&  defined(TRANSA)) || \
 | 
						|
    (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#ifdef LEFT
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#else
 | 
						|
	addi	TEMP, TEMP, -2
 | 
						|
#endif
 | 
						|
	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
 | 
						|
	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
 | 
						|
	add	AO, AO, r0
 | 
						|
	add	BO, BO, TEMP
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef LEFT
 | 
						|
	addi	KK, KK, 2
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1030:
	/* ---- M&1: single remaining row of the N=2 column panel ---- */
	andi.	I, M,  1
	beq	.L1049			/* M even -> panel done */

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	/* TRMM, start-of-panel case: B pointers rewound so the INC4
	   pre-increment loads land on element 0. */
	addi	AO2, AO,   2 * SIZE
	fpmr	f1,  f0			/* clear accumulators f1..f3 */
	addi	BO,  B,  - 4 * SIZE
	fpmr	f2,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f3, f0
#else
	/* TRMM, offset case: skip KK iterations of A (x1) and B (x2). */
	slwi	TEMP, KK, 0 + ZBASE_SHIFT
	slwi	r0,   KK, 1 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	AO2, AO,   2 * SIZE
	fpmr	f1,  f0
	addi	BO,  BO,  - 4 * SIZE
	fpmr	f2,  f0
	addi	BO2, BO,    2 * SIZE	/* BO2 = BO + 2 = original BO - 2 */
	fpmr	f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1		/* M unroll = 1 */
#else
	addi	TEMP, KK, 2		/* N unroll = 2 */
#endif
	srawi.	r0,  TEMP,  2		/* main loop runs TEMP/4 times */
	mtspr	CTR, r0
	ble	.L1034
#else
	/* Plain GEMM setup: second A stream 2 elements ahead; B pointers
	   rewound to compensate for pre-increment addressing. */
	addi	AO2, AO,   2 * SIZE
	fpmr	f1,  f0
	addi	BO,  B,  - 4 * SIZE
	fpmr	f2,  f0
	addi	BO2, B,  - 2 * SIZE
	fpmr	f3, f0

	srawi.	r0,  K,  2
	mtspr	CTR, r0
	ble	.L1034
#endif

	/* Prime four K-steps worth of A and B pairs (A5..A8 reused as
	   extra B registers in this block). */
	LFPDUX	A1,  AO, INC4
	LFPDUX	B1,  BO, INC4
	LFPDUX	B2, BO2, INC4
	LFPDUX	A2, AO2, INC4
	LFPDUX	B3,  BO, INC4
	LFPDUX	B4, BO2, INC4

	LFPDUX	A3,  AO, INC4
	LFPDUX	A5,  BO, INC4
	LFPDUX	A6, BO2, INC4
	LFPDUX	A4, AO2, INC4
	LFPDUX	A7,  BO, INC4
	LFPDUX	A8, BO2, INC4
	bdz-	.L1033
	.align 4

.L1032:
	/* 4x-unrolled K loop; accumulators f0/f1 (column 1) and f2/f3
	   (column 2), loads overlapped with the FMAs. */
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	LFPDUX	B1,  BO, INC4
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3
	LFPDUX	B2, BO2, INC4
	LFPDUX	A1,  AO, INC4

	FXCPMADD	f0,  B3, A2, f0
	FXCSMADD	f1,  B3, A2, f1
	LFPDUX	B3,  BO, INC4
	FXCPMADD	f2,  B4, A2, f2
	FXCSMADD	f3,  B4, A2, f3
	LFPDUX	B4, BO2, INC4
	LFPDUX	A2, AO2, INC4

	FXCPMADD	f0,  A5, A3, f0
	FXCSMADD	f1,  A5, A3, f1
	LFPDUX	A5,  BO, INC4
	FXCPMADD	f2,  A6, A3, f2
	FXCSMADD	f3,  A6, A3, f3
	LFPDUX	A6, BO2, INC4
	LFPDUX	A3,  AO, INC4

	FXCPMADD	f0,  A7, A4, f0
	FXCSMADD	f1,  A7, A4, f1
	LFPDUX	A7,  BO, INC4
	FXCPMADD	f2,  A8, A4, f2
	FXCSMADD	f3,  A8, A4, f3
	LFPDUX	A8, BO2, INC4
	LFPDUX	A4, AO2, INC4
	bdnz+	.L1032
	.align 4

.L1033:
	/* Drain the pipelined iteration (no reloads). */
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3

	FXCPMADD	f0,  B3, A2, f0
	FXCSMADD	f1,  B3, A2, f1
	FXCPMADD	f2,  B4, A2, f2
	FXCSMADD	f3,  B4, A2, f3

	FXCPMADD	f0,  A5, A3, f0
	FXCSMADD	f1,  A5, A3, f1
	FXCPMADD	f2,  A6, A3, f2
	FXCSMADD	f3,  A6, A3, f3

	FXCPMADD	f0,  A7, A4, f0
	FXCSMADD	f1,  A7, A4, f1
	FXCPMADD	f2,  A8, A4, f2
	FXCSMADD	f3,  A8, A4, f3
	.align 4

.L1034:
	/* Load alpha pair; in TRMM mode also a zero pair for the beta term. */
	li	r0, ALPHA
	lfpdx	AP,  SP, r0
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 1
#else
	addi	TEMP, KK, 2
#endif
	andi.	r0,  TEMP,  3		/* K%4 remainder loop count */
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L1038

	/* Remainder: A advances 2 elements per step (LFPDX + manual add). */
	LFPDX	A1,  AO,  INC4
	LFPDUX	B1,  BO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC2
	bdz-	.L1037
	.align 4

.L1036:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	LFPDUX	B1,  BO,  INC4
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3
	LFPDX	A1,  AO,  INC4
	LFPDUX	B2,  BO2, INC4
	add	AO, AO, INC2
	bdnz+	.L1036
	.align 4

.L1037:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f1,  B1, A1, f1
	FXCPMADD	f2,  B2, A1, f2
	FXCSMADD	f3,  B2, A1, f3
	.align 4

.L1038:
#ifndef TRMMKERNEL
	/* Load the 1x2 block of C (one complex element per column). */
	LFDUX	A1, CO1, INC
	LFDUX	A2, CO2, INC
	LFSDUX	A1, CO1, INC
	LFSDUX	A2, CO2, INC
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f1		/* fold partial accumulators */
	fpadd	f2, f2, f3
#else
	fpsub	f0, f0, f1		/* conjugated variants */
	fpsub	f2, f2, f3
#endif

#ifndef TRMMKERNEL
	/* C += alpha * acc, then store (INCM1 rewinds to element start). */
	fxcpmadd A1,  f0, AP,  A1
	fxcpmadd A2,  f2, AP,  A2
	fxcxnpma f0,  f0, AP,  A1
	fxcxnpma f2,  f2, AP,  A2

	STFDUX	f0,  CO1, INCM1
	STFSDUX	f0,  CO1, INC

	STFDUX	f2,  CO2, INCM1
	STFSDUX	f2,  CO2, INC
#else
	/* TRMM: alpha * acc with zero beta term (f30). */
	fxcpmadd f12, f0, AP,  f30
	fxcpmadd f13, f2, AP,  f30
	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f2,  f2, AP,  f13

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC

	STFDUX	f2,  CO2, INC
	STFSDUX	f2,  CO2, INC
#endif

#ifdef TRMMKERNEL
/* Advance AO (x1) / BO (x2) past the unused panel tail, bump KK. */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -1
#else
	addi	TEMP, TEMP, -2
#endif
	slwi	r0,   TEMP, 0 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 1
#endif
#endif

	/* Re-zero f0 for the next tile. */
	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4
.L1049:
	/* ---- end of one N=2 column panel ---- */
#if defined(TRMMKERNEL) && !defined(LEFT)
	addi	KK, KK, 2		/* right-side TRMM: consumed 2 columns */
#endif

	/* BO ended 4 elements behind the next panel (pre-increment bias). */
	addi	B,  BO, 4 * SIZE

	addic.	J, J, -1		/* next pair of columns */
	bgt+	.L1010
	.align 4

.L1050:
	/* ---- N&1: final single column, if N is odd ---- */
	andi.	J, N,  1
	beq	.L10999			/* N even -> restore and return */

	mr	CO1, C			/* single output column pointer */

#if defined(TRMMKERNEL) &&  defined(LEFT)
	mr	KK, OFFSET		/* restart triangular offset per panel */
#endif

	addi	AO, A, -2 * SIZE	/* bias for LFPDUX pre-increment */

	li	r0, FZERO
	lfpsx	f0, SP, r0		/* f0 = 0.0 pair (accumulator seed) */

	srawi.	I, M,  2		/* M/4 tiles first */
	ble	.L1060
	.align 4
.L1051:
	/* ---- 4x1 micro-kernel: four rows of A against the single column ---- */
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	fpmr	f4,  f0			/* clear accumulators f1..f7 from f0=0 */
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f2,  f0
	fpmr	f6,  f0
#else
	/* TRMM offset: skip KK steps of A (x4) and B (x1). */
	slwi	TEMP, KK, 2 + ZBASE_SHIFT
	slwi	r0,   KK, 0 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	fpmr	f4,  f0
	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f2,  f0
	fpmr	f6,  f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4		/* M unroll = 4 */
#else
	addi	TEMP, KK, 1		/* N unroll = 1 */
#endif
	srawi.	r0,  TEMP,  2
	fpmr	f3,  f0
	mtspr	CTR, r0
	fpmr	f7,  f0
	ble	.L1054
#else
	srawi.	r0,  K,  2
	fpmr	f4,  f0
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f5,  f0
	fpmr	f2,  f0
	fpmr	f6,  f0
	fpmr	f3,  f0
	mtspr	CTR, r0
	fpmr	f7,  f0
	ble	.L1054
#endif

	/* Prime: B1..B3 cover three K-steps, A1..A8 two K-steps x 4 rows. */
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2

	LFPDUX	B3,  BO,  INC2
	LFPDUX	A5,  AO,  INC2
	LFPDUX	A6,  AO,  INC2
	LFPDUX	A7,  AO,  INC2
	LFPDUX	A8,  AO,  INC2
	bdz-	.L1053
	.align 4

.L1052:
	/* 4x-unrolled K loop; accumulators f0..f3 (primary) / f4..f7
	   (secondary) per row; nops pad the issue slots of the dual pipes. */
	FXCPMADD	f0,  B1, A1, f0
	LFPDUX	B4,  BO,  INC2
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	nop
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B1, A3, f2
	nop
	FXCSMADD	f6,  B1, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B1, A4, f3
	nop
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A4,  AO,  INC2

	FXCPMADD	f0,  B2, A5, f0
	LFPDUX	B1,  BO,  INC2
	FXCSMADD	f4,  B2, A5, f4
	LFPDUX	A5,  AO,  INC2
	FXCPMADD	f1,  B2, A6, f1
	nop
	FXCSMADD	f5,  B2, A6, f5
	LFPDUX	A6,  AO,  INC2

	FXCPMADD	f2,  B2, A7, f2
	nop
	FXCSMADD	f6,  B2, A7, f6
	LFPDUX	A7,  AO,  INC2
	FXCPMADD	f3,  B2, A8, f3
	nop
	FXCSMADD	f7,  B2, A8, f7
	LFPDUX	A8,  AO,  INC2

	FXCPMADD	f0,  B3, A1, f0
	LFPDUX	B2,  BO,  INC2
	FXCSMADD	f4,  B3, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B3, A2, f1
	nop
	FXCSMADD	f5,  B3, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B3, A3, f2
	nop
	FXCSMADD	f6,  B3, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B3, A4, f3
	nop
	FXCSMADD	f7,  B3, A4, f7
	LFPDUX	A4,  AO,  INC2

	FXCPMADD	f0,  B4, A5, f0
	LFPDUX	B3,  BO,  INC2
	FXCSMADD	f4,  B4, A5, f4
	LFPDUX	A5,  AO,  INC2
	FXCPMADD	f1,  B4, A6, f1
	nop
	FXCSMADD	f5,  B4, A6, f5
	LFPDUX	A6,  AO,  INC2

	FXCPMADD	f2,  B4, A7, f2
	nop
	FXCSMADD	f6,  B4, A7, f6
	LFPDUX	A7,  AO,  INC2
	FXCPMADD	f3,  B4, A8, f3
	nop
	FXCSMADD	f7,  B4, A8, f7
	LFPDUX	A8,  AO,  INC2
	bdnz+	.L1052
	.align 4

.L1053:
	/* Drain the software pipeline: finish the in-flight iteration,
	   loading only what the remaining FMAs still need. */
	FXCPMADD	f0,  B1, A1, f0
	LFPDUX	B4,  BO,  INC2
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	nop
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B1, A3, f2
	nop
	FXCSMADD	f6,  B1, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B1, A4, f3
	nop
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A4,  AO,  INC2

	FXCPMADD	f0,  B2, A5, f0
	nop
	FXCSMADD	f4,  B2, A5, f4
	LFPDUX	A5,  AO,  INC2
	FXCPMADD	f1,  B2, A6, f1
	nop
	FXCSMADD	f5,  B2, A6, f5
	LFPDUX	A6,  AO,  INC2

	FXCPMADD	f2,  B2, A7, f2
	nop
	FXCSMADD	f6,  B2, A7, f6
	LFPDUX	A7,  AO,  INC2
	FXCPMADD	f3,  B2, A8, f3
	nop
	FXCSMADD	f7,  B2, A8, f7
	LFPDUX	A8,  AO,  INC2

	FXCPMADD	f0,  B3, A1, f0
	FXCSMADD	f4,  B3, A1, f4
	FXCPMADD	f1,  B3, A2, f1
	FXCSMADD	f5,  B3, A2, f5

	FXCPMADD	f2,  B3, A3, f2
	FXCSMADD	f6,  B3, A3, f6
	FXCPMADD	f3,  B3, A4, f3
	FXCSMADD	f7,  B3, A4, f7

	FXCPMADD	f0,  B4, A5, f0
	FXCSMADD	f4,  B4, A5, f4
	FXCPMADD	f1,  B4, A6, f1
	FXCSMADD	f5,  B4, A6, f5

	FXCPMADD	f2,  B4, A7, f2
	FXCSMADD	f6,  B4, A7, f6
	FXCPMADD	f3,  B4, A8, f3
	FXCSMADD	f7,  B4, A8, f7
	.align 4

.L1054:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0		/* AP = alpha pair */
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0		/* zero pair for beta-less update */
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 4
#else
	addi	TEMP, KK, 1
#endif
	andi.	r0,  TEMP,  3		/* K%4 remainder */
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L1058

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2
	bdz-	.L1057
	.align 4

.L1056:
	/* Remainder loop: one K step across all four rows. */
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5
	LFPDUX	A2,  AO,  INC2

	FXCPMADD	f2,  B1, A3, f2
	FXCSMADD	f6,  B1, A3, f6
	LFPDUX	A3,  AO,  INC2
	FXCPMADD	f3,  B1, A4, f3
	FXCSMADD	f7,  B1, A4, f7
	LFPDUX	A4,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	bdnz+	.L1056
	.align 4

.L1057:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f4,  B1, A1, f4
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f5,  B1, A2, f5

	FXCPMADD	f2,  B1, A3, f2
	FXCSMADD	f6,  B1, A3, f6
	FXCPMADD	f3,  B1, A4, f3
	FXCSMADD	f7,  B1, A4, f7
	.align 4

.L1058:
#ifndef TRMMKERNEL
	/* Load 4 complex C elements; INCM5 rewinds for the secondary pass. */
	LFDUX	A1, CO1, INC
	LFDUX	A2, CO1, INC2
	LFDUX	A3, CO1, INC2
	LFDUX	A4, CO1, INC2

	LFSDUX	A1, CO1, INCM5
	LFSDUX	A2, CO1, INC2
	LFSDUX	A3, CO1, INC2
	LFSDUX	A4, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f4
	fpadd	f1, f1, f5
	fpadd	f2, f2, f6
	fpadd	f3, f3, f7
#else
	fpsub	f0, f0, f4
	fpsub	f1, f1, f5
	fpsub	f2, f2, f6
	fpsub	f3, f3, f7
#endif

#ifndef TRMMKERNEL
	/* C += alpha * acc; INCM7 rewinds CO1 to the first of the 4 slots. */
	fxcpmadd A1,  f0, AP,  A1
	fxcpmadd A2,  f1, AP,  A2
	fxcpmadd A3,  f2, AP,  A3
	fxcpmadd A4,  f3, AP,  A4

	fxcxnpma f0,  f0, AP,  A1
	fxcxnpma f1,  f1, AP,  A2
	fxcxnpma f2,  f2, AP,  A3
	fxcxnpma f3,  f3, AP,  A4

	STFDUX	f0,  CO1, INCM7
	STFSDUX	f0,  CO1, INC

	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC

	STFDUX	f2,  CO1, INC
	STFSDUX	f2,  CO1, INC

	STFDUX	f3,  CO1, INC
	STFSDUX	f3,  CO1, INC
#else
	fxcpmadd f12, f0, AP,  f30
	fxcpmadd f13, f1, AP,  f30
	fxcpmadd f14, f2, AP,  f30
	fxcpmadd f15, f3, AP,  f30

	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f1,  f1, AP,  f13
	fxcxnpma f2,  f2, AP,  f14
	fxcxnpma f3,  f3, AP,  f15

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC

	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC

	STFDUX	f2,  CO1, INC
	STFSDUX	f2,  CO1, INC

	STFDUX	f3,  CO1, INC
	STFSDUX	f3,  CO1, INC
#endif


#ifdef TRMMKERNEL
/* Advance AO (x4) / BO (x1) past the unused panel tail, bump KK. */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -4
#else
	addi	TEMP, TEMP, -1
#endif
	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 4
#endif
#endif

	addic.	I, I, -1		/* next group of 4 rows */
	li	r0, FZERO

	lfpsx	f0, SP, r0		/* re-zero accumulator seed */
	bgt+	.L1051
	.align 4
.L1060:
	/* ---- M&2: two remaining rows of the single-column panel ---- */
	andi.	I, M,  2
	beq	.L1070

#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi	BO,  B,  - 2 * SIZE	/* rewind for pre-increment loads */
	fpmr	f1,  f0			/* clear accumulator f1 */
#else
	/* TRMM offset: skip KK steps of A (x2) and B (x1). */
	slwi	TEMP, KK, 1 + ZBASE_SHIFT
	slwi	r0,   KK, 0 + ZBASE_SHIFT
	add	AO, AO, TEMP
	add	BO, B,  r0

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2		/* M unroll = 2 */
#else
	addi	TEMP, KK, 1		/* N unroll = 1 */
#endif

	srawi.	r0,  TEMP,  2		/* main loop: TEMP/4 iterations */
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L1064

#else
	srawi.	r0,  K,  2
	fpmr	f1,  f0
	addi	BO,  B,  - 2 * SIZE
	fpmr	f2,  f0
	mtspr	CTR, r0
	fpmr	f3,  f0
	ble	.L1064
#endif

	/* Prime four K-steps: one B pair and two A pairs per step. */
	LFPDUX	B1,  BO, INC2
	LFPDUX	A1,  AO, INC2
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2
	LFPDUX	A3,  AO, INC2
	LFPDUX	A4,  AO, INC2

	LFPDUX	B3,  BO, INC2
	LFPDUX	A5,  AO, INC2
	LFPDUX	A6,  AO, INC2
	LFPDUX	B4,  BO, INC2
	LFPDUX	A7,  AO, INC2
	LFPDUX	A8,  AO, INC2
	bdz-	.L1063
	.align 4

.L1062:
	/* 4x-unrolled K loop; accumulators f0/f2 (row 1), f1/f3 (row 2). */
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	LFPDUX	A1,  AO, INC2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3
	LFPDUX	A2,  AO, INC2
	LFPDUX	B1,  BO, INC2

	FXCPMADD	f0,  B2, A3, f0
	FXCSMADD	f2,  B2, A3, f2
	LFPDUX	A3,  AO, INC2
	FXCPMADD	f1,  B2, A4, f1
	FXCSMADD	f3,  B2, A4, f3
	LFPDUX	A4,  AO, INC2
	LFPDUX	B2,  BO, INC2

	FXCPMADD	f0,  B3, A5, f0
	FXCSMADD	f2,  B3, A5, f2
	LFPDUX	A5,  AO, INC2
	FXCPMADD	f1,  B3, A6, f1
	FXCSMADD	f3,  B3, A6, f3
	LFPDUX	A6,  AO, INC2
	LFPDUX	B3,  BO, INC2

	FXCPMADD	f0,  B4, A7, f0
	FXCSMADD	f2,  B4, A7, f2
	LFPDUX	A7,  AO, INC2
	FXCPMADD	f1,  B4, A8, f1
	FXCSMADD	f3,  B4, A8, f3
	LFPDUX	A8,  AO, INC2
	LFPDUX	B4,  BO, INC2
	bdnz+	.L1062
	.align 4

.L1063:
	/* Drain the pipelined iteration (no reloads). */
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3

	FXCPMADD	f0,  B2, A3, f0
	FXCSMADD	f2,  B2, A3, f2
	FXCPMADD	f1,  B2, A4, f1
	FXCSMADD	f3,  B2, A4, f3

	FXCPMADD	f0,  B3, A5, f0
	FXCSMADD	f2,  B3, A5, f2
	FXCPMADD	f1,  B3, A6, f1
	FXCSMADD	f3,  B3, A6, f3

	FXCPMADD	f0,  B4, A7, f0
	FXCSMADD	f2,  B4, A7, f2
	FXCPMADD	f1,  B4, A8, f1
	FXCSMADD	f3,  B4, A8, f3
	.align 4

.L1064:
	li	r0, ALPHA
	lfpdx	AP,  SP, r0		/* AP = alpha pair */
#ifdef TRMMKERNEL
	li	r0, FZERO
	lfpsx	f30, SP, r0		/* zero pair (TRMM beta term) */
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub	TEMP, K, KK
#elif defined(LEFT)
	addi	TEMP, KK, 2
#else
	addi	TEMP, KK, 1
#endif
	andi.	r0,  TEMP,  3		/* K%4 remainder */
	mtspr	CTR, r0
#else
	andi.	r0,  K,  3
	mtspr	CTR, r0
#endif
	ble+	.L1068

	LFPDUX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	bdz-	.L1067
	.align 4

.L1066:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	LFPDUX	A1,  AO,  INC2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A2,  AO,  INC2
	bdnz+	.L1066
	.align 4

.L1067:
	FXCPMADD	f0,  B1, A1, f0
	FXCSMADD	f2,  B1, A1, f2
	FXCPMADD	f1,  B1, A2, f1
	FXCSMADD	f3,  B1, A2, f3
	.align 4

.L1068:
#ifndef TRMMKERNEL
	/* Load the two complex C elements for this tile. */
	LFDUX	A1, CO1, INC
	LFDUX	A2, CO1, INC2
	LFSDUX	A1, CO1, INCM1
	LFSDUX	A2, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd	f0, f0, f2		/* fold partial accumulators */
	fpadd	f1, f1, f3
#else
	fpsub	f0, f0, f2		/* conjugated variants */
	fpsub	f1, f1, f3
#endif

#ifndef TRMMKERNEL
	/* C += alpha * acc; INCM3 rewinds CO1 to the first store slot. */
	fxcpmadd A1,  f0, AP,  A1
	fxcpmadd A2,  f1, AP,  A2
	fxcxnpma f0,  f0, AP,  A1
	fxcxnpma f1,  f1, AP,  A2

	STFDUX	f0,  CO1, INCM3
	STFSDUX	f0,  CO1, INC

	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC
#else
	fxcpmadd f12, f0, AP,  f30
	fxcpmadd f13, f1, AP,  f30
	fxcxnpma f0,  f0, AP,  f12
	fxcxnpma f1,  f1, AP,  f13

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC

	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC
#endif

#ifdef TRMMKERNEL
/* Advance AO (x2) / BO (x1) past the unused panel tail, bump KK. */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub	TEMP, K, KK
#ifdef LEFT
	addi	TEMP, TEMP, -2
#else
	addi	TEMP, TEMP, -1
#endif
	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LEFT
	addi	KK, KK, 2
#endif
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0		/* re-zero accumulator seed */
	.align 4
.L1070:
 | 
						|
	andi.	I, M,  1
 | 
						|
	beq	.L1089
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
#else
 | 
						|
	slwi	TEMP, KK, 0 + ZBASE_SHIFT
 | 
						|
	slwi	r0,   KK, 0 + ZBASE_SHIFT
 | 
						|
	add	AO, AO, TEMP
 | 
						|
	add	BO, B,  r0
 | 
						|
 | 
						|
	addi	BO,  BO,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
#endif
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
	srawi.	r0,  TEMP,  3
 | 
						|
	fpmr	f2,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f3,  f0
 | 
						|
	ble	.L1074
 | 
						|
#else
 | 
						|
	addi	BO,  B,  - 2 * SIZE
 | 
						|
	fpmr	f1,  f0
 | 
						|
	srawi.	r0,  K,  3
 | 
						|
	fpmr	f2,  f0
 | 
						|
	mtspr	CTR, r0
 | 
						|
	fpmr	f3,  f0
 | 
						|
	ble	.L1074
 | 
						|
#endif
 | 
						|
 | 
						|
	LFPDUX	A1,  AO, INC2
 | 
						|
	LFPDUX	B1,  BO, INC2
 | 
						|
	LFPDUX	A2,  AO, INC2
 | 
						|
	LFPDUX	B2,  BO, INC2
 | 
						|
	LFPDUX	A3,  AO, INC2
 | 
						|
	LFPDUX	B3,  BO, INC2
 | 
						|
	LFPDUX	A4,  AO, INC2
 | 
						|
	LFPDUX	B4,  BO, INC2
 | 
						|
 | 
						|
	LFPDUX	A5,  AO, INC2
 | 
						|
	LFPDUX	B5,  BO, INC2
 | 
						|
	LFPDUX	A6,  AO, INC2
 | 
						|
	LFPDUX	B6,  BO, INC2
 | 
						|
	LFPDUX	A7,  AO, INC2
 | 
						|
	LFPDUX	A9,  BO, INC2
 | 
						|
	LFPDUX	A8,  AO, INC2
 | 
						|
	LFPDUX	A10, BO, INC2
 | 
						|
	bdz-	.L1073
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1072:
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	FXCSMADD	f1,  B1, A1, f1
 | 
						|
	LFPDUX	A1,  AO, INC2
 | 
						|
	LFPDUX	B1,  BO, INC2
 | 
						|
	FXCPMADD	f2,  B2, A2, f2
 | 
						|
	FXCSMADD	f3,  B2, A2, f3
 | 
						|
	LFPDUX	A2,  AO, INC2
 | 
						|
	LFPDUX	B2,  BO, INC2
 | 
						|
 | 
						|
	FXCPMADD	f0,  B3, A3, f0
 | 
						|
	FXCSMADD	f1,  B3, A3, f1
 | 
						|
	LFPDUX	A3,  AO, INC2
 | 
						|
	LFPDUX	B3,  BO, INC2
 | 
						|
	FXCPMADD	f2,  B4, A4, f2
 | 
						|
	FXCSMADD	f3,  B4, A4, f3
 | 
						|
	LFPDUX	A4,  AO, INC2
 | 
						|
	LFPDUX	B4,  BO, INC2
 | 
						|
 | 
						|
	FXCPMADD	f0,  B5, A5, f0
 | 
						|
	FXCSMADD	f1,  B5, A5, f1
 | 
						|
	LFPDUX	A5,  AO, INC2
 | 
						|
	LFPDUX	B5,  BO, INC2
 | 
						|
	FXCPMADD	f2,  B6, A6, f2
 | 
						|
	FXCSMADD	f3,  B6, A6, f3
 | 
						|
	LFPDUX	A6,  AO, INC2
 | 
						|
	LFPDUX	B6,  BO, INC2
 | 
						|
 | 
						|
	FXCPMADD	f0,  A9,  A7, f0
 | 
						|
	FXCSMADD	f1,  A9,  A7, f1
 | 
						|
	LFPDUX	A7,  AO, INC2
 | 
						|
	LFPDUX	A9,  BO, INC2
 | 
						|
	FXCPMADD	f2,  A10, A8, f2
 | 
						|
	FXCSMADD	f3,  A10, A8, f3
 | 
						|
	LFPDUX	A8,  AO, INC2
 | 
						|
	LFPDUX	A10, BO, INC2
 | 
						|
 | 
						|
	bdnz+	.L1072
 | 
						|
	.align 4
 | 
						|
 | 
						|
.L1073:
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	FXCSMADD	f1,  B1, A1, f1
 | 
						|
	FXCPMADD	f2,  B2, A2, f2
 | 
						|
	FXCSMADD	f3,  B2, A2, f3
 | 
						|
 | 
						|
	FXCPMADD	f0,  B3, A3, f0
 | 
						|
	FXCSMADD	f1,  B3, A3, f1
 | 
						|
	FXCPMADD	f2,  B4, A4, f2
 | 
						|
	FXCSMADD	f3,  B4, A4, f3
 | 
						|
 | 
						|
	FXCPMADD	f0,  B5, A5, f0
 | 
						|
	FXCSMADD	f1,  B5, A5, f1
 | 
						|
	FXCPMADD	f2,  B6, A6, f2
 | 
						|
	FXCSMADD	f3,  B6, A6, f3
 | 
						|
 | 
						|
	FXCPMADD	f0,  A9,  A7, f0
 | 
						|
	FXCSMADD	f1,  A9,  A7, f1
 | 
						|
	FXCPMADD	f2,  A10, A8, f2
 | 
						|
	FXCSMADD	f3,  A10, A8, f3
 | 
						|
	.align 4
 | 
						|
 | 
						|
/*
 * .L1074: fetch alpha from its stack slot (offset ALPHA) into AP, and
 * for the TRMM build also a zero constant (slot FZERO) into f30, then
 * compute the remainder trip count (K mod 8, or TEMP mod 8 for TRMM)
 * left over after the 8x-unrolled loop and place it in CTR.
 */
.L1074:
 | 
						|
	li	r0, ALPHA
 | 
						|
	lfpdx	AP,  SP, r0
 | 
						|
#ifdef TRMMKERNEL
 | 
						|
	li	r0, FZERO
 | 
						|
	lfpsx	f30, SP, r0
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(TRMMKERNEL)
 | 
						|
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
 | 
						|
/* TRMM effective depth: remaining K after the KK offset */
	sub	TEMP, K, KK
 | 
						|
#elif defined(LEFT)
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#else
 | 
						|
	addi	TEMP, KK, 1
 | 
						|
#endif
 | 
						|
/* andi. writes CR0; low 3 bits = leftover iterations (mod 8) */
	andi.	r0,  TEMP,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#else
 | 
						|
	andi.	r0,  K,  7
 | 
						|
	mtspr	CTR, r0
 | 
						|
#endif
 | 
						|
/* remainder count is zero -> skip straight to the store phase */
	ble+	.L1078
 | 
						|
 | 
						|
/* prime the remainder loop: load the first A/B complex pair up front */
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
/* exactly one remainder trip: CTR hits zero, finish it at .L1077 */
	bdz-	.L1077
 | 
						|
	.align 4
 | 
						|
 | 
						|
/*
 * .L1076: remainder loop, one complex multiply-accumulate per trip.
 * Software-pipelined like the main loop: the FMAs consume the A1/B1
 * pair loaded on the previous trip while the next pair is fetched.
 */
.L1076:
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	FXCSMADD	f1,  B1, A1, f1
 | 
						|
	LFPDUX	A1,  AO,  INC2
 | 
						|
	LFPDUX	B1,  BO,  INC2
 | 
						|
	bdnz+	.L1076
 | 
						|
	.align 4
 | 
						|
 | 
						|
/*
 * .L1077: drain the remainder pipeline -- the final A1/B1 pair was
 * already loaded, so just retire its multiply-accumulate.
 */
.L1077:
 | 
						|
	FXCPMADD	f0,  B1, A1, f0
 | 
						|
	FXCSMADD	f1,  B1, A1, f1
 | 
						|
	.align 4
 | 
						|
 | 
						|
/*
 * .L1078: reduce the four partial accumulators, apply alpha, and write
 * the single complex result back to C (pointer CO1).
 * For the plain GEMM build the existing C values are loaded and updated
 * (C = C + alpha*acc); the TRMM build overwrites C using the f30 zero
 * constant loaded at .L1074 instead.
 */
.L1078:
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
/* load the two scalar doubles of the C element (LFDUX advances CO1 by INC) */
	LFDUX	A1, CO1, INC
 | 
						|
	LFDUX	A2, CO1, INC
 | 
						|
#endif
 | 
						|
 | 
						|
/* fold the two accumulator pairs together: f0 += f2, f1 += f3 */
	fpadd	f0, f0, f2
 | 
						|
	fpadd	f1, f1, f3
 | 
						|
 | 
						|
/* NOTE(review): fsmfp presumably packs A2 into the secondary half of A1,
   forming one paired register from the two scalar C loads -- confirm */
	fsmfp	A1, A2
 | 
						|
 | 
						|
/* combine primary/cross partial sums; conjugated variants subtract */
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
 | 
						|
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
 | 
						|
	fpadd	f0, f0, f1
 | 
						|
#else
 | 
						|
	fpsub	f0, f0, f1
 | 
						|
#endif
 | 
						|
 | 
						|
#ifndef TRMMKERNEL
 | 
						|
/* complex alpha scaling into the loaded C value (AP holds alpha) */
	fxcpmadd A1,  f0, AP,  A1
 | 
						|
	fxcxnpma f0,  f0, AP,  A1
 | 
						|
 | 
						|
/* CO1 was advanced twice by the loads; INCM1 steps back to the element,
   then the secondary half is stored at the following slot */
	STFDUX	f0,  CO1, INCM1
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
#else
 | 
						|
/* TRMM: scale against the zero constant f30, i.e. C = alpha*acc */
	fxcpmadd f12, f0, AP,  f30
 | 
						|
	fxcxnpma f0,  f0, AP,  f12
 | 
						|
 | 
						|
	STFDUX	f0,  CO1, INC
 | 
						|
	STFSDUX	f0,  CO1, INC
 | 
						|
#endif
 | 
						|
 | 
						|
/* re-zero f0 from the FZERO stack slot for any subsequent tile */
	li	r0, FZERO
 | 
						|
	lfpsx	f0, SP, r0
 | 
						|
	.align 4
 | 
						|
 | 
						|
/* .L1089: advance the saved B pointer past the consumed panel */
.L1089:
 | 
						|
	addi	B,  BO, 2 * SIZE
 | 
						|
	.align 4
 | 
						|
 | 
						|
/*
 * .L10999: function epilogue.  Restores the callee-saved integer
 * registers r14-r31 with 4-byte lwzu walks (this is the 32-bit ABI
 * path) and the floating-point pairs f14-f31 with lfpdux (16 bytes
 * each), then pops the frame and returns.  The mixed addi/subi stack
 * adjustments mirror the save layout built in the prologue, which is
 * above this chunk -- offsets must be confirmed against it.
 */
.L10999:
 | 
						|
	addi	SP, SP, 20
 | 
						|
 | 
						|
/* lwzu loads from SP+4 and leaves SP pointing at the loaded word,
   so each line both restores a GPR and steps up the save area */
	lwzu	r14,   4(SP)
 | 
						|
	lwzu	r15,   4(SP)
 | 
						|
 | 
						|
	lwzu	r16,   4(SP)
 | 
						|
	lwzu	r17,   4(SP)
 | 
						|
	lwzu	r18,   4(SP)
 | 
						|
	lwzu	r19,   4(SP)
 | 
						|
 | 
						|
	lwzu	r20,   4(SP)
 | 
						|
	lwzu	r21,   4(SP)
 | 
						|
	lwzu	r22,   4(SP)
 | 
						|
	lwzu	r23,   4(SP)
 | 
						|
 | 
						|
	lwzu	r24,   4(SP)
 | 
						|
	lwzu	r25,   4(SP)
 | 
						|
	lwzu	r26,   4(SP)
 | 
						|
	lwzu	r27,   4(SP)
 | 
						|
 | 
						|
	lwzu	r28,   4(SP)
 | 
						|
	lwzu	r29,   4(SP)
 | 
						|
	lwzu	r30,   4(SP)
 | 
						|
	lwzu	r31,   4(SP)
 | 
						|
 | 
						|
/* realign to the FP-pair save area; r0 = 16-byte stride for lfpdux */
	subi	SP, SP, 12
 | 
						|
	li	r0, 16
 | 
						|
 | 
						|
/* restore FP register pairs in descending order, f31 down to f14 */
	lfpdux	f31, SP, r0
 | 
						|
	lfpdux	f30, SP, r0
 | 
						|
	lfpdux	f29, SP, r0
 | 
						|
	lfpdux	f28, SP, r0
 | 
						|
	lfpdux	f27, SP, r0
 | 
						|
	lfpdux	f26, SP, r0
 | 
						|
	lfpdux	f25, SP, r0
 | 
						|
	lfpdux	f24, SP, r0
 | 
						|
	lfpdux	f23, SP, r0
 | 
						|
	lfpdux	f22, SP, r0
 | 
						|
	lfpdux	f21, SP, r0
 | 
						|
	lfpdux	f20, SP, r0
 | 
						|
	lfpdux	f19, SP, r0
 | 
						|
	lfpdux	f18, SP, r0
 | 
						|
	lfpdux	f17, SP, r0
 | 
						|
	lfpdux	f16, SP, r0
 | 
						|
	lfpdux	f15, SP, r0
 | 
						|
	lfpdux	f14, SP, r0
 | 
						|
/* pop the remainder of the frame and return to caller */
	addi	SP, SP, 16
 | 
						|
	blr
 | 
						|
	.align 4
 | 
						|
 | 
						|
 | 
						|
	EPILOGUE
 | 
						|
/* closes a conditional-compilation region opened before this chunk */
#endif
 |