5696 lines
		
	
	
		
			95 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			5696 lines
		
	
	
		
			95 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /*********************************************************************/
 | |
| /* Copyright 2009, 2010 The University of Texas at Austin.           */
 | |
| /* All rights reserved.                                              */
 | |
| /*                                                                   */
 | |
| /* Redistribution and use in source and binary forms, with or        */
 | |
| /* without modification, are permitted provided that the following   */
 | |
| /* conditions are met:                                               */
 | |
| /*                                                                   */
 | |
| /*   1. Redistributions of source code must retain the above         */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer.                                                  */
 | |
| /*                                                                   */
 | |
| /*   2. Redistributions in binary form must reproduce the above      */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer in the documentation and/or other materials       */
 | |
| /*      provided with the distribution.                              */
 | |
| /*                                                                   */
 | |
| /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | |
| /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | |
| /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | |
| /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | |
| /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | |
| /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | |
| /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | |
| /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | |
| /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | |
| /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | |
| /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | |
| /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | |
| /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | |
| /*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | |
| /*                                                                   */
 | |
| /* The views and conclusions contained in the software and           */
 | |
| /* documentation are those of the authors and should not be          */
 | |
| /* interpreted as representing official policies, either expressed   */
 | |
| /* or implied, of The University of Texas at Austin.                 */
 | |
| /*********************************************************************/
 | |
| 
 | |
| #define ASSEMBLER
 | |
| #include "common.h"
 | |
| 
 | |
/* Stack-slot offsets (bytes from SP once the prologue has run) and integer register aliases. */
| #define ALPHA    0	/* slot written last by the prologue ("stfdu f1, -8(SP)"); ALPHA is not referenced in the visible part */
 | |
| #define FZERO	 8	/* the 8 bytes of zero pushed just before f1; loaded into f0 to clear accumulators */
 | |
| /* Sizes: compared against zero before anything writes them, so presumably incoming arguments. */
 | |
| #define	M	r3
 | |
| #define	N	r4
 | |
| #define	K	r5
 | |
| /* Matrix pointers, leading dimension and offset: only given register names when "linux" is defined. */
 | |
| #ifdef linux
 | |
| #define A	r6
 | |
| #define	B	r7
 | |
| #define	C	r8
 | |
| #define	LDC	r9
 | |
| #define OFFSET	r10
 | |
| #endif
 | |
| /* Scratch, A base for the LN/RT paths, running K offset, and byte strides (set with li in the prologue). */
 | |
| #define TEMP	r11
 | |
| #define AORIG	r12
 | |
| #define KK	r14
 | |
| #define INCM1	r15
 | |
| #define INCM4	r16
 | |
| #define INCM2	r17
 | |
| #define INC2	r19
 | |
| #define INC	r20
 | |
| #define INC4	r21
 | |
| /* I receives the M & 1/2/4 tests, J = N >> 2; AO/BO walk A and B, AO2/BO2 run 2 elements ahead of them. */
 | |
| #define	I	r22
 | |
| #define J	r23
 | |
| #define AO	r24
 | |
| #define BO	r25
 | |
| #define AO2	r26
 | |
| #define	BO2	r27
 | |
| /* Output pointers for four consecutive columns of C (each LDC bytes after the previous one). */
 | |
| #define	CO1	r28
 | |
| #define CO2	r29
 | |
| #define	CO3	r30
 | |
| #define	CO4	r31
 | |
| 
 | |
| #ifndef NEEDPARAM
 | |
| 
 | |
/* FP register aliases for operands of the multiply-add loops. The accumulators are */
/* not aliased: the visible code uses f0-f5, f8, f9, f12 and f13 directly. */
| #define A1	f16
 | |
| #define A2	f17
 | |
| #define A3	f18
 | |
| #define A4	f19
 | |
| #define A5	f20
 | |
| #define A6	f21
 | |
| #define A7	f22
 | |
| #define A8	f23
 | |
| #define A9	f24
 | |
| #define A10	f25
 | |
| /* Despite the names, A5-A10 are also loaded from BO/BO2 in several loops. */
 | |
| #define B1	f26
 | |
| #define B2	f27
 | |
| #define B3	f28
 | |
| #define B4	f29
 | |
| #define B5	f30
 | |
| #define B6	f31
 | |
| /* AP shares f31 with B6; it is not referenced in the visible part. */
 | |
| #define AP	B6
 | |
| 
 | |
| 
 | |
/* Entry sequence: push f14-f31 and r14-r31, push a zero slot and f1, validate M/N/K, */
/* load the byte strides, apply the LN / RN / RT starting-pointer adjustments and */
/* compute the four-column panel count J. PROLOGUE and PROFCODE are macros defined */
/* outside this file. */
| 	PROLOGUE
 | |
| 	PROFCODE
 | |
| /* r0 = -16 is the index for the update-form paired stores: one 16-byte push per register. */
 | |
| 	li	r0, -16
 | |
| 
 | |
| 	stfpdux	f14, SP, r0
 | |
| 	stfpdux	f15, SP, r0
 | |
| 	stfpdux	f16, SP, r0
 | |
| 	stfpdux	f17, SP, r0
 | |
| 	stfpdux	f18, SP, r0
 | |
| 	stfpdux	f19, SP, r0
 | |
| 	stfpdux	f20, SP, r0
 | |
| 	stfpdux	f21, SP, r0
 | |
| 	stfpdux	f22, SP, r0
 | |
| 	stfpdux	f23, SP, r0
 | |
| 	stfpdux	f24, SP, r0
 | |
| 	stfpdux	f25, SP, r0
 | |
| 	stfpdux	f26, SP, r0
 | |
| 	stfpdux	f27, SP, r0
 | |
| 	stfpdux	f28, SP, r0
 | |
| 	stfpdux	f29, SP, r0
 | |
| 	stfpdux	f30, SP, r0
 | |
| 	stfpdux	f31, SP, r0
 | |
| /* Push r31 down to r14 as 32-bit words; stwu moves SP down 4 bytes per store. */
 | |
| 	stwu	r31,  -4(SP)
 | |
| 	stwu	r30,  -4(SP)
 | |
| 	stwu	r29,  -4(SP)
 | |
| 	stwu	r28,  -4(SP)
 | |
| 
 | |
| 	stwu	r27,  -4(SP)
 | |
| 	stwu	r26,  -4(SP)
 | |
| 	stwu	r25,  -4(SP)
 | |
| 	stwu	r24,  -4(SP)
 | |
| 
 | |
| 	stwu	r23,  -4(SP)
 | |
| 	stwu	r22,  -4(SP)
 | |
| 	stwu	r21,  -4(SP)
 | |
| 	stwu	r20,  -4(SP)
 | |
| 
 | |
| 	stwu	r19,  -4(SP)
 | |
| 	stwu	r18,  -4(SP)
 | |
| 	stwu	r17,  -4(SP)
 | |
| 	stwu	r16,  -4(SP)
 | |
| 
 | |
| 	stwu	r15,  -4(SP)
 | |
| 	stwu	r14,  -4(SP)	# dummy
 | |
| /* Push two zero words (they end up at SP+8, the FZERO slot), then f1 (ends up at SP+0). */
 | |
| 	li	r0,   0
 | |
| 
 | |
| 	stwu	r0,   -4(SP)
 | |
| 	stwu	r0,   -4(SP)
 | |
| 	stfdu	f1,   -8(SP)
 | |
| /* Scale LDC by BASE_SHIFT (defined outside this file; presumably elements -> bytes). */
 | |
| 	slwi	LDC, LDC, BASE_SHIFT
 | |
| /* Leave through .L999 (outside the visible part) unless M, N and K are all > 0. */
 | |
| 	cmpwi	cr0, M, 0
 | |
| 	ble	.L999
 | |
| 	cmpwi	cr0, N, 0
 | |
| 	ble	.L999
 | |
| 	cmpwi	cr0, K, 0
 | |
| 	ble	.L999
 | |
| /* Byte strides for +1, +2, +4 and -1, -2, -4 elements. */
 | |
| 	li	INC,    1 * SIZE
 | |
| 	li	INC2,   2 * SIZE
 | |
| 	li	INC4,   4 * SIZE
 | |
| 
 | |
| 	li	INCM1, -1 * SIZE
 | |
| 	li	INCM2, -2 * SIZE
 | |
| 	li	INCM4, -4 * SIZE
 | |
| /* Bias C down one element: C is later written through "CO + INC" indexed stores. */
 | |
| 	addi	C, C, - 1 * SIZE
 | |
| /* LN: A += M*K elements and C += M elements. */
 | |
| #ifdef LN
 | |
| 	mullw	r0, M, K
 | |
| 	slwi	r0, r0, BASE_SHIFT
 | |
| 	add	A, A, r0
 | |
| 
 | |
| 	slwi	r0, M, BASE_SHIFT
 | |
| 	add	C, C, r0
 | |
| #endif
 | |
| /* RN: KK = -OFFSET. */
 | |
| #ifdef RN
 | |
| 	neg	KK, OFFSET
 | |
| #endif
 | |
| /* RT: B += N*K elements, C += N*LDC bytes, KK = N - OFFSET. */
 | |
| #ifdef RT
 | |
| 	mullw	r0, N, K
 | |
| 	slwi	r0, r0, BASE_SHIFT
 | |
| 	add	B, B, r0
 | |
| 
 | |
| 	mullw	r0, N, LDC
 | |
| 	add	C, C, r0
 | |
| 
 | |
| 	sub	KK, N, OFFSET
 | |
| #endif
 | |
| /* J = N >> 2 panels of four columns; if there are none, go to .L50 (outside the visible part). */
 | |
| 	srawi.	J, N,  2
 | |
| 	ble	.L50
 | |
| 	.align 4
 | |
| 
 | |
/* .L10: head of one four-column panel. Sets CO1..CO4, the per-panel KK, the biased */
/* A pointer and a zeroed f0, then tests M & 1. */
| .L10:
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 2 + BASE_SHIFT	/* RT: step B back by 4*K elements ... */
 | |
| 	sub	B, B, r0
 | |
| 
 | |
| 	slwi	r0, LDC, 2	/* ... and C back by 4 columns */
 | |
| 	sub	C, C, r0
 | |
| #endif
 | |
| /* Four column pointers, LDC bytes apart. */
 | |
| 	mr	CO1, C
 | |
| 	add	CO2, C,   LDC
 | |
| 	add	CO3, CO2, LDC
 | |
| 	add	CO4, CO3, LDC
 | |
| 
 | |
| #ifdef LN
 | |
| 	add	KK, M, OFFSET
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	mr	KK, OFFSET
 | |
| #endif
 | |
| /* A is kept 4 elements low because the loops read it with pre-incrementing INC4 loads. */
 | |
| #if defined(LN) || defined(RT)
 | |
| 	addi	AORIG, A, -4 * SIZE
 | |
| #else
 | |
| 	addi	AO, A, -4 * SIZE
 | |
| #endif
 | |
| #ifndef RT
 | |
| 	add	C,  CO4, LDC	/* C now points at the next panel */
 | |
| #endif
 | |
| /* f0 = the zero slot at SP + FZERO (lfpsx presumably fills both halves of f0 from it). */
 | |
| 	li	r0, FZERO
 | |
| 	lfpsx	f0, SP, r0
 | |
| 
 | |
| 	andi.	I, M,  1	/* odd M: handle one single row first */
 | |
| 	beq	.L20
 | |
| 
 | |
/* Single-row (M & 1) case: position AO/AO2/BO/BO2, clear f1-f3 and load CTR with */
/* the K-loop count divided by 8. mtspr does not touch CR0, so each "ble .L44" */
/* still tests the result of the preceding "srawi.". */
| #if defined(LT) || defined(RN)
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	addi	BO,  B,  - 4 * SIZE
 | |
| 	fpmr	f2,  f0
 | |
| 	addi	BO2, B,  - 2 * SIZE
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  3	/* LT/RN: the loop length is KK */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L44
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  0 + BASE_SHIFT	/* LN: step AORIG back by 1*K elements */
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| /* Start KK steps in: AO = AORIG + KK*1 elements, BO = B + KK*4 elements. */
 | |
| 	slwi	r0  , KK, 0 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 2 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK	/* LN/RT: the loop length is K - KK */
 | |
| 
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	addi	BO,  BO, - 4 * SIZE
 | |
| 	fpmr	f2,  f0
 | |
| 	addi	BO2, BO,   2 * SIZE
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L44
 | |
| #endif
 | |
| 
 | |
/* Single-row K loop, software-pipelined. One pass uses four A pairs (A1..A4) and */
/* sixteen B pairs, matching the >>3 applied to the count. f0/f1 are updated with */
/* fxcpmadd and f2/f3 with fxcsmadd (per the FP2 ISA these scale by the primary and */
/* secondary half of the first source). The four are added together at .L48. */
/* LFPDUX and friends are macros defined outside this file. */
| 	LFPDUX	A1,  AO,  INC4
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	LFPDUX	A2, AO2,  INC4
 | |
| 	LFPDUX	B3,  BO,  INC4
 | |
| 	LFPDUX	B4,  BO2, INC4
 | |
| 
 | |
| 	LFPDUX	A3,  AO,  INC4
 | |
| 	LFPDUX	A5,  BO,  INC4
 | |
| 	LFPDUX	A6,  BO2, INC4
 | |
| 	LFPDUX	A4, AO2,  INC4
 | |
| 	LFPDUX	A7,  BO,  INC4
 | |
| 	LFPDUX	A8,  BO2, INC4
 | |
| 	bdz-	.L43	/* CTR was 1: operands are already loaded, go straight to the drain */
 | |
| 	.align 4
 | |
| /* Steady state: each multiply-add is paired with the reload of the operand it just used. */
 | |
| .L42:
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	fxcpmadd	f1,  A1, B2, f1
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	fxcsmadd	f2,  A1, B3, f2
 | |
| 	LFPDUX	B3,  BO,  INC4
 | |
| 	fxcsmadd	f3,  A1, B4, f3
 | |
| 	LFPDUX	B4,  BO2, INC4
 | |
| 	LFPDUX	A1,  AO,  INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A2, A5, f0
 | |
| 	LFPDUX	A5,  BO,  INC4
 | |
| 	fxcpmadd	f1,  A2, A6, f1
 | |
| 	LFPDUX	A6,  BO2, INC4
 | |
| 	fxcsmadd	f2,  A2, A7, f2
 | |
| 	LFPDUX	A7,  BO,  INC4
 | |
| 	fxcsmadd	f3,  A2, A8, f3
 | |
| 	LFPDUX	A8,  BO2, INC4
 | |
| 	LFPDUX	A2, AO2,  INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A3, B1, f0
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	fxcpmadd	f1,  A3, B2, f1
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	fxcsmadd	f2,  A3, B3, f2
 | |
| 	LFPDUX	B3,  BO,  INC4
 | |
| 	fxcsmadd	f3,  A3, B4, f3
 | |
| 	LFPDUX	B4,  BO2, INC4
 | |
| 	LFPDUX	A3,  AO,  INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A4, A5, f0
 | |
| 	LFPDUX	A5,  BO,  INC4
 | |
| 	fxcpmadd	f1,  A4, A6, f1
 | |
| 	LFPDUX	A6,  BO2, INC4
 | |
| 	fxcsmadd	f2,  A4, A7, f2
 | |
| 	LFPDUX	A7,  BO,  INC4
 | |
| 	fxcsmadd	f3,  A4, A8, f3
 | |
| 	LFPDUX	A8,  BO2, INC4
 | |
| 	LFPDUX	A4, AO2,  INC4
 | |
| 	bdnz+	.L42
 | |
| 	.align 4
 | |
| /* Drain: last pass. Only the B operands still needed are loaded; A is not read again. */
 | |
| .L43:
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	fxcpmadd	f1,  A1, B2, f1
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	fxcsmadd	f2,  A1, B3, f2
 | |
| 	LFPDUX	B3,  BO,  INC4
 | |
| 	fxcsmadd	f3,  A1, B4, f3
 | |
| 	LFPDUX	B4,  BO2, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A2, A5, f0
 | |
| 	LFPDUX	A5,  BO,  INC4
 | |
| 	fxcpmadd	f1,  A2, A6, f1
 | |
| 	LFPDUX	A6,  BO2, INC4
 | |
| 	fxcsmadd	f2,  A2, A7, f2
 | |
| 	LFPDUX	A7,  BO,  INC4
 | |
| 	fxcsmadd	f3,  A2, A8, f3
 | |
| 	LFPDUX	A8,  BO2, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A3, B1, f0
 | |
| 	fxcpmadd	f1,  A3, B2, f1
 | |
| 	fxcsmadd	f2,  A3, B3, f2
 | |
| 	fxcsmadd	f3,  A3, B4, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  A4, A5, f0
 | |
| 	fxcpmadd	f1,  A4, A6, f1
 | |
| 	fxcsmadd	f2,  A4, A7, f2
 | |
| 	fxcsmadd	f3,  A4, A8, f3
 | |
| 	.align 4
 | |
| 
 | |
/* .L44: single-row K remainder (count & 7), one k-step per pass. The count comes */
/* from andi. and so is never negative: "ble+" is effectively branch-if-zero. */
| .L44:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  7
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L48
 | |
| #else
 | |
| 	andi.	r0, TEMP, 7
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L48
 | |
| #endif
 | |
| /* A is read one element at a time: non-updating load at AO + INC4, then AO += INC. */
 | |
| 	LFDX	A1,  AO,  INC4
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	add	AO, AO, INC
 | |
| 	bdz-	.L47
 | |
| 	.align 4
 | |
| 
 | |
| .L46:
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	fxcpmadd	f1,  A1, B2, f1
 | |
| 	LFDX	A1,  AO,  INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	add	AO, AO, INC
 | |
| 	bdnz+	.L46
 | |
| 	.align 4
 | |
| 
 | |
| .L47:
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	fxcpmadd	f1,  A1, B2, f1
 | |
| 	addi	AO2, AO,   2 * SIZE	/* AO moved in single steps above, so re-derive AO2 */
 | |
| 	.align 4
 | |
| 
 | |
/* .L48: finish the single-row tile. Combine the accumulators, subtract them from the */
/* packed right-hand side, apply the variant-specific substitution, write the result */
/* back to the packed buffer and to C, then update pointers and KK. */
/* NOTE(review): the substitution multiplies by the diagonal entries instead of */
/* dividing, so the packed diagonal is presumably stored inverted - confirm. */
| .L48:
 | |
| 	fpadd	f0, f0, f2
 | |
| 	fpadd	f1, f1, f3
 | |
| /* LN/RT: re-point AO/BO at entry (KK-1) or (KK-4) of the packed data, keeping the -4 element bias on BO. */
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 1
 | |
| #else
 | |
| 	subi	r0, KK, 4
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 0 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 2 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO, B,     r0
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	addi	BO,  BO, - 4 * SIZE
 | |
| 	addi	BO2, BO,   2 * SIZE
 | |
| #endif
 | |
| /* f0/f1 = stored values - accumulated products; read from B for LN/LT, from A for RN/RT. */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LFPDX	f16, BO,  INC4
 | |
| 	LFPDX	f17, BO2, INC4
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| #else
 | |
| 	LFPDX	f16, AO,  INC4
 | |
| 	LFPDX	f17, AO2, INC4
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| #endif
 | |
| /* LN/LT: scale all four values by the single A entry (fxpmul uses the primary half of A1). */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LFPDX	A1,  AO, INC4
 | |
| 
 | |
| 	fxpmul	f0,  A1, f0
 | |
| 	fxpmul	f1,  A1, f1
 | |
| #endif
 | |
| /* RN: uses entries 0,1,2,3,5,6,7,10,11,15 of the 4x4 block at BO; "4 +" undoes BO's -4 bias. */
 | |
| #ifdef RN
 | |
| 	LFD	A1,  (4 +  0) * SIZE(BO)
 | |
| 	LFD	A2,  (4 +  1) * SIZE(BO)
 | |
| 	LFD	A3,  (4 +  2) * SIZE(BO)
 | |
| 	LFD	A4,  (4 +  3) * SIZE(BO)
 | |
| 
 | |
| 	LFD	A5,  (4 +  5) * SIZE(BO)
 | |
| 	LFD	A6,  (4 +  6) * SIZE(BO)
 | |
| 	LFD	A7,  (4 +  7) * SIZE(BO)
 | |
| 	LFD	A8,  (4 + 10) * SIZE(BO)
 | |
| 
 | |
| 	LFD	A9,  (4 + 11) * SIZE(BO)
 | |
| 	LFD	A10, (4 + 15) * SIZE(BO)
 | |
| /* Unpack the secondary halves of f0/f1 into f2/f3 so that scalar ops can be used (per the FP2 ISA). */
 | |
| 	fsmtp	     f2, f0
 | |
| 	fsmtp	     f3, f1
 | |
| /* Forward order: f0, then f2, then f1, then f3. */
 | |
| 	fmul	     f0,  A1, f0
 | |
| 	fnmsub	     f2,  A2, f0, f2
 | |
| 	fnmsub	     f1,  A3, f0, f1
 | |
| 	fnmsub	     f3,  A4, f0, f3
 | |
| 
 | |
| 	fmul	     f2,  A5, f2
 | |
| 	fnmsub	     f1,  A6, f2, f1
 | |
| 	fnmsub	     f3,  A7, f2, f3
 | |
| 
 | |
| 	fmul	     f1,  A8, f1
 | |
| 	fnmsub	     f3,  A9, f1, f3
 | |
| 
 | |
| 	fmul	     f3,  A10, f3
 | |
| /* Repack f2/f3 into the secondary halves of f0/f1. */
 | |
| 	fsmfp	     f0, f2
 | |
| 	fsmfp	     f1, f3
 | |
| #endif
 | |
| /* RT: same shape run in reverse (f3, f1, f2, f0) with entries 15,14,13,12,10,9,8,5,4,0. */
 | |
| #ifdef RT
 | |
| 	LFD	A1,  (4 + 15) * SIZE(BO)
 | |
| 	LFD	A2,  (4 + 14) * SIZE(BO)
 | |
| 	LFD	A3,  (4 + 13) * SIZE(BO)
 | |
| 	LFD	A4,  (4 + 12) * SIZE(BO)
 | |
| 
 | |
| 	LFD	A5,  (4 + 10) * SIZE(BO)
 | |
| 	LFD	A6,  (4 +  9) * SIZE(BO)
 | |
| 	LFD	A7,  (4 +  8) * SIZE(BO)
 | |
| 	LFD	A8,  (4 +  5) * SIZE(BO)
 | |
| 
 | |
| 	LFD	A9,  (4 +  4) * SIZE(BO)
 | |
| 	LFD	A10, (4 +  0) * SIZE(BO)
 | |
| 
 | |
| 	fsmtp	     f2, f0
 | |
| 	fsmtp	     f3, f1
 | |
| 
 | |
| 	fmul	     f3,  A1, f3
 | |
| 	fnmsub	     f1,  A2, f3, f1
 | |
| 	fnmsub	     f2,  A3, f3, f2
 | |
| 	fnmsub	     f0,  A4, f3, f0
 | |
| 
 | |
| 	fmul	     f1,  A5, f1
 | |
| 	fnmsub	     f2,  A6, f1, f2
 | |
| 	fnmsub	     f0,  A7, f1, f0
 | |
| 
 | |
| 	fmul	     f2,  A8, f2
 | |
| 	fnmsub	     f0,  A9, f2, f0
 | |
| 
 | |
| 	fmul	     f0,  A10, f0
 | |
| 
 | |
| 	fsmfp	     f0, f2
 | |
| 	fsmfp	     f1, f3
 | |
| #endif
 | |
| /* Write the solved values back where the right-hand side was read from. */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDX	f0,  BO,  INC4
 | |
| 	STFPDX	f1,  BO2, INC4
 | |
| #else
 | |
| 	STFPDX	f0,  AO,  INC4
 | |
| 	STFPDX	f1,  AO2, INC4
 | |
| #endif
 | |
| /* LN: move the column pointers down one element before storing. */
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 1 * SIZE
 | |
| 	subi	CO2, CO2, 1 * SIZE
 | |
| 	subi	CO3, CO3, 1 * SIZE
 | |
| 	subi	CO4, CO4, 1 * SIZE
 | |
| #endif
 | |
| /* One value per column at CO + INC; these are non-updating stores, so CO1..CO4 do not advance here. */
 | |
| 	STFDX	f0,  CO1, INC
 | |
| 	STFSDX	f0,  CO2, INC
 | |
| 	STFDX	f1,  CO3, INC
 | |
| 	STFSDX	f1,  CO4, INC
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 0 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| /* LT/RN: skip the remaining K - KK entries (1 element each in A, 4 in B). */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 0 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 1
 | |
| #endif
 | |
| 	li	r0, FZERO	/* f0 = zero again for the next tile */
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
/* .L20: two-row (M & 2) case. Skip to .L30 if the bit is clear; otherwise position */
/* the pointers, clear f4/f8/f12 (f0 is already zero) and set CTR = count >> 2. */
| .L20:
 | |
| 	andi.	I, M,  2
 | |
| 	beq	.L30
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	fpmr	f4,  f0
 | |
| 	addi	BO,  B,  - 4 * SIZE
 | |
| 	fpmr	f8,  f0
 | |
| 	addi	BO2, B,  - 2 * SIZE
 | |
| 	fpmr	f12, f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  2	/* LT/RN: the loop length is KK */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L34
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  1 + BASE_SHIFT	/* LN: step AORIG back by 2*K elements */
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| /* Start KK steps in: AO = AORIG + KK*2 elements, BO = B + KK*4 elements. */
 | |
| 	slwi	r0  , KK, 1 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 2 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK	/* LN/RT: the loop length is K - KK */
 | |
| 
 | |
| 	addi	AO2, AO,    2 * SIZE
 | |
| 	fpmr	f4,  f0
 | |
| 	addi	BO,  BO,  - 4 * SIZE
 | |
| 	fpmr	f8,  f0
 | |
| 	addi	BO2, BO,    2 * SIZE
 | |
| 	fpmr	f12, f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  2
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L34
 | |
| #endif
 | |
| 
 | |
/* Two-row K loop, four k-steps per pass (A1..A4, one A pair per step). For each step */
/* the B pair from BO feeds f0 (fxcpmadd) and f4 (fxcsmadd), and the B pair from BO2 */
/* feeds f8 and f12 the same way; the A pair is the second multiplicand throughout. */
| 	LFPDUX	A1,  AO, INC4
 | |
| 	LFPDUX	B1,  BO, INC4
 | |
| 	LFPDUX	B2, BO2, INC4
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	LFPDUX	B3,  BO, INC4
 | |
| 	LFPDUX	B4, BO2, INC4
 | |
| 
 | |
| 	LFPDUX	A3,  AO, INC4
 | |
| 	LFPDUX	A5,  BO, INC4
 | |
| 	LFPDUX	A6, BO2, INC4
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	LFPDUX	A7,  BO, INC4
 | |
| 	LFPDUX	A8, BO2, INC4
 | |
| 	bdz-	.L33	/* CTR was 1: go straight to the drain */
 | |
| 	.align 4
 | |
| 
 | |
| .L32:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	LFPDUX	B1,  BO, INC4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 	LFPDUX	B2, BO2, INC4
 | |
| 	LFPDUX	A1,  AO, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A2, f0
 | |
| 	fxcsmadd	f4,  B3, A2, f4
 | |
| 	LFPDUX	B3,  BO, INC4
 | |
| 	fxcpmadd	f8,  B4, A2, f8
 | |
| 	fxcsmadd	f12, B4, A2, f12
 | |
| 	LFPDUX	B4, BO2, INC4
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A5, A3, f0
 | |
| 	fxcsmadd	f4,  A5, A3, f4
 | |
| 	LFPDUX	A5,  BO, INC4
 | |
| 	fxcpmadd	f8,  A6, A3, f8
 | |
| 	fxcsmadd	f12, A6, A3, f12
 | |
| 	LFPDUX	A6, BO2, INC4
 | |
| 	LFPDUX	A3,  AO, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A7, A4, f0
 | |
| 	fxcsmadd	f4,  A7, A4, f4
 | |
| 	LFPDUX	A7,  BO, INC4
 | |
| 	fxcpmadd	f8,  A8, A4, f8
 | |
| 	fxcsmadd	f12, A8, A4, f12
 | |
| 	LFPDUX	A8, BO2, INC4
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	bdnz+	.L32
 | |
| 	.align 4
 | |
| /* Drain: the same sixteen multiply-adds with no further loads. */
 | |
| .L33:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A2, f0
 | |
| 	fxcsmadd	f4,  B3, A2, f4
 | |
| 	fxcpmadd	f8,  B4, A2, f8
 | |
| 	fxcsmadd	f12, B4, A2, f12
 | |
| 
 | |
| 	fxcpmadd	f0,  A5, A3, f0
 | |
| 	fxcsmadd	f4,  A5, A3, f4
 | |
| 	fxcpmadd	f8,  A6, A3, f8
 | |
| 	fxcsmadd	f12, A6, A3, f12
 | |
| 
 | |
| 	fxcpmadd	f0,  A7, A4, f0
 | |
| 	fxcsmadd	f4,  A7, A4, f4
 | |
| 	fxcpmadd	f8,  A8, A4, f8
 | |
| 	fxcsmadd	f12, A8, A4, f12
 | |
| 	.align 4
 | |
| 
 | |
/* .L34: two-row K remainder (count & 3), one k-step per pass. The count comes from */
/* andi. and is never negative, so "ble+" is effectively branch-if-zero. */
| .L34:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L38
 | |
| #else
 | |
| 	andi.	r0, TEMP, 3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L38
 | |
| #endif
 | |
| /* A advances by one pair (2 elements) per step: non-updating load at AO + INC4, then AO += INC2. */
 | |
| 	LFPDX	A1,  AO,  INC4
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	add	AO, AO, INC2
 | |
| 	bdz-	.L37
 | |
| 	.align 4
 | |
| 
 | |
| .L36:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 	LFPDX	A1,  AO,  INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	add	AO, AO, INC2
 | |
| 	bdnz+	.L36
 | |
| 	.align 4
 | |
| 
 | |
| .L37:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 	.align 4
 | |
| 
 | |
/* .L38: finish the two-row tile. f0/f4/f8/f12 hold the accumulated products; subtract */
/* them from the packed right-hand side, run the variant-specific substitution, write */
/* the result to the packed buffer and to C, then update pointers and KK. */
/* NOTE(review): diagonal entries are multiplied, not divided, so the packed diagonal */
/* is presumably stored inverted - confirm. */
| .L38:
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 2
 | |
| #else
 | |
| 	subi	r0, KK, 4
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 1 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 2 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO, B,     r0
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	addi	BO,  BO, - 4 * SIZE
 | |
| 	addi	BO2, BO,   2 * SIZE
 | |
| #endif
 | |
| /* LN/LT: swap halves between f0<->f4 and f8<->f12 (fsmfp/fsmtp, via copies in f24/f28) to match the order of the packed B data. */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	fpmr	f24, f0
 | |
| 	fpmr	f28, f8
 | |
| 
 | |
| 	fsmfp	f0,  f4
 | |
| 	fsmfp	f8,  f12
 | |
| 	fsmtp	f4,  f24
 | |
| 	fsmtp	f12, f28
 | |
| 
 | |
| 	LFPDUX	f16, BO,  INC4
 | |
| 	LFPDUX	f17, BO2, INC4
 | |
| 	LFPDUX	f18, BO,  INC4
 | |
| 	LFPDUX	f19, BO2, INC4
 | |
| 
 | |
| 	subi	BO,  BO,   8 * SIZE	/* undo the two updating loads */
 | |
| 	subi	BO2, BO2,  8 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f8,  f17,  f8
 | |
| 	fpsub	f4,  f18,  f4
 | |
| 	fpsub	f12, f19,  f12
 | |
| #else
 | |
| 	LFPDUX	f16, AO,  INC4
 | |
| 	LFPDUX	f17, AO2, INC4
 | |
| 	LFPDUX	f18, AO,  INC4
 | |
| 	LFPDUX	f19, AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,   8 * SIZE
 | |
| 	subi	AO2, AO2,  8 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f4,  f17,  f4
 | |
| 	fpsub	f8,  f18,  f8
 | |
| 	fpsub	f12, f19,  f12
 | |
| #endif
 | |
| /* LN: read the 2x2 A block backwards (A1 = entries 2,3; A2 = entries 0,1), solve f4/f12 first, then f0/f8. */
 | |
| #ifdef LN
 | |
| 	addi	AO,  AO,   8 * SIZE
 | |
| 	addi	AO2, AO2,  8 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  AO2, INCM4
 | |
| 	LFPDUX	A2,  AO,  INCM4
 | |
| 
 | |
| 	addi	AO,  AO,  -4 * SIZE
 | |
| 	addi	AO2, AO2, -4 * SIZE
 | |
| 
 | |
| 	fxsmul	f4,  A1, f4
 | |
| 	fxsmul	f12, A1, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A1, f4,  f0
 | |
| 	fxcpnmsub  f8,  A1, f12, f8
 | |
| 
 | |
| 	fxpmul	f0,  A2, f0
 | |
| 	fxpmul	f8,  A2, f8
 | |
| #endif
 | |
| /* LT: read it forwards (A1 = entries 0,1; A2 = entries 2,3), solve f0/f8 first, then f4/f12. */
 | |
| #ifdef LT
 | |
| 	LFPDUX	A1,  AO,  INC4
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,   4 * SIZE
 | |
| 	subi	AO2, AO2,  4 * SIZE
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f8,  A1,  f8
 | |
| 
 | |
| 	fxcsnmsub  f4,  A1, f0, f4
 | |
| 	fxcsnmsub  f12, A1, f8, f12
 | |
| 
 | |
| 	fxsmul	f4,  A2,  f4
 | |
| 	fxsmul	f12, A2,  f12
 | |
| #endif
 | |
| /* RN: walk the 4x4 B block forwards; A1..A6 hold entries (0,1) (2,3) (4,5) (6,7) (10,11) (14,15). */
 | |
| #ifdef RN
 | |
| 	LFPDUX	A1,  BO,  INC4
 | |
| 	LFPDUX	A2,  BO2, INC4
 | |
| 	LFPDUX	A3,  BO,  INC4
 | |
| 	LFPDUX	A4,  BO2, INC4
 | |
| 
 | |
| 	add	BO,  BO,  INC4	/* entries 8,9 are skipped */
 | |
| 	LFPDUX	A5,  BO2, INC4
 | |
| 
 | |
| 	add	BO,  BO,  INC4	/* entries 12,13 are skipped */
 | |
| 	LFPDUX	A6,  BO2, INC4
 | |
| 
 | |
| 	subi	BO,  BO,  16 * SIZE
 | |
| 	subi	BO2, BO2, 16 * SIZE
 | |
| 
 | |
| 	fxpmul	     f0,  A1,  f0
 | |
| 	fxcsnmsub    f4,  A1, f0, f4
 | |
| 	fxcpnmsub    f8,  A2, f0, f8
 | |
| 	fxcsnmsub    f12, A2, f0, f12
 | |
| 
 | |
| 	fxsmul	     f4,  A3,  f4
 | |
| 	fxcpnmsub    f8,  A4, f4, f8
 | |
| 	fxcsnmsub    f12, A4, f4, f12
 | |
| 
 | |
| 	fxpmul	     f8,  A5,  f8
 | |
| 	fxcsnmsub    f12, A5, f8,  f12
 | |
| 	fxsmul	     f12, A6,  f12
 | |
| #endif
 | |
| /* RT: walk it backwards; A1..A6 hold entries (14,15) (12,13) (10,11) (8,9) (4,5) (0,1). */
 | |
| #ifdef RT
 | |
| 	addi	BO,  BO,  20 * SIZE
 | |
| 	addi	BO2, BO2, 20 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  BO2, INCM4
 | |
| 	LFPDUX	A2,  BO,  INCM4
 | |
| 
 | |
| 	LFPDUX	A3,  BO2, INCM4
 | |
| 	LFPDUX	A4,  BO,  INCM4
 | |
| 
 | |
| 	add	BO2, BO2, INCM4
 | |
| 	LFPDUX	A5,  BO,  INCM4
 | |
| 
 | |
| 	add	BO2, BO2, INCM4
 | |
| 	LFPDUX	A6,  BO,  INCM4
 | |
| 	subi	BO,  BO,  4 * SIZE
 | |
| 	subi	BO2, BO2, 4 * SIZE
 | |
| 
 | |
| 	fxsmul	     f12, A1,  f12
 | |
| 	fxcpnmsub    f8,  A1, f12, f8
 | |
| 	fxcsnmsub    f4,  A2, f12, f4
 | |
| 	fxcpnmsub    f0,  A2, f12, f0
 | |
| 
 | |
| 	fxpmul	     f8,  A3,  f8
 | |
| 	fxcsnmsub    f4,  A4, f8,  f4
 | |
| 	fxcpnmsub    f0,  A4, f8,  f0
 | |
| 
 | |
| 	fxsmul	     f4,  A5,  f4
 | |
| 	fxcpnmsub    f0,  A5, f4,  f0
 | |
| 	fxpmul	     f0,  A6,  f0
 | |
| #endif
 | |
| /* LN: move the column pointers down two elements; the updating stores below advance them by two again. */
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 2 * SIZE
 | |
| 	subi	CO2, CO2, 2 * SIZE
 | |
| 	subi	CO3, CO3, 2 * SIZE
 | |
| 	subi	CO4, CO4, 2 * SIZE
 | |
| #endif
 | |
| /* Write back to the packed buffer (then rewind it) and to C. The register-to-column mapping differs between the two branches. */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC4
 | |
| 	STFPDUX	f8,  BO2, INC4
 | |
| 	STFPDUX	f4,  BO,  INC4
 | |
| 	STFPDUX	f12, BO2, INC4
 | |
| 
 | |
| 	subi	BO,  BO,   8 * SIZE
 | |
| 	subi	BO2, BO2,  8 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFDUX	f4,  CO1, INC
 | |
| 	STFSDUX	f0,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 
 | |
| 	STFDUX	f8,  CO3, INC
 | |
| 	STFDUX	f12, CO3, INC
 | |
| 	STFSDUX	f8,  CO4, INC
 | |
| 	STFSDUX	f12, CO4, INC
 | |
| 
 | |
| #else
 | |
| 	STFPDUX	f0,  AO,  INC4
 | |
| 	STFPDUX	f4,  AO2, INC4
 | |
| 	STFPDUX	f8,  AO,  INC4
 | |
| 	STFPDUX	f12, AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,   8 * SIZE
 | |
| 	subi	AO2, AO2,  8 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f4,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 
 | |
| 	STFDUX	f8,  CO3, INC
 | |
| 	STFSDUX	f8,  CO3, INC
 | |
| 	STFDUX	f12, CO4, INC
 | |
| 	STFSDUX	f12, CO4, INC
 | |
| #endif
 | |
| /* LN: cancel the advance made by the stores, for a net move of -2 elements. */
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 2 * SIZE
 | |
| 	subi	CO2, CO2, 2 * SIZE
 | |
| 	subi	CO3, CO3, 2 * SIZE
 | |
| 	subi	CO4, CO4, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 1 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| /* LT/RN: skip the remaining K - KK entries (2 elements each in A, 4 in B). */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 1 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 2
 | |
| #endif
 | |
| /* f0 = zero again for the next tile. */
 | |
| 	li	r0, FZERO
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
/* .L30: four-row (M & 4) case. Skip to .L40 (outside the visible part) if the bit is */
/* clear; otherwise position the pointers, clear the eight accumulators */
/* f0,f1,f4,f5,f8,f9,f12,f13 and set CTR = count >> 2. The fpmr/mtspr instructions */
/* placed between "srawi." and "ble" do not change CR0. */
| .L30:
 | |
| 	andi.	I, M,  4
 | |
| 	beq	.L40
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	fpmr	f4,  f0
 | |
| 	addi	BO,  B,  - 4 * SIZE
 | |
| 	fpmr	f8,  f0
 | |
| 	addi	BO2, B,  - 2 * SIZE
 | |
| 	fpmr	f12, f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  2
 | |
|  	fpmr	f1,  f0
 | |
| 	fpmr	f5,  f0
 | |
| 	fpmr	f9,  f0
 | |
| 	mtspr	CTR, r0
 | |
| 	fpmr	f13, f0
 | |
| 	ble	.L24
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  2 + BASE_SHIFT	/* LN: step AORIG back by 4*K elements */
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| /* Start KK steps in; A and B both hold 4 elements per step here, so both shifts are 2 + BASE_SHIFT. */
 | |
| 	slwi	r0  , KK, 2 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 2 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK	/* LN/RT: the loop length is K - KK */
 | |
| 
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	fpmr	f4,  f0
 | |
| 	addi	BO,  BO,  - 4 * SIZE
 | |
| 	fpmr	f8,  f0
 | |
| 	addi	BO2, BO,    2 * SIZE
 | |
| 	fpmr	f12, f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  2
 | |
|  	fpmr	f1,  f0
 | |
| 	fpmr	f5,  f0
 | |
| 	fpmr	f9,  f0
 | |
| 	mtspr	CTR, r0
 | |
| 	fpmr	f13, f0
 | |
| 	ble	.L24
 | |
| #endif
 | |
| 
 | |
/* Four-row K loop, four k-steps per pass. Each step uses two A pairs (one from AO, */
/* one from AO2) and two B pairs (one from BO, one from BO2). The AO pair feeds */
/* f0/f4/f8/f12 and the AO2 pair feeds f1/f5/f9/f13. A8 is the only operand not */
/* preloaded; it is fetched at the top of each pass and in the drain. */
| 	LFPDUX	A1,   AO, INC4
 | |
| 	LFPDUX	B1,   BO, INC4
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	LFPDUX	A3,   AO, INC4
 | |
| 	LFPDUX	B3,   BO, INC4
 | |
| 	LFPDUX	A4,  AO2, INC4
 | |
| 	LFPDUX	B4,  BO2, INC4
 | |
| 
 | |
| 	LFPDUX	A5,   AO, INC4
 | |
| 	LFPDUX	B5,   BO, INC4
 | |
| 	LFPDUX	A6,  AO2, INC4
 | |
| 	LFPDUX	B6,  BO2, INC4
 | |
| 	LFPDUX	A7,   AO, INC4
 | |
| 	LFPDUX	A9,   BO, INC4
 | |
| 	LFPDUX	A10, BO2, INC4
 | |
| 	bdz-	.L23	/* CTR was 1: go straight to the drain */
 | |
| 	.align 4
 | |
| /* NOTE(review): the nops look like issue-slot padding between FP ops and loads - confirm. */
 | |
| .L22:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	LFPDUX	A8,  AO2, INC4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	nop
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 	LFPDUX	A1,   AO, INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	LFPDUX	B1,   BO, INC4
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	nop
 | |
| 	fxcsmadd	f13, B2, A2, f13
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A3, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B3, A3, f4
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 	fxcpmadd	f8,  B4, A3, f8
 | |
| 	nop
 | |
| 	fxcsmadd	f12, B4, A3, f12
 | |
| 	LFPDUX	A3,   AO, INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  B3, A4, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B3, A4, f5
 | |
| 	LFPDUX	B3,   BO, INC4
 | |
| 	fxcpmadd	f9,  B4, A4, f9
 | |
| 	nop
 | |
| 	fxcsmadd	f13, B4, A4, f13
 | |
| 	LFPDUX	B4,  BO2, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  B5, A5, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B5, A5, f4
 | |
| 	LFPDUX	A4,  AO2, INC4
 | |
| 	fxcpmadd	f8,  B6, A5, f8
 | |
| 	nop
 | |
| 	fxcsmadd	f12, B6, A5, f12
 | |
| 	LFPDUX	A5,   AO, INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  B5, A6, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B5, A6, f5
 | |
| 	LFPDUX	B5,   BO, INC4
 | |
| 	fxcpmadd	f9,  B6, A6, f9
 | |
| 	nop
 | |
| 	fxcsmadd	f13, B6, A6, f13
 | |
| 	LFPDUX	B6,  BO2, INC4
 | |
| 
 | |
| 	fxcpmadd	f0,  A9,  A7, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  A9,  A7, f4
 | |
| 	LFPDUX	A6,  AO2, INC4
 | |
| 	fxcpmadd	f8,  A10, A7, f8
 | |
| 	nop
 | |
| 	fxcsmadd	f12, A10, A7, f12
 | |
| 	LFPDUX	A7,   AO, INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  A9,  A8, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  A9,  A8, f5
 | |
| 	LFPDUX	A9,   BO, INC4
 | |
| 	fxcpmadd	f9,  A10, A8, f9
 | |
| 	nop
 | |
| 	fxcsmadd	f13, A10, A8, f13
 | |
| 	LFPDUX	A10, BO2, INC4
 | |
| 	bdnz+	.L22
 | |
| 	.align 4
 | |
| /* Drain: the same 32 multiply-adds; only the outstanding A8 load remains. */
 | |
| .L23:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	LFPDUX	A8,  AO2, INC4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	fxcsmadd	f13, B2, A2, f13
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A3, f0
 | |
| 	fxcsmadd	f4,  B3, A3, f4
 | |
| 	fxcpmadd	f8,  B4, A3, f8
 | |
| 	fxcsmadd	f12, B4, A3, f12
 | |
| 
 | |
| 	fxcpmadd	f1,  B3, A4, f1
 | |
| 	fxcsmadd	f5,  B3, A4, f5
 | |
| 	fxcpmadd	f9,  B4, A4, f9
 | |
| 	fxcsmadd	f13, B4, A4, f13
 | |
| 
 | |
| 	fxcpmadd	f0,  B5, A5, f0
 | |
| 	fxcsmadd	f4,  B5, A5, f4
 | |
| 	fxcpmadd	f8,  B6, A5, f8
 | |
| 	fxcsmadd	f12, B6, A5, f12
 | |
| 
 | |
| 	fxcpmadd	f1,  B5, A6, f1
 | |
| 	fxcsmadd	f5,  B5, A6, f5
 | |
| 	fxcpmadd	f9,  B6, A6, f9
 | |
| 	fxcsmadd	f13, B6, A6, f13
 | |
| 
 | |
| 	fxcpmadd	f0,  A9, A7, f0
 | |
| 	fxcsmadd	f4,  A9, A7, f4
 | |
| 	fxcpmadd	f8,  A10, A7, f8
 | |
| 	fxcsmadd	f12, A10, A7, f12
 | |
| 
 | |
| 	fxcpmadd	f1,  A9, A8, f1
 | |
| 	fxcsmadd	f5,  A9, A8, f5
 | |
| 	fxcpmadd	f9,  A10, A8, f9
 | |
| 	fxcsmadd	f13, A10, A8, f13
 | |
| 	.align 4
 | |
| 
 | |
/* .L24: four-row K remainder (count & 3), one k-step per pass: two A pairs and two */
/* B pairs in, eight multiply-adds. The count comes from andi. and is never negative, */
/* so "ble+" is effectively branch-if-zero. */
| .L24:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L28
 | |
| #else
 | |
| 	andi.	r0, TEMP, 3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L28
 | |
| #endif
 | |
| /* Here all four pointers advance with updating loads (4 elements per step each). */
 | |
| 	LFPDUX	A1,  AO,  INC4
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	bdz-	.L27
 | |
| 	.align 4
 | |
| 
 | |
| .L26:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 	LFPDUX	A1,  AO,  INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	LFPDUX	B1,  BO,  INC4
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	fxcsmadd	f13, B2, A2, f13
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 	LFPDUX	B2,  BO2, INC4
 | |
| 	bdnz+	.L26
 | |
| 	.align 4
 | |
| /* Last step: operands are already loaded. */
 | |
| .L27:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	fxcsmadd	f13, B2, A2, f13
 | |
| 	.align 4
 | |
| 
 | |
| .L28:
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 4
 | |
| #else
 | |
| 	subi	r0, KK, 4
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 2 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 2 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO, B,     r0
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	addi	BO,  BO, - 4 * SIZE
 | |
| 	addi	BO2, BO,   2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	fpmr	f24, f0
 | |
| 	fpmr	f25, f1
 | |
| 	fpmr	f28, f8
 | |
| 	fpmr	f29, f9
 | |
| 
 | |
| 	fsmfp	f0,  f4
 | |
| 	fsmfp	f1,  f5
 | |
| 	fsmfp	f8,  f12
 | |
| 	fsmfp	f9,  f13
 | |
| 
 | |
| 	fsmtp	f4,  f24
 | |
| 	fsmtp	f5,  f25
 | |
| 	fsmtp	f12, f28
 | |
| 	fsmtp	f13, f29
 | |
| 
 | |
| 	LFPDUX	f16, BO,  INC4
 | |
| 	LFPDUX	f17, BO2, INC4
 | |
| 	LFPDUX	f18, BO,  INC4
 | |
| 	LFPDUX	f19, BO2, INC4
 | |
| 
 | |
|  	LFPDUX	f20, BO,  INC4
 | |
| 	LFPDUX	f21, BO2, INC4
 | |
| 	LFPDUX	f22, BO,  INC4
 | |
| 	LFPDUX	f23, BO2, INC4
 | |
| 
 | |
| 	subi	BO,  BO,  16 * SIZE
 | |
| 	subi	BO2, BO2, 16 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f8,  f17,  f8
 | |
| 	fpsub	f4,  f18,  f4
 | |
| 	fpsub	f12, f19,  f12
 | |
| 
 | |
| 	fpsub	f1,  f20,  f1
 | |
| 	fpsub	f9,  f21,  f9
 | |
| 	fpsub	f5,  f22,  f5
 | |
| 	fpsub	f13, f23,  f13
 | |
| #else
 | |
| 	LFPDUX	f16, AO,  INC4
 | |
| 	LFPDUX	f17, AO2, INC4
 | |
| 	LFPDUX	f18, AO,  INC4
 | |
| 	LFPDUX	f19, AO2, INC4
 | |
| 	LFPDUX	f20, AO,  INC4
 | |
| 	LFPDUX	f21, AO2, INC4
 | |
| 	LFPDUX	f22, AO,  INC4
 | |
| 	LFPDUX	f23, AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,  16 * SIZE
 | |
| 	subi	AO2, AO2, 16 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| 	fpsub	f4,  f18,  f4
 | |
| 	fpsub	f5,  f19,  f5
 | |
| 
 | |
| 	fpsub	f8,  f20,  f8
 | |
| 	fpsub	f9,  f21,  f9
 | |
| 	fpsub	f12, f22,  f12
 | |
| 	fpsub	f13, f23,  f13
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
|        addi	AO,  AO,  20 * SIZE
 | |
|        addi	AO2, AO2, 20 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  AO2, INCM4
 | |
| 	LFPDUX	A2,  AO,  INCM4
 | |
| 	LFPDUX	A3,  AO2, INCM4
 | |
| 	LFPDUX	A4,  AO,  INCM4
 | |
| 
 | |
| 	add	AO2, AO2, INCM4
 | |
| 	LFPDUX	A5,  AO,  INCM4
 | |
| 	add	AO2, AO2, INCM4
 | |
| 	LFPDUX	A6,  AO,  INCM4
 | |
| 
 | |
| 	addi	AO,  AO,  -4 * SIZE
 | |
| 	addi	AO2, AO2, -4 * SIZE
 | |
| 
 | |
| 	fxsmul	f5,  A1, f5
 | |
| 	fxsmul	f13, A1, f13
 | |
| 
 | |
| 	fxcpnmsub  f1,  A1, f5,  f1
 | |
| 	fxcpnmsub  f9,  A1, f13, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A2, f5,  f4
 | |
| 	fxcsnmsub  f12, A2, f13, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A2, f5,  f0
 | |
| 	fxcpnmsub  f8,  A2, f13, f8
 | |
| 
 | |
| 	fxpmul	f1,  A3, f1
 | |
| 	fxpmul	f9,  A3, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A4, f1,  f4
 | |
| 	fxcsnmsub  f12, A4, f9,  f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A4, f1,  f0
 | |
| 	fxcpnmsub  f8,  A4, f9,  f8
 | |
| 
 | |
| 	fxsmul	f4,  A5, f4
 | |
| 	fxsmul	f12, A5, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A5, f4,  f0
 | |
| 	fxcpnmsub  f8,  A5, f12, f8
 | |
| 
 | |
| 	fxpmul	f0,  A6, f0
 | |
| 	fxpmul	f8,  A6, f8
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LFPDUX	A1,  AO,  INC4
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 	LFPDUX	A3,  AO,  INC4
 | |
| 	LFPDUX	A4,  AO2, INC4
 | |
| 
 | |
| 	add	AO,  AO,  INC4
 | |
| 	LFPDUX	A5,  AO2, INC4
 | |
| 	add	AO,  AO,  INC4
 | |
| 	LFPDUX	A6,  AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,  16 * SIZE
 | |
| 	subi	AO2, AO2, 16 * SIZE
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f8,  A1,  f8
 | |
| 
 | |
| 	fxcsnmsub  f4,  A1, f0, f4
 | |
| 	fxcsnmsub  f12, A1, f8, f12
 | |
| 
 | |
| 	fxcpnmsub  f1,  A2, f0, f1
 | |
| 	fxcpnmsub  f9,  A2, f8, f9
 | |
| 
 | |
| 	fxcsnmsub  f5,  A2, f0, f5
 | |
| 	fxcsnmsub  f13, A2, f8, f13
 | |
| 
 | |
| 	fxsmul	f4,  A3,  f4
 | |
| 	fxsmul	f12, A3,  f12
 | |
| 
 | |
| 	fxcpnmsub  f1,  A4, f4,  f1
 | |
| 	fxcpnmsub  f9,  A4, f12, f9
 | |
| 
 | |
| 	fxcsnmsub  f5,  A4, f4,  f5
 | |
| 	fxcsnmsub  f13, A4, f12, f13
 | |
| 
 | |
| 	fxpmul	f1,  A5,  f1
 | |
| 	fxpmul	f9,  A5,  f9
 | |
| 
 | |
| 	fxcsnmsub  f5,  A5, f1, f5
 | |
| 	fxcsnmsub  f13, A5, f9, f13
 | |
| 
 | |
| 	fxsmul	f5,  A6,  f5
 | |
| 	fxsmul	f13, A6,  f13
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFPDUX	A1,  BO,  INC4
 | |
| 	LFPDUX	A2,  BO2, INC4
 | |
| 	LFPDUX	A3,  BO,  INC4
 | |
| 	LFPDUX	A4,  BO2, INC4
 | |
| 
 | |
| 	add	BO,  BO,  INC4
 | |
| 	LFPDUX	A5,  BO2, INC4
 | |
| 
 | |
| 	add	BO,  BO,  INC4
 | |
| 	LFPDUX	A6,  BO2, INC4
 | |
| 
 | |
| 	subi	BO,  BO,  16 * SIZE
 | |
| 	subi	BO2, BO2, 16 * SIZE
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| 	fxcsnmsub    f4,  A1, f0, f4
 | |
| 	fxcsnmsub    f5,  A1, f1, f5
 | |
| 
 | |
| 	fxcpnmsub    f8,  A2, f0, f8
 | |
| 	fxcpnmsub    f9,  A2, f1, f9
 | |
| 	fxcsnmsub    f12, A2, f0, f12
 | |
| 	fxcsnmsub    f13, A2, f1, f13
 | |
| 
 | |
| 	fxsmul	f4,  A3,  f4
 | |
| 	fxsmul	f5,  A3,  f5
 | |
| 	fxcpnmsub    f8,  A4, f4, f8
 | |
| 	fxcpnmsub    f9,  A4, f5, f9
 | |
| 
 | |
| 	fxcsnmsub    f12, A4, f4, f12
 | |
| 	fxcsnmsub    f13, A4, f5, f13
 | |
| 
 | |
| 	fxpmul	f8,  A5,  f8
 | |
| 	fxpmul	f9,  A5,  f9
 | |
| 	fxcsnmsub    f12, A5, f8,  f12
 | |
| 	fxcsnmsub    f13, A5, f9,  f13
 | |
| 
 | |
| 	fxsmul	f12,  A6,  f12
 | |
| 	fxsmul	f13,  A6,  f13
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	addi	BO,  BO,  20 * SIZE
 | |
| 	addi	BO2, BO2, 20 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  BO2, INCM4
 | |
| 	LFPDUX	A2,  BO,  INCM4
 | |
| 
 | |
| 	LFPDUX	A3,  BO2, INCM4
 | |
| 	LFPDUX	A4,  BO,  INCM4
 | |
| 
 | |
| 	add	BO2, BO2, INCM4
 | |
| 	LFPDUX	A5,  BO,  INCM4
 | |
| 
 | |
| 	add	BO2, BO2, INCM4
 | |
| 	LFPDUX	A6,  BO,  INCM4
 | |
| 	subi	BO,  BO,  4 * SIZE
 | |
| 	subi	BO2, BO2, 4 * SIZE
 | |
| 
 | |
| 	fxsmul	f12, A1,  f12
 | |
| 	fxsmul	f13, A1,  f13
 | |
| 	fxcpnmsub    f8,  A1, f12, f8
 | |
| 	fxcpnmsub    f9,  A1, f13, f9
 | |
| 
 | |
| 	fxcsnmsub    f4,  A2, f12, f4
 | |
| 	fxcsnmsub    f5,  A2, f13, f5
 | |
| 	fxcpnmsub    f0,  A2, f12, f0
 | |
| 	fxcpnmsub    f1,  A2, f13, f1
 | |
| 
 | |
| 	fxpmul	f8,  A3,  f8
 | |
| 	fxpmul	f9,  A3,  f9
 | |
| 	fxcsnmsub    f4,  A4, f8,  f4
 | |
| 	fxcsnmsub    f5,  A4, f9,  f5
 | |
| 
 | |
| 	fxcpnmsub    f0,  A4, f8,  f0
 | |
| 	fxcpnmsub    f1,  A4, f9,  f1
 | |
| 
 | |
| 	fxsmul	f4,  A5,  f4
 | |
| 	fxsmul	f5,  A5,  f5
 | |
| 	fxcpnmsub    f0,  A5, f4,  f0
 | |
| 	fxcpnmsub    f1,  A5, f5,  f1
 | |
| 
 | |
| 	fxpmul	f0,  A6,  f0
 | |
| 	fxpmul	f1,  A6,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 4 * SIZE
 | |
| 	subi	CO2, CO2, 4 * SIZE
 | |
| 	subi	CO3, CO3, 4 * SIZE
 | |
| 	subi	CO4, CO4, 4 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC4
 | |
| 	STFPDUX	f8,  BO2, INC4
 | |
| 	STFPDUX	f4,  BO,  INC4
 | |
| 	STFPDUX	f12, BO2, INC4
 | |
| 	STFPDUX	f1,  BO,  INC4
 | |
| 	STFPDUX	f9,  BO2, INC4
 | |
| 	STFPDUX	f5,  BO,  INC4
 | |
| 	STFPDUX	f13, BO2, INC4
 | |
| 
 | |
| 	subi	BO,  BO,  16 * SIZE
 | |
| 	subi	BO2, BO2, 16 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFDUX	f4,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFDUX	f5,  CO1, INC
 | |
| 
 | |
| 	STFSDUX	f0,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 	STFSDUX	f1,  CO2, INC
 | |
| 	STFSDUX	f5,  CO2, INC
 | |
| 
 | |
| 	STFDUX	f8,  CO3, INC
 | |
| 	STFDUX	f12, CO3, INC
 | |
| 	STFDUX	f9,  CO3, INC
 | |
| 	STFDUX	f13, CO3, INC
 | |
| 
 | |
| 	STFSDUX	f8,  CO4, INC
 | |
| 	STFSDUX	f12, CO4, INC
 | |
| 	STFSDUX	f9,  CO4, INC
 | |
| 	STFSDUX	f13, CO4, INC
 | |
| #else
 | |
| 	STFPDUX	f0,  AO,  INC4
 | |
| 	STFPDUX	f1,  AO2, INC4
 | |
| 	STFPDUX	f4,  AO,  INC4
 | |
| 	STFPDUX	f5,  AO2, INC4
 | |
| 	STFPDUX	f8,  AO,  INC4
 | |
| 	STFPDUX	f9,  AO2, INC4
 | |
| 	STFPDUX	f12, AO,  INC4
 | |
| 	STFPDUX	f13, AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,  16 * SIZE
 | |
| 	subi	AO2, AO2, 16 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| 	STFDUX	f4,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 	STFDUX	f5,  CO2, INC
 | |
| 	STFSDUX	f5,  CO2, INC
 | |
| 
 | |
| 	STFDUX	f8,  CO3, INC
 | |
| 	STFSDUX	f8,  CO3, INC
 | |
| 	STFDUX	f9,  CO3, INC
 | |
| 	STFSDUX	f9,  CO3, INC
 | |
| 	STFDUX	f12, CO4, INC
 | |
| 	STFSDUX	f12, CO4, INC
 | |
| 	STFDUX	f13, CO4, INC
 | |
| 	STFSDUX	f13, CO4, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 4 * SIZE
 | |
| 	subi	CO2, CO2, 4 * SIZE
 | |
| 	subi	CO3, CO3, 4 * SIZE
 | |
| 	subi	CO4, CO4, 4 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 2 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 2 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 4
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 4
 | |
| #endif
 | |
| 
 | |
| 	li	r0, FZERO
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
| .L40:	/* Entry to the 8-wide pass: work out how many groups of 8 there are along M. */
 | |
| 	srawi.	I, M,  3	/* I = M >> 3 (arithmetic shift); the dot form also sets CR0 from the result */
 | |
| 	ble	.L49	/* I <= 0: no group of 8 to process, continue at .L49 (defined outside this chunk) */
 | |
| 	.align 4
 | |
| 
 | |
| .L11:	/* Per-block setup: place the A/B cursors, copy f0 into accumulators f1-f15, load CTR with the unrolled loop count, then preload operands. */
 | |
| #if defined(LT) || defined(RN)	/* LT/RN: use AO and B as they stand; loop length is derived from KK */
 | |
| 	addi	AO2, AO,   2 * SIZE	/* AO2 = AO + 2 elements: second cursor into A */
 | |
| 	fpmr	f4,  f0	/* accumulators take f0's value (f0 is loaded from the FZERO stack slot before .L40; presumably 0.0 - confirm) */
 | |
| 	addi	BO,  B,  - 4 * SIZE	/* B cursors start below B; presumably LFPDUX advances by INC4 before loading - confirm */
 | |
| 	fpmr	f8,  f0
 | |
| 	addi	BO2, B,  - 2 * SIZE	/* BO2 sits 2 elements above BO */
 | |
| 	fpmr	f12, f0
 | |
| 
 | |
| 	fpmr	f5,  f0
 | |
| 	fpmr	f9,  f0
 | |
| 	fpmr	f13, f0
 | |
| 	fpmr	f2,  f0
 | |
| 
 | |
| 	fpmr	f6,  f0
 | |
| 	fpmr	f10, f0
 | |
| 	fpmr	f14, f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	fpmr	f7,  f0
 | |
| 	fpmr	f11, f0
 | |
| 	fpmr	f15, f0
 | |
| 	nop
 | |
| 
 | |
| 	srawi.	r0,  KK,  2	/* r0 = KK >> 2: number of 4-step unrolled passes; sets CR0 */
 | |
|  	fpmr	f1,  f0
 | |
| 	mtspr	CTR, r0	/* CTR = pass count (mtspr does not touch CR0) */
 | |
| 	ble	.L14	/* no full pass: go straight to the KK & 3 remainder handling */
 | |
| #else	/* not LT/RN */
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  3 + BASE_SHIFT	/* r0 = K << (3 + BASE_SHIFT) */
 | |
| 	sub	AORIG, AORIG, r0	/* LN only: step AORIG back by that amount */
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 3 + BASE_SHIFT	/* A offset: KK << (3 + BASE_SHIFT) */
 | |
| 	slwi	TEMP, KK, 2 + BASE_SHIFT	/* B offset: KK << (2 + BASE_SHIFT) */
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK	/* TEMP = K - KK: loop length for this variant (also read later at .L14) */
 | |
| 
 | |
| 	addi	AO2, AO,   2 * SIZE	/* second A cursor, 2 elements past AO */
 | |
| 	fpmr	f4,  f0
 | |
| 	addi	BO,  BO, - 4 * SIZE	/* back BO up by 4 elements ahead of the update-form loads */
 | |
| 	fpmr	f8,  f0
 | |
| 	addi	BO2, BO,   2 * SIZE	/* BO2 sits 2 elements above BO */
 | |
| 	fpmr	f12, f0
 | |
| 
 | |
| 	fpmr	f5,  f0
 | |
| 	fpmr	f9,  f0
 | |
| 	fpmr	f13, f0
 | |
| 	fpmr	f2,  f0
 | |
| 
 | |
| 	fpmr	f6,  f0
 | |
| 	fpmr	f10, f0
 | |
| 	fpmr	f14, f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	fpmr	f7,  f0
 | |
| 	fpmr	f11, f0
 | |
| 	fpmr	f15, f0
 | |
| 	nop
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  2	/* r0 = (K - KK) >> 2: number of 4-step unrolled passes; sets CR0 */
 | |
|  	fpmr	f1,  f0
 | |
| 	mtspr	CTR, r0	/* CTR = pass count (mtspr does not touch CR0) */
 | |
| 	ble	.L14	/* no full pass: go straight to the remainder handling */
 | |
| #endif
 | |
| /* Preload operands for the first unrolled pass. The interleaved fpmr's repeat the copies from f0 already made above (presumably kept as issue-slot fillers - confirm). */
 | |
| 	LFPDUX	A1,  AO, INC4
 | |
| 	fpmr	f5,  f0
 | |
| 	LFPDUX	A3,  AO, INC4
 | |
| 	fpmr	f9,  f0
 | |
| 	LFPDUX	B1,  BO, INC4
 | |
| 	fpmr	f13, f0
 | |
| 
 | |
| 	LFPDUX	A5,  AO, INC4
 | |
| 	fpmr	f2,  f0
 | |
| 	LFPDUX	A6,  AO, INC4
 | |
| 	fpmr	f6,  f0
 | |
| 	LFPDUX	B3,  BO, INC4
 | |
| 	fpmr	f10, f0
 | |
| 	LFPDUX	A7,  AO, INC4
 | |
| 	fpmr	f14, f0
 | |
| 
 | |
| 	LFPDUX	A8,  AO, INC4
 | |
| 	fpmr	f3,  f0
 | |
| 	LFPDUX	B5,  BO, INC4
 | |
| 	fpmr	f7,  f0
 | |
| 	LFPDUX	A9,  AO, INC4
 | |
| 	fpmr	f11, f0
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	fpmr	f15, f0
 | |
| 	LFPDUX	B2, BO2, INC4
 | |
| 	bdz-	.L13	/* CTR -= 1; if that was the only pass, skip the loop and run the final pass at .L13 */
 | |
| 	.align 4
 | |
| 
 | |
| .L12:	/* Main loop, unrolled four times: each "## n ##" group issues 16 paired multiply-adds into f0-f15 while loading operands for later groups. */
 | |
| /* Operand pattern: fxcpmadd/fxcsmadd alternate on the same B register; presumably they scale by its primary/secondary half respectively - confirm against the FP2 ISA. */
 | |
| ## 1 ##
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	LFPDUX	B4, BO2, INC4	/* B4 is consumed by group 2 */
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 	LFPDUX	B6,  BO, INC4	/* B6 is consumed by group 4 */
 | |
| 
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4	/* A4 is used by the last four multiply-adds of this group */
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	LFPDUX	A10, AO, INC4	/* A10 is consumed by group 4 */
 | |
| 	fxcsmadd	f13, B2, A2, f13
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B1, A3, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B2, A3, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B2, A3, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B1, A4, f7
 | |
| 	LFPDUX	A2, AO2, INC4	/* A2 is reloaded for group 2 */
 | |
| 	fxcpmadd	f11, B2, A4, f11
 | |
| 	LFPDUX	A1,  AO, INC4	/* A1 is reloaded for group 1 of the next pass */
 | |
| 	fxcsmadd	f15, B2, A4, f15
 | |
| 	nop
 | |
| 
 | |
| ## 2 ##
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A5, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B3, A5, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B4, A5, f8
 | |
| 	LFPDUX	B2, BO2, INC4
 | |
| 	fxcsmadd	f12, B4, A5, f12
 | |
| 	LFPDUX	B1,  BO, INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  B3, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B3, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	fxcpmadd	f9,  B4, A2, f9
 | |
| 	LFPDUX	A3,  AO, INC4
 | |
| 	fxcsmadd	f13, B4, A2, f13
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f2,  B3, A6, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B3, A6, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B4, A6, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B4, A6, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B3, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B3, A4, f7
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	fxcpmadd	f11, B4, A4, f11
 | |
| 	LFPDUX	A5,  AO, INC4
 | |
| 	fxcsmadd	f15, B4, A4, f15
 | |
| 	nop
 | |
| 
 | |
| ## 3 ##
 | |
| 
 | |
| 	fxcpmadd	f0,  B5, A7, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B5, A7, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B2, A7, f8
 | |
| 	LFPDUX	B4, BO2, INC4
 | |
| 	fxcsmadd	f12, B2, A7, f12
 | |
| 	LFPDUX	B3,  BO, INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  B5, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B5, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	LFPDUX	A6,  AO, INC4
 | |
| 	fxcsmadd	f13, B2, A2, f13
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f2,  B5, A8, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B5, A8, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B2, A8, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B2, A8, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B5, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B5, A4, f7
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	fxcpmadd	f11, B2, A4, f11
 | |
| 	LFPDUX	A7,  AO, INC4
 | |
| 	fxcsmadd	f15, B2, A4, f15
 | |
| 	nop
 | |
| 
 | |
| ## 4 ##
 | |
| 	fxcpmadd	f0,  B6, A9, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B6, A9, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B4, A9, f8
 | |
| 	LFPDUX	B2, BO2, INC4
 | |
| 	fxcsmadd	f12, B4, A9, f12
 | |
| 	LFPDUX	B5,  BO, INC4
 | |
| 
 | |
| 	fxcpmadd	f1,  B6, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B6, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	fxcpmadd	f9,  B4, A2, f9
 | |
| 	LFPDUX	A8,  AO, INC4
 | |
| 	fxcsmadd	f13, B4, A2, f13
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f2,  B6, A10, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B6, A10, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B4, A10, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B4, A10, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B6, A4, f3
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	fxcsmadd	f7,  B6, A4, f7
 | |
| 	LFPDUX	A9,  AO, INC4
 | |
| 	fxcpmadd	f11, B4, A4, f11
 | |
| 	nop
 | |
| 	fxcsmadd	f15, B4, A4, f15
 | |
| 	bdnz+	.L12	/* CTR -= 1; repeat while passes remain, otherwise fall through to the final pass at .L13 */
 | |
| 	.align 4
 | |
| 
 | |
| .L13:	/* Final unrolled pass: the same 4 x 16 multiply-adds as .L12, keeping only the loads that later groups of this pass still read. */
 | |
| ## 1 ##
 | |
| /* Loads that .L12 issues for its next pass (A1, A3, A5-A9, B1, B3, B5) are replaced by nop here. */
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B2, A1, f8
 | |
| 	LFPDUX	B4, BO2, INC4	/* B4 is consumed by group 2 */
 | |
| 	fxcsmadd	f12, B2, A1, f12
 | |
| 	LFPDUX	B6,  BO, INC4	/* B6 is consumed by group 4 */
 | |
| 
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	LFPDUX	A10, AO, INC4	/* A10 is consumed by group 4 */
 | |
| 	fxcsmadd	f13, B2, A2, f13
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B1, A3, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B2, A3, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B2, A3, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B1, A4, f7
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	fxcpmadd	f11, B2, A4, f11
 | |
| 	nop
 | |
| 	fxcsmadd	f15, B2, A4, f15
 | |
| 	nop
 | |
| 
 | |
| ## 2 ##
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A5, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B3, A5, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B4, A5, f8
 | |
| 	LFPDUX	B2, BO2, INC4	/* B2 is consumed by group 3 */
 | |
| 	fxcsmadd	f12, B4, A5, f12
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f1,  B3, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B3, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	fxcpmadd	f9,  B4, A2, f9
 | |
| 	nop
 | |
| 	fxcsmadd	f13, B4, A2, f13
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f2,  B3, A6, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B3, A6, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B4, A6, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B4, A6, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B3, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B3, A4, f7
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	fxcpmadd	f11, B4, A4, f11
 | |
| 	nop
 | |
| 	fxcsmadd	f15, B4, A4, f15
 | |
| 	nop
 | |
| 
 | |
| ## 3 ##
 | |
| 
 | |
| 	fxcpmadd	f0,  B5, A7, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B5, A7, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B2, A7, f8
 | |
| 	LFPDUX	B4, BO2, INC4	/* B4 is consumed by group 4 */
 | |
| 	fxcsmadd	f12, B2, A7, f12
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f1,  B5, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B5, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4
 | |
| 	fxcpmadd	f9,  B2, A2, f9
 | |
| 	nop
 | |
| 
 | |
| 	fxcsmadd	f13, B2, A2, f13	/* unlike the other groups, no nop follows this multiply-add */
 | |
| 
 | |
| 	fxcpmadd	f2,  B5, A8, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B5, A8, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B2, A8, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B2, A8, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B5, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B5, A4, f7
 | |
| 	LFPDUX	A2, AO2, INC4
 | |
| 	fxcpmadd	f11, B2, A4, f11
 | |
| 	nop
 | |
| 	fxcsmadd	f15, B2, A4, f15
 | |
| 	nop
 | |
| 
 | |
| ## 4 ##
 | |
| 
 | |
| 	fxcpmadd	f0,  B6, A9, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B6, A9, f4
 | |
| 	nop
 | |
| 	fxcpmadd	f8,  B4, A9, f8
 | |
| 	nop
 | |
| 	fxcsmadd	f12, B4, A9, f12
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f1,  B6, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B6, A2, f5
 | |
| 	LFPDUX	A4, AO2, INC4	/* last load of the pass; A4 feeds the final four multiply-adds */
 | |
| 	fxcpmadd	f9,  B4, A2, f9
 | |
| 	nop
 | |
| 	fxcsmadd	f13, B4, A2, f13
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f2,  B6, A10, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B6, A10, f6
 | |
| 	nop
 | |
| 	fxcpmadd	f10, B4, A10, f10
 | |
| 	nop
 | |
| 	fxcsmadd	f14, B4, A10, f14
 | |
| 	nop
 | |
| 
 | |
| 	fxcpmadd	f3,  B6, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B6, A4, f7
 | |
| 	nop
 | |
| 	fxcpmadd	f11, B4, A4, f11
 | |
| 	nop
 | |
| 	fxcsmadd	f15, B4, A4, f15
 | |
| 	nop
 | |
| 	.align 4
 | |
| 
 | |
| .L14:	/* Remainder setup: CTR = loop length mod 4, i.e. the steps left over after the 4x-unrolled passes. */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  3	/* r0 = KK & 3; sets CR0 */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L18	/* the masked value cannot be negative, so this branches exactly when no steps are left */
 | |
| #else	/* not LT/RN: the loop length is TEMP, set to K - KK at .L11 */
 | |
| 	andi.	r0, TEMP, 3	/* r0 = TEMP & 3; sets CR0 */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L18	/* no leftover steps: continue at .L18 */
 | |
| #endif
 | |
| 	.align 4
 | |
| 
 | |
| .L15:	/* Preload the first operands for the leftover steps: one register from each of the four cursors. */
 | |
| 	LFPDUX	A2,  AO,  INC4
 | |
| 	LFPDUX	A4,  AO2, INC4
 | |
| 	LFPDUX	A10, BO,  INC4	/* A10 carries data read through BO here, despite the A-prefixed register name */
 | |
| 	LFPDUX	B4,  BO2, INC4
 | |
| 	bdz-	.L17	/* CTR -= 1; if only one step was left, go straight to the last step at .L17 */
 | |
| 	.align 4
 | |
| 
 | |
| .L16:	/* Leftover-step loop: 16 paired multiply-adds into f0-f15 per iteration, reloading A2/A4 mid-step and all four operands for the next iteration. */
 | |
| 	fxcpmadd	f0,  A10, A2, f0
 | |
| 	fxcsmadd	f4,  A10, A2, f4
 | |
| 	fxcpmadd	f8,  B4, A2, f8
 | |
| 	fxcsmadd	f12, B4, A2, f12
 | |
| 	LFPDUX	A2, AO,  INC4	/* next A pair via AO, used for f2/f6/f10/f14 below */
 | |
| 
 | |
| 	fxcpmadd	f1,  A10, A4, f1
 | |
| 	fxcsmadd	f5,  A10, A4, f5
 | |
| 	fxcpmadd	f9,  B4, A4, f9
 | |
| 	fxcsmadd	f13, B4, A4, f13
 | |
| 	LFPDUX	A4, AO2, INC4	/* next A pair via AO2, used for f3/f7/f11/f15 below */
 | |
| 
 | |
| 	fxcpmadd	f2,  A10, A2, f2
 | |
| 	fxcsmadd	f6,  A10, A2, f6
 | |
| 	fxcpmadd	f10, B4, A2, f10
 | |
| 	fxcsmadd	f14, B4, A2, f14
 | |
| 	LFPDUX	A2, AO,  INC4	/* A2 for the next iteration */
 | |
| 
 | |
| 	fxcpmadd	f3,  A10, A4, f3
 | |
| 	fxcsmadd	f7,  A10, A4, f7
 | |
| 	LFPDUX	A10, BO,  INC4	/* A10 is no longer read in this step, so it is reloaded for the next iteration */
 | |
| 	fxcpmadd	f11, B4, A4, f11
 | |
| 	fxcsmadd	f15, B4, A4, f15
 | |
| 	LFPDUX	A4, AO2, INC4	/* A4 for the next iteration */
 | |
| 	LFPDUX	B4, BO2, INC4	/* B4 for the next iteration */
 | |
| 	bdnz+	.L16	/* CTR -= 1; loop while steps remain, otherwise fall through to .L17 */
 | |
| 	.align 4
 | |
| 
 | |
| .L17:	/* Last leftover step: same 16 multiply-adds as one .L16 iteration, without loading operands for a following step. */
 | |
| 	fxcpmadd	f0,  A10, A2, f0
 | |
| 	fxcsmadd	f4,  A10, A2, f4
 | |
| 	fxcpmadd	f8,  B4, A2, f8
 | |
| 	fxcsmadd	f12, B4, A2, f12
 | |
| 	LFPDUX	A2, AO,  INC4	/* still needed: A pair used for f2/f6/f10/f14 below */
 | |
| 
 | |
| 	fxcpmadd	f1,  A10, A4, f1
 | |
| 	fxcsmadd	f5,  A10, A4, f5
 | |
| 	fxcpmadd	f9,  B4, A4, f9
 | |
| 	fxcsmadd	f13, B4, A4, f13
 | |
| 	LFPDUX	A4, AO2, INC4	/* still needed: A pair used for f3/f7/f11/f15 below */
 | |
| 
 | |
| 	fxcpmadd	f2,  A10, A2, f2
 | |
| 	fxcsmadd	f6,  A10, A2, f6
 | |
| 	fxcpmadd	f10, B4, A2, f10
 | |
| 	fxcsmadd	f14, B4, A2, f14
 | |
| 
 | |
| 	fxcpmadd	f3,  A10, A4, f3
 | |
| 	fxcsmadd	f7,  A10, A4, f7
 | |
| 	fxcpmadd	f11, B4, A4, f11
 | |
| 	fxcsmadd	f15, B4, A4, f15
 | |
| 	.align 4
 | |
| 
 | |
| .L18:
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 8
 | |
| #else
 | |
| 	subi	r0, KK, 4
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 3 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 2 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO, B,     r0
 | |
| 	addi	AO2, AO,   2 * SIZE
 | |
| 	addi	BO,  BO, - 4 * SIZE
 | |
| 	addi	BO2, BO,   2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	fpmr	f24, f0
 | |
| 	LFPDUX	f16, BO,  INC4
 | |
| 	fpmr	f25, f1
 | |
| 	nop
 | |
| 	fpmr	f26, f2
 | |
| 	LFPDUX	f17, BO2, INC4
 | |
| 	fpmr	f27, f3
 | |
| 	nop
 | |
| 
 | |
| 	fpmr	f28, f8
 | |
| 	LFPDUX	f18, BO,  INC4
 | |
| 	fpmr	f29, f9
 | |
| 	nop
 | |
| 	fpmr	f30, f10
 | |
| 	LFPDUX	f19, BO2, INC4
 | |
| 	fpmr	f31, f11
 | |
| 	nop
 | |
| 
 | |
| 	fsmfp	f0,  f4
 | |
|  	LFPDUX	f20, BO,  INC4
 | |
| 	fsmfp	f1,  f5
 | |
| 	nop
 | |
| 	fsmfp	f2,  f6
 | |
| 	LFPDUX	f21, BO2, INC4
 | |
| 	fsmfp	f3,  f7
 | |
| 	nop
 | |
| 
 | |
| 	fsmfp	f8,  f12
 | |
| 	LFPDUX	f22, BO,  INC4
 | |
| 	fsmfp	f9,  f13
 | |
| 	nop
 | |
| 	fsmfp	f10, f14
 | |
| 	LFPDUX	f23, BO2, INC4
 | |
| 	fsmfp	f11, f15
 | |
| 	nop
 | |
| 
 | |
| 	fsmtp	f4,  f24
 | |
| 	LFPDUX	f24, BO,  INC4
 | |
| 	fsmtp	f5,  f25
 | |
| 	nop
 | |
| 	fsmtp	f6,  f26
 | |
| 	LFPDUX	f25, BO2, INC4
 | |
| 	fsmtp	f7,  f27
 | |
| 	nop
 | |
| 
 | |
| 	fsmtp	f12, f28
 | |
| 	LFPDUX	f26, BO,  INC4
 | |
| 	fsmtp	f13, f29
 | |
| 	nop
 | |
| 	fsmtp	f14, f30
 | |
| 	LFPDUX	f27, BO2, INC4
 | |
| 	fsmtp	f15, f31
 | |
| 	nop
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	LFPDUX	f28, BO,  INC4
 | |
| 	fpsub	f8,  f17,  f8
 | |
| 	nop
 | |
| 	fpsub	f4,  f18,  f4
 | |
| 	LFPDUX	f29, BO2, INC4
 | |
| 	fpsub	f12, f19,  f12
 | |
| 	nop
 | |
| 
 | |
| 	fpsub	f1,  f20,  f1
 | |
| 	LFPDUX	f30, BO,  INC4
 | |
| 	fpsub	f9,  f21,  f9
 | |
| 	subi	BO,  BO,  32 * SIZE
 | |
| 	fpsub	f5,  f22,  f5
 | |
| 	LFPDUX	f31, BO2, INC4
 | |
| 	fpsub	f13, f23,  f13
 | |
| 	subi	BO2, BO2, 32 * SIZE
 | |
| 
 | |
| 	fpsub	f2,  f24,  f2
 | |
| 	fpsub	f10, f25,  f10
 | |
| 	fpsub	f6,  f26,  f6
 | |
| 	fpsub	f14, f27,  f14
 | |
| 	fpsub	f3,  f28,  f3
 | |
| 	fpsub	f11, f29,  f11
 | |
| 	fpsub	f7,  f30,  f7
 | |
| 	fpsub	f15, f31,  f15
 | |
| 
 | |
| #else
 | |
| 	LFPDUX	f16, AO,  INC4
 | |
| 	LFPDUX	f17, AO2, INC4
 | |
| 	LFPDUX	f18, AO,  INC4
 | |
| 	LFPDUX	f19, AO2, INC4
 | |
| 	LFPDUX	f20, AO,  INC4
 | |
| 	LFPDUX	f21, AO2, INC4
 | |
| 	LFPDUX	f22, AO,  INC4
 | |
| 	LFPDUX	f23, AO2, INC4
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	LFPDUX	f24, AO,  INC4
 | |
| 	fpsub	f1,  f17,  f1
 | |
| 	LFPDUX	f25, AO2, INC4
 | |
| 	fpsub	f2,  f18,  f2
 | |
| 	LFPDUX	f26, AO,  INC4
 | |
| 	fpsub	f3,  f19,  f3
 | |
| 	LFPDUX	f27, AO2, INC4
 | |
| 	fpsub	f4,  f20,  f4
 | |
| 	LFPDUX	f28, AO,  INC4
 | |
| 	fpsub	f5,  f21,  f5
 | |
| 	LFPDUX	f29, AO2, INC4
 | |
| 	fpsub	f6,  f22,  f6
 | |
| 	LFPDUX	f30, AO,  INC4
 | |
| 	fpsub	f7,  f23,  f7
 | |
| 	LFPDUX	f31, AO2, INC4
 | |
| 
 | |
| 	fpsub	f8,  f24,  f8
 | |
| 	subi	AO,  AO,  32 * SIZE
 | |
| 	fpsub	f9,  f25,  f9
 | |
| 	subi	AO2, AO2, 32 * SIZE
 | |
| 	fpsub	f10, f26,  f10
 | |
| 	fpsub	f11, f27,  f11
 | |
| 	fpsub	f12, f28,  f12
 | |
| 	fpsub	f13, f29,  f13
 | |
| 	fpsub	f14, f30,  f14
 | |
| 	fpsub	f15, f31,  f15
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
|        addi	AO,  AO,  68 * SIZE
 | |
|        addi	AO2, AO2, 68 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  AO2, INCM4
 | |
| 	LFPDUX	A2,  AO,  INCM4
 | |
| 	LFPDUX	A3,  AO2, INCM4
 | |
| 	LFPDUX	A4,  AO,  INCM4
 | |
| 	LFPDUX	A5,  AO2, INCM4
 | |
| 	LFPDUX	A6,  AO,  INCM4
 | |
| 	LFPDUX	A7,  AO2, INCM4
 | |
| 	LFPDUX	A8,  AO,  INCM4
 | |
| 
 | |
| 	fxsmul	f7,  A1, f7
 | |
| 	fxsmul	f15, A1, f15
 | |
| 
 | |
| 	fxcpnmsub  f3,  A1, f7,  f3
 | |
| 	fxcpnmsub  f11, A1, f15, f11
 | |
| 
 | |
| 	fxcsnmsub  f6,  A2, f7,  f6
 | |
| 	fxcsnmsub  f14, A2, f15, f14
 | |
| 
 | |
| 	fxcpnmsub  f2,  A2, f7,  f2
 | |
| 	fxcpnmsub  f10, A2, f15, f10
 | |
| 
 | |
| 	fxcsnmsub  f5,  A3, f7,  f5
 | |
| 	fxcsnmsub  f13, A3, f15, f13
 | |
| 
 | |
| 	fxcpnmsub  f1,  A3, f7,  f1
 | |
| 	fxcpnmsub  f9,  A3, f15, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A4, f7,  f4
 | |
| 	fxcsnmsub  f12, A4, f15, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A4, f7,  f0
 | |
| 	fxcpnmsub  f8,  A4, f15, f8
 | |
| 
 | |
| 	fxpmul	f3,  A5, f3
 | |
| 	fxpmul	f11, A5, f11
 | |
| 
 | |
| 	fxcsnmsub  f6,  A6, f3,  f6
 | |
| 	fxcsnmsub  f14, A6, f11, f14
 | |
| 
 | |
| 	fxcpnmsub  f2,  A6, f3,  f2
 | |
| 	fxcpnmsub  f10, A6, f11, f10
 | |
| 
 | |
| 	fxcsnmsub  f5,  A7, f3,  f5
 | |
| 	fxcsnmsub  f13, A7, f11, f13
 | |
| 
 | |
| 	fxcpnmsub  f1,  A7, f3,  f1
 | |
| 	fxcpnmsub  f9,  A7, f11, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A8, f3,  f4
 | |
| 	fxcsnmsub  f12, A8, f11, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A8, f3,  f0
 | |
| 	fxcpnmsub  f8,  A8, f11, f8
 | |
| 
 | |
| 	add	AO2, AO2, INCM4
 | |
| 	LFPDUX	A1,  AO,  INCM4
 | |
| 	LFPDUX	A2,  AO2, INCM4
 | |
| 	LFPDUX	A3,  AO,  INCM4
 | |
| 
 | |
| 	add	AO2, AO2, INCM4
 | |
| 	LFPDUX	A4,  AO,  INCM4
 | |
| 	LFPDUX	A5,  AO2, INCM4
 | |
| 	LFPDUX	A6,  AO,  INCM4
 | |
| 
 | |
| 	add	AO2, AO2, INCM4
 | |
| 	add	AO,  AO,  INCM4
 | |
| 	LFPDUX	A7,  AO2, INCM4
 | |
| 	LFPDUX	A8,  AO,  INCM4
 | |
| 
 | |
| 
 | |
| 	fxsmul	f6,  A1, f6
 | |
| 	fxsmul	f14, A1, f14
 | |
| 
 | |
| 	fxcpnmsub  f2,  A1, f6,  f2
 | |
| 	fxcpnmsub  f10, A1, f14, f10
 | |
| 
 | |
| 	fxcsnmsub  f5,  A2, f6,  f5
 | |
| 	fxcsnmsub  f13, A2, f14, f13
 | |
| 
 | |
| 	fxcpnmsub  f1,  A2, f6,  f1
 | |
| 	fxcpnmsub  f9,  A2, f14, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A3, f6,  f4
 | |
| 	fxcsnmsub  f12, A3, f14, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A3, f6,  f0
 | |
| 	fxcpnmsub  f8,  A3, f14, f8
 | |
| 
 | |
| 	fxpmul	f2,  A4, f2
 | |
| 	fxpmul	f10, A4, f10
 | |
| 
 | |
| 	fxcsnmsub  f5,  A5, f2,  f5
 | |
| 	fxcsnmsub  f13, A5, f10, f13
 | |
| 
 | |
| 	fxcpnmsub  f1,  A5, f2,  f1
 | |
| 	fxcpnmsub  f9,  A5, f10, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A6, f2,  f4
 | |
| 	fxcsnmsub  f12, A6, f10, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A6, f2,  f0
 | |
| 	fxcpnmsub  f8,  A6, f10, f8
 | |
| 
 | |
| 	fxsmul	f5,  A7, f5
 | |
| 	fxsmul	f13, A7, f13
 | |
| 
 | |
| 	fxcpnmsub  f1,  A7, f5,  f1
 | |
| 	fxcpnmsub  f9,  A7, f13, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A8, f5,  f4
 | |
| 	fxcsnmsub  f12, A8, f13, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A8, f5,  f0
 | |
| 	fxcpnmsub  f8,  A8, f13, f8
 | |
| 
 | |
| 	add	AO2, AO2, INCM4
 | |
| 	add	AO,  AO,  INCM4
 | |
| 	LFPDUX	A1,  AO2, INCM4
 | |
| 	LFPDUX	A2,  AO,  INCM4
 | |
| 
 | |
| 	subi	AO2, AO2, 8 * SIZE
 | |
| 	add	AO,  AO,  INCM4
 | |
| 	LFPDUX	A3,  AO,  INCM4
 | |
| 
 | |
| 	subi	AO2, AO2, 8 * SIZE
 | |
| 	add	AO,  AO,  INCM4
 | |
| 	LFPDUX	A4,  AO,  INCM4
 | |
| 
 | |
| 	addi	AO,  AO,  -4 * SIZE
 | |
| 	addi	AO2, AO2, -4 * SIZE
 | |
| 
 | |
| 	fxpmul	f1,  A1, f1
 | |
| 	fxpmul	f9,  A1, f9
 | |
| 
 | |
| 	fxcsnmsub  f4,  A2, f1,  f4
 | |
| 	fxcsnmsub  f12, A2, f9,  f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A2, f1,  f0
 | |
| 	fxcpnmsub  f8,  A2, f9,  f8
 | |
| 
 | |
| 	fxsmul	f4,  A3, f4
 | |
| 	fxsmul	f12, A3, f12
 | |
| 
 | |
| 	fxcpnmsub  f0,  A3, f4,  f0
 | |
| 	fxcpnmsub  f8,  A3, f12, f8
 | |
| 
 | |
| 	fxpmul	f0,  A4, f0
 | |
| 	fxpmul	f8,  A4, f8
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LFPDUX	A1,  AO,  INC4
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 	LFPDUX	A3,  AO,  INC4
 | |
| 	LFPDUX	A4,  AO2, INC4
 | |
| 
 | |
| 	LFPDUX	A5,  AO,  INC4
 | |
| 	LFPDUX	A6,  AO2, INC4
 | |
| 	LFPDUX	A7,  AO,  INC4
 | |
| 	LFPDUX	A8,  AO2, INC4
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f8,  A1,  f8
 | |
| 
 | |
| 	fxcsnmsub  f4,  A1, f0, f4
 | |
| 	fxcsnmsub  f12, A1, f8, f12
 | |
| 
 | |
| 	fxcpnmsub  f1,  A2, f0, f1
 | |
| 	fxcpnmsub  f9,  A2, f8, f9
 | |
| 
 | |
| 	fxcsnmsub  f5,  A2, f0, f5
 | |
| 	fxcsnmsub  f13, A2, f8, f13
 | |
| 
 | |
| 	fxcpnmsub  f2,  A3, f0, f2
 | |
| 	fxcpnmsub  f10, A3, f8, f10
 | |
| 
 | |
| 	fxcsnmsub  f6,  A3, f0, f6
 | |
| 	fxcsnmsub  f14, A3, f8, f14
 | |
| 
 | |
| 	fxcpnmsub  f3,  A4, f0, f3
 | |
| 	fxcpnmsub  f11, A4, f8, f11
 | |
| 
 | |
| 	fxcsnmsub  f7,  A4, f0, f7
 | |
| 	fxcsnmsub  f15, A4, f8, f15
 | |
| 
 | |
| 	fxsmul	f4,  A5,  f4
 | |
| 	fxsmul	f12, A5,  f12
 | |
| 
 | |
| 	fxcpnmsub  f1,  A6, f4,  f1
 | |
| 	fxcpnmsub  f9,  A6, f12, f9
 | |
| 
 | |
| 	fxcsnmsub  f5,  A6, f4,  f5
 | |
| 	fxcsnmsub  f13, A6, f12, f13
 | |
| 
 | |
| 	fxcpnmsub  f2,  A7, f4,  f2
 | |
| 	fxcpnmsub  f10, A7, f12, f10
 | |
| 
 | |
| 	fxcsnmsub  f6,  A7, f4,  f6
 | |
| 	fxcsnmsub  f14, A7, f12, f14
 | |
| 
 | |
| 	fxcpnmsub  f3,  A8, f4,  f3
 | |
| 	fxcpnmsub  f11, A8, f12, f11
 | |
| 
 | |
| 	fxcsnmsub  f7,  A8, f4,  f7
 | |
| 	fxcsnmsub  f15, A8, f12, f15
 | |
| 
 | |
| 	add	AO,  AO,  INC4
 | |
| 	LFPDUX	A1,  AO2, INC4
 | |
| 	LFPDUX	A2,  AO,  INC4
 | |
| 	LFPDUX	A3,  AO2, INC4
 | |
| 
 | |
| 	add	AO,  AO,  INC4
 | |
| 	LFPDUX	A4,  AO2, INC4
 | |
| 	LFPDUX	A5,  AO,  INC4
 | |
| 	LFPDUX	A6,  AO2, INC4
 | |
| 
 | |
| 	add	AO,  AO,  INC4
 | |
| 	add	AO2, AO2, INC4
 | |
| 	LFPDUX	A7,  AO,  INC4
 | |
| 	LFPDUX	A8,  AO2, INC4
 | |
| 
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| 	fxpmul	f9,  A1,  f9
 | |
| 
 | |
| 	fxcsnmsub  f5,  A1, f1, f5
 | |
| 	fxcsnmsub  f13, A1, f9, f13
 | |
| 
 | |
| 	fxcpnmsub  f2,  A2, f1, f2
 | |
| 	fxcpnmsub  f10, A2, f9, f10
 | |
| 
 | |
| 	fxcsnmsub  f6,  A2, f1, f6
 | |
| 	fxcsnmsub  f14, A2, f9, f14
 | |
| 
 | |
| 	fxcpnmsub  f3,  A3, f1, f3
 | |
| 	fxcpnmsub  f11, A3, f9, f11
 | |
| 
 | |
| 	fxcsnmsub  f7,  A3, f1, f7
 | |
| 	fxcsnmsub  f15, A3, f9, f15
 | |
| 
 | |
| 	fxsmul	f5,  A4,  f5
 | |
| 	fxsmul	f13, A4,  f13
 | |
| 
 | |
| 	fxcpnmsub  f2,  A5, f5,  f2
 | |
| 	fxcpnmsub  f10, A5, f13, f10
 | |
| 
 | |
| 	fxcsnmsub  f6,  A5, f5,  f6
 | |
| 	fxcsnmsub  f14, A5, f13, f14
 | |
| 
 | |
| 	fxcpnmsub  f3,  A6, f5,  f3
 | |
| 	fxcpnmsub  f11, A6, f13, f11
 | |
| 
 | |
| 	fxcsnmsub  f7,  A6, f5,  f7
 | |
| 	fxcsnmsub  f15, A6, f13, f15
 | |
| 
 | |
| 	fxpmul	f2,  A7,  f2
 | |
| 	fxpmul	f10, A7,  f10
 | |
| 
 | |
| 	fxcsnmsub  f6,  A7, f2,  f6
 | |
| 	fxcsnmsub  f14, A7, f10, f14
 | |
| 
 | |
| 	fxcpnmsub  f3,  A8, f2,  f3
 | |
| 	fxcpnmsub  f11, A8, f10, f11
 | |
| 
 | |
| 	fxcsnmsub  f7,  A8, f2,  f7
 | |
| 	fxcsnmsub  f15, A8, f10, f15
 | |
| 
 | |
| 	add	AO,  AO,  INC4
 | |
| 	add	AO2, AO2, INC4
 | |
| 	LFPDUX	A1,  AO,  INC4
 | |
| 	LFPDUX	A2,  AO2, INC4
 | |
| 
 | |
| 	addi	AO,  AO,  8 * SIZE
 | |
| 	addi	AO2, AO2, 4 * SIZE
 | |
| 	LFPDUX	A3,  AO2, INC4
 | |
| 
 | |
| 	addi	AO,  AO,  8 * SIZE
 | |
| 	addi	AO2, AO2, 4 * SIZE
 | |
| 	LFPDUX	A4,  AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,  64 * SIZE
 | |
| 	subi	AO2, AO2, 64 * SIZE
 | |
| 
 | |
| 	fxsmul	f6,  A1,  f6
 | |
| 	fxsmul	f14, A1,  f14
 | |
| 
 | |
| 	fxcpnmsub  f3,  A2, f6,  f3
 | |
| 	fxcpnmsub  f11, A2, f14, f11
 | |
| 
 | |
| 	fxcsnmsub  f7,  A2, f6,  f7
 | |
| 	fxcsnmsub  f15, A2, f14, f15
 | |
| 
 | |
| 	fxpmul	f3,  A3,  f3
 | |
| 	fxpmul	f11, A3,  f11
 | |
| 
 | |
| 	fxcsnmsub  f7,  A3, f3,  f7
 | |
| 	fxcsnmsub  f15, A3, f11, f15
 | |
| 
 | |
| 	fxsmul	f7,  A4,  f7
 | |
| 	fxsmul	f15, A4,  f15
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFPDUX	A1,  BO,  INC4
 | |
| 	LFPDUX	A2,  BO2, INC4
 | |
| 	LFPDUX	A3,  BO,  INC4
 | |
| 	LFPDUX	A4,  BO2, INC4
 | |
| 
 | |
| 	add	BO,  BO,  INC4
 | |
| 	LFPDUX	A5,  BO2, INC4
 | |
| 
 | |
| 	add	BO,  BO,  INC4
 | |
| 	LFPDUX	A6,  BO2, INC4
 | |
| 	subi	BO,  BO,  16 * SIZE
 | |
| 	subi	BO2, BO2, 16 * SIZE
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| 	fxpmul	f2,  A1,  f2
 | |
| 	fxpmul	f3,  A1,  f3
 | |
| 
 | |
| 	fxcsnmsub    f4,  A1, f0, f4
 | |
| 	fxcsnmsub    f5,  A1, f1, f5
 | |
| 	fxcsnmsub    f6,  A1, f2, f6
 | |
| 	fxcsnmsub    f7,  A1, f3, f7
 | |
| 
 | |
| 	fxcpnmsub    f8,  A2, f0, f8
 | |
| 	fxcpnmsub    f9,  A2, f1, f9
 | |
| 	fxcpnmsub    f10, A2, f2, f10
 | |
| 	fxcpnmsub    f11, A2, f3, f11
 | |
| 
 | |
| 	fxcsnmsub    f12, A2, f0, f12
 | |
| 	fxcsnmsub    f13, A2, f1, f13
 | |
| 	fxcsnmsub    f14, A2, f2, f14
 | |
| 	fxcsnmsub    f15, A2, f3, f15
 | |
| 
 | |
| 	fxsmul	f4,  A3,  f4
 | |
| 	fxsmul	f5,  A3,  f5
 | |
| 	fxsmul	f6,  A3,  f6
 | |
| 	fxsmul	f7,  A3,  f7
 | |
| 
 | |
| 	fxcpnmsub    f8,  A4, f4, f8
 | |
| 	fxcpnmsub    f9,  A4, f5, f9
 | |
| 	fxcpnmsub    f10, A4, f6, f10
 | |
| 	fxcpnmsub    f11, A4, f7, f11
 | |
| 
 | |
| 	fxcsnmsub    f12, A4, f4, f12
 | |
| 	fxcsnmsub    f13, A4, f5, f13
 | |
| 	fxcsnmsub    f14, A4, f6, f14
 | |
| 	fxcsnmsub    f15, A4, f7, f15
 | |
| 
 | |
| 	fxpmul	f8,  A5,  f8
 | |
| 	fxpmul	f9,  A5,  f9
 | |
| 	fxpmul	f10, A5,  f10
 | |
| 	fxpmul	f11, A5,  f11
 | |
| 
 | |
| 	fxcsnmsub    f12, A5, f8,  f12
 | |
| 	fxcsnmsub    f13, A5, f9,  f13
 | |
| 	fxcsnmsub    f14, A5, f10, f14
 | |
| 	fxcsnmsub    f15, A5, f11, f15
 | |
| 
 | |
| 	fxsmul	f12,  A6,  f12
 | |
| 	fxsmul	f13,  A6,  f13
 | |
| 	fxsmul	f14,  A6,  f14
 | |
| 	fxsmul	f15,  A6,  f15
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	addi	BO,  BO,  20 * SIZE
 | |
| 	addi	BO2, BO2, 20 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  BO2, INCM4
 | |
| 	LFPDUX	A2,  BO,  INCM4
 | |
| 
 | |
| 	LFPDUX	A3,  BO2, INCM4
 | |
| 	LFPDUX	A4,  BO,  INCM4
 | |
| 
 | |
| 	add	BO2, BO2, INCM4
 | |
| 	LFPDUX	A5,  BO,  INCM4
 | |
| 
 | |
| 	add	BO2, BO2, INCM4
 | |
| 	LFPDUX	A6,  BO,  INCM4
 | |
| 	subi	BO,  BO,  4 * SIZE
 | |
| 	subi	BO2, BO2, 4 * SIZE
 | |
| 
 | |
| 	fxsmul	f12, A1,  f12
 | |
| 	fxsmul	f13, A1,  f13
 | |
| 	fxsmul	f14, A1,  f14
 | |
| 	fxsmul	f15, A1,  f15
 | |
| 
 | |
| 	fxcpnmsub    f8,  A1, f12, f8
 | |
| 	fxcpnmsub    f9,  A1, f13, f9
 | |
| 	fxcpnmsub    f10, A1, f14, f10
 | |
| 	fxcpnmsub    f11, A1, f15, f11
 | |
| 
 | |
| 	fxcsnmsub    f4,  A2, f12, f4
 | |
| 	fxcsnmsub    f5,  A2, f13, f5
 | |
| 	fxcsnmsub    f6,  A2, f14, f6
 | |
| 	fxcsnmsub    f7,  A2, f15, f7
 | |
| 
 | |
| 	fxcpnmsub    f0,  A2, f12, f0
 | |
| 	fxcpnmsub    f1,  A2, f13, f1
 | |
| 	fxcpnmsub    f2,  A2, f14, f2
 | |
| 	fxcpnmsub    f3,  A2, f15, f3
 | |
| 
 | |
| 	fxpmul	f8,  A3,  f8
 | |
| 	fxpmul	f9,  A3,  f9
 | |
| 	fxpmul	f10, A3,  f10
 | |
| 	fxpmul	f11, A3,  f11
 | |
| 
 | |
| 	fxcsnmsub    f4,  A4, f8,  f4
 | |
| 	fxcsnmsub    f5,  A4, f9,  f5
 | |
| 	fxcsnmsub    f6,  A4, f10, f6
 | |
| 	fxcsnmsub    f7,  A4, f11, f7
 | |
| 
 | |
| 	fxcpnmsub    f0,  A4, f8,  f0
 | |
| 	fxcpnmsub    f1,  A4, f9,  f1
 | |
| 	fxcpnmsub    f2,  A4, f10, f2
 | |
| 	fxcpnmsub    f3,  A4, f11, f3
 | |
| 
 | |
| 	fxsmul	f4,  A5,  f4
 | |
| 	fxsmul	f5,  A5,  f5
 | |
| 	fxsmul	f6,  A5,  f6
 | |
| 	fxsmul	f7,  A5,  f7
 | |
| 
 | |
| 	fxcpnmsub    f0,  A5, f4,  f0
 | |
| 	fxcpnmsub    f1,  A5, f5,  f1
 | |
| 	fxcpnmsub    f2,  A5, f6,  f2
 | |
| 	fxcpnmsub    f3,  A5, f7,  f3
 | |
| 
 | |
| 	fxpmul	f0,  A6,  f0
 | |
| 	fxpmul	f1,  A6,  f1
 | |
| 	fxpmul	f2,  A6,  f2
 | |
| 	fxpmul	f3,  A6,  f3
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 8 * SIZE
 | |
| 	subi	CO2, CO2, 8 * SIZE
 | |
| 	subi	CO3, CO3, 8 * SIZE
 | |
| 	subi	CO4, CO4, 8 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC4
 | |
| 	STFPDUX	f8,  BO2, INC4
 | |
| 	STFPDUX	f4,  BO,  INC4
 | |
| 	STFPDUX	f12, BO2, INC4
 | |
| 	STFPDUX	f1,  BO,  INC4
 | |
| 	STFPDUX	f9,  BO2, INC4
 | |
| 	STFPDUX	f5,  BO,  INC4
 | |
| 	STFPDUX	f13, BO2, INC4
 | |
| 	STFPDUX	f2,  BO,  INC4
 | |
| 	STFPDUX	f10, BO2, INC4
 | |
| 	STFPDUX	f6,  BO,  INC4
 | |
| 	STFPDUX	f14, BO2, INC4
 | |
| 	STFPDUX	f3,  BO,  INC4
 | |
| 	STFPDUX	f11, BO2, INC4
 | |
| 	STFPDUX	f7,  BO,  INC4
 | |
| 	STFPDUX	f15, BO2, INC4
 | |
| 
 | |
| 	subi	BO,  BO,  32 * SIZE
 | |
| 	subi	BO2, BO2, 32 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFDUX	f4,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFDUX	f5,  CO1, INC
 | |
| 	STFDUX	f2,  CO1, INC
 | |
| 	STFDUX	f6,  CO1, INC
 | |
| 	STFDUX	f3,  CO1, INC
 | |
| 	STFDUX	f7,  CO1, INC
 | |
| 
 | |
| 	STFSDUX	f0,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 	STFSDUX	f1,  CO2, INC
 | |
| 	STFSDUX	f5,  CO2, INC
 | |
| 	STFSDUX	f2,  CO2, INC
 | |
| 	STFSDUX	f6,  CO2, INC
 | |
| 	STFSDUX	f3,  CO2, INC
 | |
| 	STFSDUX	f7,  CO2, INC
 | |
| 
 | |
| 	STFDUX	f8,  CO3, INC
 | |
| 	STFDUX	f12, CO3, INC
 | |
| 	STFDUX	f9,  CO3, INC
 | |
| 	STFDUX	f13, CO3, INC
 | |
| 	STFDUX	f10, CO3, INC
 | |
| 	STFDUX	f14, CO3, INC
 | |
| 	STFDUX	f11, CO3, INC
 | |
| 	STFDUX	f15, CO3, INC
 | |
| 
 | |
| 	STFSDUX	f8,  CO4, INC
 | |
| 	STFSDUX	f12, CO4, INC
 | |
| 	STFSDUX	f9,  CO4, INC
 | |
| 	STFSDUX	f13, CO4, INC
 | |
| 	STFSDUX	f10, CO4, INC
 | |
| 	STFSDUX	f14, CO4, INC
 | |
| 	STFSDUX	f11, CO4, INC
 | |
| 	STFSDUX	f15, CO4, INC
 | |
| 
 | |
| #else
 | |
| 	STFPDUX	f0,  AO,  INC4
 | |
| 	STFPDUX	f1,  AO2, INC4
 | |
| 	STFPDUX	f2,  AO,  INC4
 | |
| 	STFPDUX	f3,  AO2, INC4
 | |
| 	STFPDUX	f4,  AO,  INC4
 | |
| 	STFPDUX	f5,  AO2, INC4
 | |
| 	STFPDUX	f6,  AO,  INC4
 | |
| 	STFPDUX	f7,  AO2, INC4
 | |
| 	STFPDUX	f8,  AO,  INC4
 | |
| 	STFPDUX	f9,  AO2, INC4
 | |
| 	STFPDUX	f10, AO,  INC4
 | |
| 	STFPDUX	f11, AO2, INC4
 | |
| 	STFPDUX	f12, AO,  INC4
 | |
| 	STFPDUX	f13, AO2, INC4
 | |
| 	STFPDUX	f14, AO,  INC4
 | |
| 	STFPDUX	f15, AO2, INC4
 | |
| 
 | |
| 	subi	AO,  AO,  32 * SIZE
 | |
| 	subi	AO2, AO2, 32 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| 	STFDUX	f2,  CO1, INC
 | |
| 	STFSDUX	f2,  CO1, INC
 | |
| 	STFDUX	f3,  CO1, INC
 | |
| 	STFSDUX	f3,  CO1, INC
 | |
| 
 | |
| 	STFDUX	f4,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 	STFDUX	f5,  CO2, INC
 | |
| 	STFSDUX	f5,  CO2, INC
 | |
| 	STFDUX	f6,  CO2, INC
 | |
| 	STFSDUX	f6,  CO2, INC
 | |
| 	STFDUX	f7,  CO2, INC
 | |
| 	STFSDUX	f7,  CO2, INC
 | |
| 
 | |
| 	STFDUX	f8,  CO3, INC
 | |
| 	STFSDUX	f8,  CO3, INC
 | |
| 	STFDUX	f9,  CO3, INC
 | |
| 	STFSDUX	f9,  CO3, INC
 | |
| 	STFDUX	f10, CO3, INC
 | |
| 	STFSDUX	f10, CO3, INC
 | |
| 	STFDUX	f11, CO3, INC
 | |
| 	STFSDUX	f11, CO3, INC
 | |
| 
 | |
| 	STFDUX	f12, CO4, INC
 | |
| 	STFSDUX	f12, CO4, INC
 | |
| 	STFDUX	f13, CO4, INC
 | |
| 	STFSDUX	f13, CO4, INC
 | |
| 	STFDUX	f14, CO4, INC
 | |
| 	STFSDUX	f14, CO4, INC
 | |
| 	STFDUX	f15, CO4, INC
 | |
| 	STFSDUX	f15, CO4, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 8 * SIZE
 | |
| 	subi	CO2, CO2, 8 * SIZE
 | |
| 	subi	CO3, CO3, 8 * SIZE
 | |
| 	subi	CO4, CO4, 8 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 3 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 3 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 2 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 8
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 8
 | |
| #endif
 | |
| 
 | |
| 	addic.	I, I, -1
 | |
| 	li	r0, FZERO
 | |
| 
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	bgt+	.L11
 | |
| 	.align 4
 | |
| 
 | |
| .L49:	/* End of one J-loop pass: move B on, adjust KK, then loop back to .L10 while J > 0. */
 | |
| #ifdef LN
 | |
| 	slwi	r0, K, 2 + BASE_SHIFT	/* r0 = K << (2 + BASE_SHIFT) */
 | |
| 	add	B, B, r0	/* LN: B += r0 */
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	B,  BO, 4 * SIZE	/* LT/RN: B = BO + 4 * SIZE (B is re-derived from the running BO pointer) */
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	addi	KK, KK, 4	/* RN: KK += 4 */
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	subi	KK, KK, 4	/* RT: KK -= 4 */
 | |
| #endif
 | |
| 
 | |
| 	addic.	J, J, -1	/* J -= 1; the record form sets CR0 for the branch below */
 | |
| 	bgt+	.L10	/* repeat from .L10 while J > 0 (hinted as likely taken) */
 | |
| 	.align 4
 | |
| 
 | |
| .L50:	/* Set-up for the N & 2 panel; the whole panel is skipped when that bit of N is clear. */
 | |
| 	andi.	J, N,  2	/* J = N & 2 */
 | |
| 	beq	.L90	/* bit clear: continue at .L90 */
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 1 + BASE_SHIFT	/* r0 = K << (1 + BASE_SHIFT) */
 | |
| 	sub	B, B, r0	/* RT: B -= r0 */
 | |
| 
 | |
| 	slwi	r0, LDC, 1	/* r0 = 2 * LDC */
 | |
| 	sub	C, C, r0	/* RT: C -= 2 * LDC */
 | |
| #endif
 | |
| 
 | |
| 	mr	CO1, C	/* CO1 = C */
 | |
| 	add	CO2, C,   LDC	/* CO2 = C + LDC */
 | |
| 
 | |
| #ifdef LN
 | |
| 	add	KK, M, OFFSET	/* LN: KK = M + OFFSET */
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	mr	KK, OFFSET	/* LT: KK = OFFSET */
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| 	addi	AORIG, A, -2 * SIZE	/* LN/RT: AORIG = A - 2 * SIZE; NOTE(review): the bias presumably offsets the pre-incrementing update-form loads used later - confirm INC2 == 2 * SIZE */
 | |
| #else
 | |
| 	addi	AO, A, -2 * SIZE	/* LT/RN: AO = A - 2 * SIZE (same bias, applied to AO directly) */
 | |
| #endif
 | |
| #ifndef RT
 | |
| 	add	C,  CO2, LDC	/* non-RT: C = CO2 + LDC */
 | |
| #endif
 | |
| 
 | |
| 	li	r0, FZERO	/* r0 = FZERO, used as an offset from SP */
 | |
| 	lfpsx	f0, SP, r0	/* f0 = value stored at SP + FZERO; NOTE(review): presumably a zero constant used to clear accumulators - confirm in the prologue */
 | |
| 
 | |
| 	andi.	I, M,  1	/* I = M & 1 */
 | |
| 	beq	.L60	/* bit clear: skip the M & 1 tile and continue at .L60 */
 | |
| 
 | |
| #if defined(LT) || defined(RN)	/* M & 1 tile of the N & 2 panel. LT/RN set-up: the accumulation length is KK. */
 | |
| 	addi	BO,  B,  - 2 * SIZE	/* BO = B - 2 * SIZE */
 | |
| 	fpmr	f1,  f0	/* f1..f3 start as copies of f0 (extra accumulators) */
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  3	/* r0 = KK >> 3: number of passes of the 8-step unrolled loop */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L84	/* no full pass: go straight to the remainder handling */
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  0 + BASE_SHIFT	/* r0 = K << BASE_SHIFT */
 | |
| 	sub	AORIG, AORIG, r0	/* LN: AORIG -= r0 */
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 0 + BASE_SHIFT	/* r0   = KK << BASE_SHIFT */
 | |
| 	slwi	TEMP, KK, 1 + BASE_SHIFT	/* TEMP = KK << (1 + BASE_SHIFT) */
 | |
| 	add	AO, AORIG, r0	/* AO = AORIG + r0 */
 | |
| 	add	BO, B,     TEMP	/* BO = B + TEMP */
 | |
| 
 | |
| 	sub	TEMP, K, KK	/* LN/RT: the accumulation length is K - KK */
 | |
| 
 | |
| 	addi	BO,  BO,  - 2 * SIZE	/* same -2 * SIZE bias as the LT/RN path */
 | |
| 	fpmr	f1,  f0	/* f1..f3 start as copies of f0 */
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  3	/* r0 = (K - KK) >> 3 unrolled passes */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L84	/* no full pass: go straight to the remainder handling */
 | |
| 
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	B1,  BO,  INC2	/* preload operands for the first pass: A1..A4 from AO, B1..B4 from BO */
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	bdz-	.L83	/* CTR -= 1; if that was the only pass, skip the loop and finish in .L83 */
 | |
| 	.align 4
 | |
| 
 | |
| .L82:	/* Main loop: 8 multiply-adds per pass into f0..f3, with every operand reloaded for later use. NOTE(review): confirm lane semantics of fxcpmadd/fxcsmadd against the paired-FPU documentation. */
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	fxcsmadd	f1,  A1, B2, f1
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f2,  A2, B3, f2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	fxcsmadd	f3,  A2, B4, f3
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  A3, B1, f0
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	fxcsmadd	f1,  A3, B2, f1
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f2,  A4, B3, f2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	fxcsmadd	f3,  A4, B4, f3
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	bdnz+	.L82	/* CTR -= 1; loop while CTR != 0 */
 | |
| 	.align 4
 | |
| 
 | |
| .L83:	/* Final unrolled pass: B1..B4 are reloaded once for the second half, and no further A loads are issued. */
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	fxcsmadd	f1,  A1, B2, f1
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	fxcpmadd	f2,  A2, B3, f2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	fxcsmadd	f3,  A2, B4, f3
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  A3, B1, f0
 | |
| 	fxcsmadd	f1,  A3, B2, f1
 | |
| 	fxcpmadd	f2,  A4, B3, f2
 | |
| 	fxcsmadd	f3,  A4, B4, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L84:	/* Remainder: the low 3 bits of the accumulation length give the leftover single steps. */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  7	/* r0 = KK & 7 */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L88	/* nothing left: go to the reduction */
 | |
| #else
 | |
| 	andi.	r0, TEMP, 7	/* r0 = (K - KK) & 7 */
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L88	/* nothing left: go to the reduction */
 | |
| #endif
 | |
| 
 | |
| 	LFDX	A1,  AO,  INC2	/* load A1 from AO + INC2; AO is advanced separately below */
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	add	AO, AO, INC	/* AO += INC */
 | |
| 	bdz-	.L87	/* only one leftover step: skip the loop body */
 | |
| 	.align 4
 | |
| 
 | |
| .L86:	/* One multiply-add per leftover step, with the next A1/B1 fetched in the same pass. */
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	LFDX	A1,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	add	AO, AO, INC
 | |
| 	bdnz+	.L86
 | |
| 	.align 4
 | |
| 
 | |
| .L87:	/* Last leftover step uses the operands already loaded. */
 | |
| 	fxcpmadd	f0,  A1, B1, f0
 | |
| 	.align 4
 | |
| 
 | |
| .L88:	/* Fold the four partial accumulators into f0: f0 = (f0 + f1) + (f2 + f3). */
 | |
| 	fpadd	f0, f0, f1
 | |
| 	fpadd	f2, f2, f3
 | |
| 	fpadd	f0, f0, f2
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 1	/* LN: r0 = KK - 1 */
 | |
| #else
 | |
| 	subi	r0, KK, 2	/* RT: r0 = KK - 2 */
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 0 + BASE_SHIFT	/* TEMP = r0 << BASE_SHIFT */
 | |
| 	slwi	r0,   r0, 1 + BASE_SHIFT	/* r0   = r0 << (1 + BASE_SHIFT) */
 | |
| 	add	AO, AORIG, TEMP	/* re-point AO and BO for the solve step below */
 | |
| 	add	BO,  B,     r0
 | |
| 	addi	BO,  BO, - 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LFPDX	f16, BO,  INC2	/* LN/LT: f16 = pair at BO + INC2 */
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0	/* f0 = f16 - f0 */
 | |
| #else
 | |
| 	LFPDX	f16, AO,  INC2	/* RN/RT: f16 = pair at AO + INC2 */
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0	/* f0 = f16 - f0 */
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	LFPDX	A1,  AO,  INC2	/* LN: scale f0 by the value at AO + INC2; NOTE(review): presumably a pre-inverted diagonal element - confirm against the packing routine */
 | |
| 
 | |
| 	fxpmul	   f0,  A1, f0
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LFPDX	A1,  AO,  INC2	/* LT: same scaling step as the LN case */
 | |
| 
 | |
| 	fxpmul	   f0,  A1, f0
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFD	A1,  (2 + 0) * SIZE(BO)	/* RN: three scalars read at element offsets 0, 1 and 3 past the 2-element bias on BO */
 | |
| 	LFD	A2,  (2 + 1) * SIZE(BO)
 | |
| 	LFD	A3,  (2 + 3) * SIZE(BO)
 | |
| 
 | |
| 	fsmtp	     f1, f0	/* NOTE(review): presumably moves the secondary half of f0 into the primary half of f1 - confirm */
 | |
| 
 | |
| 	fmul	     f0,  A1, f0	/* f0 = A1 * f0 */
 | |
| 	fnmsub	     f1,  A2, f0, f1	/* f1 = f1 - A2 * f0 */
 | |
| 
 | |
| 	fmul	     f1,  A3, f1	/* f1 = A3 * f1 */
 | |
| 	fsmfp	     f0, f1	/* NOTE(review): presumably packs the primary half of f1 back into the secondary half of f0 - confirm */
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LFD	A1,  (2 + 3) * SIZE(BO)	/* RT: scalars read at element offsets 3, 2 and 0; the two results are solved in the opposite order to RN */
 | |
| 	LFD	A2,  (2 + 2) * SIZE(BO)
 | |
| 	LFD	A3,  (2 + 0) * SIZE(BO)
 | |
| 
 | |
| 	fsmtp	     f1, f0
 | |
| 
 | |
| 	fmul	     f1,  A1, f1	/* f1 = A1 * f1 */
 | |
| 	fnmsub	     f0,  A2, f1, f0	/* f0 = f0 - A2 * f1 */
 | |
| 
 | |
| 	fmul	     f0,  A3, f0	/* f0 = A3 * f0 */
 | |
| 	fsmfp	     f0, f1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 1 * SIZE	/* LN: step both output pointers back one element before the update-form stores */
 | |
| 	subi	CO2, CO2, 1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDX	f0,  BO,  INC2	/* write f0 back using the same address expression the f16 load used */
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC	/* one value to each of CO1 and CO2, both taken from f0 */
 | |
| 	STFSDUX	f0,  CO2, INC
 | |
| #else
 | |
| 	STFPDX	f0,  AO,  INC2	/* write f0 back using the same address expression the f16 load used */
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC	/* CO1 receives f0, CO2 receives f1 */
 | |
| 	STFDUX	f1,  CO2, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 1 * SIZE	/* LN: undo the advance made by the stores above */
 | |
| 	subi	CO2, CO2, 1 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 0 + BASE_SHIFT	/* r0 = K << BASE_SHIFT */
 | |
| 	add	AORIG, AORIG, r0	/* RT: AORIG += r0 */
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK	/* LT/RN: advance AO and BO by amounts scaled from K - KK */
 | |
| 	slwi	r0,   TEMP, 0 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 1	/* LT: KK += 1 */
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 1	/* LN: KK -= 1 */
 | |
| #endif
 | |
| 	li	r0, FZERO	/* reload f0 from SP + FZERO for the next tile */
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
| .L60:
 | |
| 	andi.	I, M,  2
 | |
| 	beq	.L70
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	BO,  B,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L74
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  1 + BASE_SHIFT
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 1 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 1 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK
 | |
| 
 | |
| 	addi	BO,  BO,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3, f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L74
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO, INC2
 | |
| 	LFPDUX	B1,  BO, INC2
 | |
| 	LFPDUX	A2,  AO, INC2
 | |
| 	LFPDUX	B2,  BO, INC2
 | |
| 	LFPDUX	A3,  AO, INC2
 | |
| 	LFPDUX	B3,  BO, INC2
 | |
| 	LFPDUX	A4,  AO, INC2
 | |
| 	LFPDUX	B4,  BO, INC2
 | |
| 
 | |
| 	LFPDUX	A5,  AO, INC2
 | |
| 	LFPDUX	B5,  BO, INC2
 | |
| 	LFPDUX	A6,  AO, INC2
 | |
| 	LFPDUX	B6,  BO, INC2
 | |
| 	LFPDUX	A7,  AO, INC2
 | |
| 	LFPDUX	A9,  BO, INC2
 | |
| 	LFPDUX	A8,  AO, INC2
 | |
| 	LFPDUX	A10, BO, INC2
 | |
| 	bdz-	.L73
 | |
| 	.align 4
 | |
| 
 | |
| .L72:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f1,  B1, A1, f1
 | |
| 	LFPDUX	A1,  AO, INC2
 | |
| 	LFPDUX	B1,  BO, INC2
 | |
| 	fxcpmadd	f2,  B2, A2, f2
 | |
| 	fxcsmadd	f3,  B2, A2, f3
 | |
| 	LFPDUX	A2,  AO, INC2
 | |
| 	LFPDUX	B2,  BO, INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A3, f0
 | |
| 	fxcsmadd	f1,  B3, A3, f1
 | |
| 	LFPDUX	A3,  AO, INC2
 | |
| 	LFPDUX	B3,  BO, INC2
 | |
| 	fxcpmadd	f2,  B4, A4, f2
 | |
| 	fxcsmadd	f3,  B4, A4, f3
 | |
| 	LFPDUX	A4,  AO, INC2
 | |
| 	LFPDUX	B4,  BO, INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B5, A5, f0
 | |
| 	fxcsmadd	f1,  B5, A5, f1
 | |
| 	LFPDUX	A5,  AO, INC2
 | |
| 	LFPDUX	B5,  BO, INC2
 | |
| 	fxcpmadd	f2,  B6, A6, f2
 | |
| 	fxcsmadd	f3,  B6, A6, f3
 | |
| 	LFPDUX	A6,  AO, INC2
 | |
| 	LFPDUX	B6,  BO, INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  A9,  A7, f0
 | |
| 	fxcsmadd	f1,  A9,  A7, f1
 | |
| 	LFPDUX	A7,  AO, INC2
 | |
| 	LFPDUX	A9,  BO, INC2
 | |
| 	fxcpmadd	f2,  A10, A8, f2
 | |
| 	fxcsmadd	f3,  A10, A8, f3
 | |
| 	LFPDUX	A8,  AO, INC2
 | |
| 	LFPDUX	A10, BO, INC2
 | |
| 	bdnz+	.L72
 | |
| 	.align 4
 | |
| 
 | |
| .L73:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f1,  B1, A1, f1
 | |
| 	fxcpmadd	f2,  B2, A2, f2
 | |
| 	fxcsmadd	f3,  B2, A2, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A3, f0
 | |
| 	fxcsmadd	f1,  B3, A3, f1
 | |
| 	fxcpmadd	f2,  B4, A4, f2
 | |
| 	fxcsmadd	f3,  B4, A4, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  B5, A5, f0
 | |
| 	fxcsmadd	f1,  B5, A5, f1
 | |
| 	fxcpmadd	f2,  B6, A6, f2
 | |
| 	fxcsmadd	f3,  B6, A6, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  A9,  A7, f0
 | |
| 	fxcsmadd	f1,  A9,  A7, f1
 | |
| 	fxcpmadd	f2,  A10, A8, f2
 | |
| 	fxcsmadd	f3,  A10, A8, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L74:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  7
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L78
 | |
| #else
 | |
| 	andi.	r0, TEMP, 7
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L78
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	bdz-	.L77
 | |
| 	.align 4
 | |
| 
 | |
| .L76:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f1,  B1, A1, f1
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	bdnz+	.L76
 | |
| 	.align 4
 | |
| 
 | |
| .L77:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f1,  B1, A1, f1
 | |
| 	.align 4
 | |
| 
 | |
| .L78:
 | |
| 	fpadd	f0, f0, f2
 | |
| 	fpadd	f1, f1, f3
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 2
 | |
| #else
 | |
| 	subi	r0, KK, 2
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 1 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 1 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO,  B,     r0
 | |
| 	addi	BO,  BO, - 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	fpmr	f24, f0
 | |
| 	fsmfp	f0,  f1
 | |
| 	fsmtp	f1,  f24
 | |
| 
 | |
| 	LFPDUX	f16, BO,  INC2
 | |
| 	LFPDUX	f17, BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,   4 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| #else
 | |
| 	LFPDUX	f16, AO,  INC2
 | |
| 	LFPDUX	f17, AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   4 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	addi	AO,  AO,  -4 * SIZE
 | |
| 
 | |
| 	fxsmul	   f1,  A2, f1
 | |
| 	fxcpnmsub  f0,  A2, f1,  f0
 | |
| 	fxpmul	   f0,  A1, f0
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	addi	AO,  AO,  -4 * SIZE
 | |
| 
 | |
| 	fxpmul	   f0,  A1, f0
 | |
| 	fxcsnmsub  f1,  A1, f0, f1
 | |
| 
 | |
| 	fxsmul	   f1,  A2,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFPDUX	A1,  BO,  INC2
 | |
| 	LFPDUX	A2,  BO,  INC2
 | |
| 
 | |
| 	subi	BO, BO, 4 * SIZE
 | |
| 
 | |
| 	fxpmul	     f0,  A1, f0
 | |
| 	fxcsnmsub    f1,  A1, f0, f1
 | |
| 
 | |
| 	fxsmul	     f1,  A2,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LFPDUX	A2,  BO,  INC2
 | |
| 	LFPDUX	A1,  BO,  INC2
 | |
| 
 | |
| 	subi	BO, BO, 4 * SIZE
 | |
| 
 | |
| 	fxsmul	     f1,  A1, f1
 | |
| 	fxcpnmsub    f0,  A1, f1,  f0
 | |
| 	fxpmul	     f0,  A2,  f0
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 2 * SIZE
 | |
| 	subi	CO2, CO2, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC2
 | |
| 	STFPDUX	f1,  BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,   4 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 
 | |
| 	STFSDUX	f0,  CO2, INC
 | |
| 	STFSDUX	f1,  CO2, INC
 | |
| #else
 | |
| 	STFPDUX	f0,  AO,  INC2
 | |
| 	STFPDUX	f1,  AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   4 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 
 | |
| 	STFDUX	f1,  CO2, INC
 | |
| 	STFSDUX	f1,  CO2, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 2 * SIZE
 | |
| 	subi	CO2, CO2, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 1 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 1 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| 	li	r0, FZERO
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
| .L70:
 | |
| 	andi.	I, M,  4
 | |
| 	beq	.L80
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	BO,  B,  - 2 * SIZE
 | |
|  	fpmr	f1,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  2
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L64
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  2 + BASE_SHIFT
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 2 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 1 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK
 | |
| 
 | |
|  	fpmr	f1,  f0
 | |
| 	addi	BO,  BO,  - 2 * SIZE
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  2
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L64
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	B1,  BO, INC2
 | |
| 	LFPDUX	A1,  AO, INC2
 | |
| 	LFPDUX	A2,  AO, INC2
 | |
| 	LFPDUX	B2,  BO, INC2
 | |
| 	LFPDUX	A3,  AO, INC2
 | |
| 	LFPDUX	A4,  AO, INC2
 | |
| 
 | |
| 	LFPDUX	B3,  BO, INC2
 | |
| 	LFPDUX	A5,  AO, INC2
 | |
| 	LFPDUX	A6,  AO, INC2
 | |
| 	LFPDUX	B4,  BO, INC2
 | |
| 	LFPDUX	A7,  AO, INC2
 | |
| 	LFPDUX	A8,  AO, INC2
 | |
| 	bdz-	.L63
 | |
| 	.align 4
 | |
| 
 | |
| .L62:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f2,  B1, A1, f2
 | |
| 	LFPDUX	A1,  AO, INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f3,  B1, A2, f3
 | |
| 	LFPDUX	A2,  AO, INC2
 | |
| 	LFPDUX	B1,  BO, INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A3, f0
 | |
| 	fxcsmadd	f2,  B2, A3, f2
 | |
| 	LFPDUX	A3,  AO, INC2
 | |
| 	fxcpmadd	f1,  B2, A4, f1
 | |
| 	fxcsmadd	f3,  B2, A4, f3
 | |
| 	LFPDUX	A4,  AO, INC2
 | |
| 	LFPDUX	B2,  BO, INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A5, f0
 | |
| 	fxcsmadd	f2,  B3, A5, f2
 | |
| 	LFPDUX	A5,  AO, INC2
 | |
| 	fxcpmadd	f1,  B3, A6, f1
 | |
| 	fxcsmadd	f3,  B3, A6, f3
 | |
| 	LFPDUX	A6,  AO, INC2
 | |
| 	LFPDUX	B3,  BO, INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B4, A7, f0
 | |
| 	fxcsmadd	f2,  B4, A7, f2
 | |
| 	LFPDUX	A7,  AO, INC2
 | |
| 	fxcpmadd	f1,  B4, A8, f1
 | |
| 	fxcsmadd	f3,  B4, A8, f3
 | |
| 	LFPDUX	A8,  AO, INC2
 | |
| 	LFPDUX	B4,  BO, INC2
 | |
| 	bdnz+	.L62
 | |
| 	.align 4
 | |
| 
 | |
| .L63:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f2,  B1, A1, f2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f3,  B1, A2, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A3, f0
 | |
| 	fxcsmadd	f2,  B2, A3, f2
 | |
| 	fxcpmadd	f1,  B2, A4, f1
 | |
| 	fxcsmadd	f3,  B2, A4, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A5, f0
 | |
| 	fxcsmadd	f2,  B3, A5, f2
 | |
| 	fxcpmadd	f1,  B3, A6, f1
 | |
| 	fxcsmadd	f3,  B3, A6, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  B4, A7, f0
 | |
| 	fxcsmadd	f2,  B4, A7, f2
 | |
| 	fxcpmadd	f1,  B4, A8, f1
 | |
| 	fxcsmadd	f3,  B4, A8, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L64:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L68
 | |
| #else
 | |
| 	andi.	r0, TEMP, 3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L68
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	bdz-	.L67
 | |
| 	.align 4
 | |
| 
 | |
| .L66:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f2,  B1, A1, f2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f3,  B1, A2, f3
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	bdnz+	.L66
 | |
| 	.align 4
 | |
| 
 | |
| .L67:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f2,  B1, A1, f2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f3,  B1, A2, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L68:
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 4
 | |
| #else
 | |
| 	subi	r0, KK, 2
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 2 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 1 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO,  B,     r0
 | |
| 	addi	BO,  BO, - 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	fpmr	f24, f0
 | |
| 	fpmr	f25, f1
 | |
| 
 | |
| 	fsmfp	f0,  f2
 | |
| 	fsmfp	f1,  f3
 | |
| 	fsmtp	f2,  f24
 | |
| 	fsmtp	f3,  f25
 | |
| 
 | |
| 	LFPDUX	f16, BO,  INC2
 | |
| 	LFPDUX	f17, BO,  INC2
 | |
| 	LFPDUX	f18, BO,  INC2
 | |
| 	LFPDUX	f19, BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,   8 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f2,  f17,  f2
 | |
| 	fpsub	f1,  f18,  f1
 | |
| 	fpsub	f3,  f19,  f3
 | |
| #else
 | |
| 	LFPDUX	f16, AO,  INC2
 | |
| 	LFPDUX	f17, AO,  INC2
 | |
| 	LFPDUX	f18, AO,  INC2
 | |
| 	LFPDUX	f19, AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   8 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| 	fpsub	f2,  f18,  f2
 | |
| 	fpsub	f3,  f19,  f3
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	addi	AO,  AO,  18 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INCM2
 | |
| 	LFPDUX	A2,  AO,  INCM2
 | |
| 	LFPDUX	A3,  AO,  INCM2
 | |
| 	LFPDUX	A4,  AO,  INCM2
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	LFPDUX	A5,  AO,  INCM2
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	LFPDUX	A6,  AO,  INCM2
 | |
| 
 | |
| 	subi	AO,  AO,  2 * SIZE
 | |
| 
 | |
| 	fxsmul	   f3,  A1, f3
 | |
| 	fxcpnmsub  f1,  A1, f3,  f1
 | |
| 	fxcsnmsub  f2,  A2, f3,  f2
 | |
| 	fxcpnmsub  f0,  A2, f3,  f0
 | |
| 
 | |
| 	fxpmul	   f1,  A3, f1
 | |
| 	fxcsnmsub  f2,  A4, f1,  f2
 | |
| 	fxcpnmsub  f0,  A4, f1,  f0
 | |
| 
 | |
| 	fxsmul	   f2,  A5, f2
 | |
| 	fxcpnmsub  f0,  A5, f2,  f0
 | |
| 
 | |
| 	fxpmul	   f0,  A6, f0
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	add	AO,  AO,  INC2
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	add	AO,  AO,  INC2
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,  16 * SIZE
 | |
| 
 | |
| 	fxpmul	   f0,  A1,  f0
 | |
| 	fxcsnmsub  f2,  A1, f0, f2
 | |
| 	fxcpnmsub  f1,  A2, f0, f1
 | |
| 	fxcsnmsub  f3,  A2, f0, f3
 | |
| 
 | |
| 	fxsmul	   f2,  A3,  f2
 | |
| 	fxcpnmsub  f1,  A4,  f2,  f1
 | |
| 	fxcsnmsub  f3,  A4,  f2,  f3
 | |
| 
 | |
| 	fxpmul	   f1,  A5,  f1
 | |
| 	fxcsnmsub  f3,  A5,  f1, f3
 | |
| 
 | |
| 	fxsmul	   f3,  A6,  f3
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFPDUX	A1,  BO,  INC2
 | |
| 	LFPDUX	A2,  BO,  INC2
 | |
| 
 | |
| 	subi	BO, BO, 4 * SIZE
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| 
 | |
| 	fxcsnmsub    f2,  A1, f0, f2
 | |
| 	fxcsnmsub    f3,  A1, f1, f3
 | |
| 
 | |
| 	fxsmul	f2,  A2,  f2
 | |
| 	fxsmul	f3,  A2,  f3
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LFPDUX	A2,  BO,  INC2
 | |
| 	LFPDUX	A1,  BO,  INC2
 | |
| 
 | |
| 	subi	BO, BO, 4 * SIZE
 | |
| 
 | |
| 	fxsmul	f2,  A1,  f2
 | |
| 	fxsmul	f3,  A1,  f3
 | |
| 
 | |
| 	fxcpnmsub    f0,  A1, f2,  f0
 | |
| 	fxcpnmsub    f1,  A1, f3,  f1
 | |
| 
 | |
| 	fxpmul	f0,  A2,  f0
 | |
| 	fxpmul	f1,  A2,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 4 * SIZE
 | |
| 	subi	CO2, CO2, 4 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC2
 | |
| 	STFPDUX	f2,  BO,  INC2
 | |
| 	STFPDUX	f1,  BO,  INC2
 | |
| 	STFPDUX	f3,  BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,   8 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFDUX	f2,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFDUX	f3,  CO1, INC
 | |
| 
 | |
| 	STFSDUX	f0,  CO2, INC
 | |
| 	STFSDUX	f2,  CO2, INC
 | |
| 	STFSDUX	f1,  CO2, INC
 | |
| 	STFSDUX	f3,  CO2, INC
 | |
| #else
 | |
| 	STFPDUX	f0,  AO,  INC2
 | |
| 	STFPDUX	f1,  AO,  INC2
 | |
| 	STFPDUX	f2,  AO,  INC2
 | |
| 	STFPDUX	f3,  AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   8 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| 
 | |
| 	STFDUX	f2,  CO2, INC
 | |
| 	STFSDUX	f2,  CO2, INC
 | |
| 	STFDUX	f3,  CO2, INC
 | |
| 	STFSDUX	f3,  CO2, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 4 * SIZE
 | |
| 	subi	CO2, CO2, 4 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 2 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 2 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 4
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 4
 | |
| #endif
 | |
| 
 | |
| 	li	r0, FZERO
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
| .L80:
 | |
| 	srawi.	I, M,  3
 | |
| 	ble	.L89
 | |
| 	.align 4
 | |
| 
 | |
| .L51:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	fpmr	f4,  f0
 | |
| 	addi	BO,   B,  - 2 * SIZE
 | |
|  	fpmr	f1,  f0
 | |
| 	fpmr	f5,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f6,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  2
 | |
| 	fpmr	f3,  f0
 | |
| 	mtspr	CTR, r0
 | |
| 	fpmr	f7,  f0
 | |
| 	ble	.L54
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  3 + BASE_SHIFT
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 3 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 1 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK
 | |
| 
 | |
| 	fpmr	f4,  f0
 | |
| 	addi	BO,  BO,  - 2 * SIZE
 | |
|  	fpmr	f1,  f0
 | |
| 	fpmr	f5,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f6,  f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  2
 | |
| 	fpmr	f3,  f0
 | |
| 	mtspr	CTR, r0
 | |
| 	fpmr	f7,  f0
 | |
| 	ble	.L54
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	bdz-	.L53
 | |
| 	.align 4
 | |
| 
 | |
| .L52:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B1, A3, f6
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B1, A4, f7
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A5, f0
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	fxcsmadd	f4,  B2, A5, f4
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B2, A6, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B2, A6, f5
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f2,  B2, A7, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B2, A7, f6
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B2, A8, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B2, A8, f7
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A1, f0
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	fxcsmadd	f4,  B3, A1, f4
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B3, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B3, A2, f5
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f2,  B3, A3, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B3, A3, f6
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B3, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B3, A4, f7
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B4, A5, f0
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	fxcsmadd	f4,  B4, A5, f4
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B4, A6, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B4, A6, f5
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f2,  B4, A7, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B4, A7, f6
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B4, A8, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B4, A8, f7
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	bdnz+	.L52
 | |
| 	.align 4
 | |
| 
 | |
| .L53:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B1, A3, f6
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B1, A4, f7
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A5, f0
 | |
| 	nop
 | |
| 	fxcsmadd	f4,  B2, A5, f4
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B2, A6, f1
 | |
| 	nop
 | |
| 	fxcsmadd	f5,  B2, A6, f5
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f2,  B2, A7, f2
 | |
| 	nop
 | |
| 	fxcsmadd	f6,  B2, A7, f6
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B2, A8, f3
 | |
| 	nop
 | |
| 	fxcsmadd	f7,  B2, A8, f7
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A1, f0
 | |
| 	fxcsmadd	f4,  B3, A1, f4
 | |
| 	fxcpmadd	f1,  B3, A2, f1
 | |
| 	fxcsmadd	f5,  B3, A2, f5
 | |
| 
 | |
| 	fxcpmadd	f2,  B3, A3, f2
 | |
| 	fxcsmadd	f6,  B3, A3, f6
 | |
| 	fxcpmadd	f3,  B3, A4, f3
 | |
| 	fxcsmadd	f7,  B3, A4, f7
 | |
| 
 | |
| 	fxcpmadd	f0,  B4, A5, f0
 | |
| 	fxcsmadd	f4,  B4, A5, f4
 | |
| 	fxcpmadd	f1,  B4, A6, f1
 | |
| 	fxcsmadd	f5,  B4, A6, f5
 | |
| 
 | |
| 	fxcpmadd	f2,  B4, A7, f2
 | |
| 	fxcsmadd	f6,  B4, A7, f6
 | |
| 	fxcpmadd	f3,  B4, A8, f3
 | |
| 	fxcsmadd	f7,  B4, A8, f7
 | |
| 	.align 4
 | |
| 
 | |
| .L54:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L58
 | |
| #else
 | |
| 	andi.	r0, TEMP, 3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L58
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	bdz-	.L57
 | |
| 	.align 4
 | |
| 
 | |
| .L56:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	fxcsmadd	f6,  B1, A3, f6
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	fxcsmadd	f7,  B1, A4, f7
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	bdnz+	.L56
 | |
| 	.align 4
 | |
| 
 | |
| .L57:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f4,  B1, A1, f4
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcsmadd	f5,  B1, A2, f5
 | |
| 
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	fxcsmadd	f6,  B1, A3, f6
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	fxcsmadd	f7,  B1, A4, f7
 | |
| 	.align 4
 | |
| 
 | |
| .L58:
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 8
 | |
| #else
 | |
| 	subi	r0, KK, 2
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 3 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 1 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO,  B,     r0
 | |
| 	addi	BO,  BO, - 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	fpmr	f24, f0
 | |
| 	fpmr	f25, f1
 | |
| 	fpmr	f26, f2
 | |
| 	fpmr	f27, f3
 | |
| 
 | |
| 	fsmfp	f0,  f4
 | |
| 	fsmfp	f1,  f5
 | |
| 	fsmfp	f2,  f6
 | |
| 	fsmfp	f3,  f7
 | |
| 
 | |
| 	fsmtp	f4,  f24
 | |
| 	fsmtp	f5,  f25
 | |
| 	fsmtp	f6,  f26
 | |
| 	fsmtp	f7,  f27
 | |
| 
 | |
| 	LFPDUX	f16, BO,  INC2
 | |
| 	LFPDUX	f17, BO,  INC2
 | |
| 	LFPDUX	f18, BO,  INC2
 | |
| 	LFPDUX	f19, BO,  INC2
 | |
| 
 | |
|  	LFPDUX	f20, BO,  INC2
 | |
| 	LFPDUX	f21, BO,  INC2
 | |
| 	LFPDUX	f22, BO,  INC2
 | |
| 	LFPDUX	f23, BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,  16 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f4,  f17,  f4
 | |
| 	fpsub	f1,  f18,  f1
 | |
| 	fpsub	f5,  f19,  f5
 | |
| 
 | |
| 	fpsub	f2,  f20,  f2
 | |
| 	fpsub	f6,  f21,  f6
 | |
| 	fpsub	f3,  f22,  f3
 | |
| 	fpsub	f7,  f23,  f7
 | |
| 
 | |
| #else
 | |
| 	LFPDUX	f16, AO,  INC2
 | |
| 	LFPDUX	f17, AO,  INC2
 | |
| 	LFPDUX	f18, AO,  INC2
 | |
| 	LFPDUX	f19, AO,  INC2
 | |
| 
 | |
| 	LFPDUX	f20, AO,  INC2
 | |
| 	LFPDUX	f21, AO,  INC2
 | |
| 	LFPDUX	f22, AO,  INC2
 | |
| 	LFPDUX	f23, AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,  16 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| 	fpsub	f2,  f18,  f2
 | |
| 	fpsub	f3,  f19,  f3
 | |
| 	fpsub	f4,  f20,  f4
 | |
| 	fpsub	f5,  f21,  f5
 | |
| 	fpsub	f6,  f22,  f6
 | |
| 	fpsub	f7,  f23,  f7
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
|        addi	AO,  AO,  66 * SIZE
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INCM2
 | |
| 	LFPDUX	A2,  AO,  INCM2
 | |
| 	LFPDUX	A3,  AO,  INCM2
 | |
| 	LFPDUX	A4,  AO,  INCM2
 | |
| 	LFPDUX	A5,  AO,  INCM2
 | |
| 	LFPDUX	A6,  AO,  INCM2
 | |
| 	LFPDUX	A7,  AO,  INCM2
 | |
| 	LFPDUX	A8,  AO,  INCM2
 | |
| 
 | |
| 	fxsmul	   f7,  A1, f7
 | |
| 	fxcpnmsub  f3,  A1, f7,  f3
 | |
| 	fxcsnmsub  f6,  A2, f7,  f6
 | |
| 	fxcpnmsub  f2,  A2, f7,  f2
 | |
| 
 | |
| 	fxcsnmsub  f5,  A3, f7,  f5
 | |
| 	fxcpnmsub  f1,  A3, f7,  f1
 | |
| 	fxcsnmsub  f4,  A4, f7,  f4
 | |
| 	fxcpnmsub  f0,  A4, f7,  f0
 | |
| 
 | |
| 	fxpmul	   f3,  A5, f3
 | |
| 	fxcsnmsub  f6,  A6, f3,  f6
 | |
| 	fxcpnmsub  f2,  A6, f3,  f2
 | |
| 
 | |
| 	fxcsnmsub  f5,  A7, f3,  f5
 | |
| 	fxcpnmsub  f1,  A7, f3,  f1
 | |
| 	fxcsnmsub  f4,  A8, f3,  f4
 | |
| 	fxcpnmsub  f0,  A8, f3,  f0
 | |
| 
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	LFPDUX	A1,  AO,  INCM2
 | |
| 	LFPDUX	A2,  AO,  INCM2
 | |
| 	LFPDUX	A3,  AO,  INCM2
 | |
| 
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	LFPDUX	A4,  AO,  INCM2
 | |
| 	LFPDUX	A5,  AO,  INCM2
 | |
| 	LFPDUX	A6,  AO,  INCM2
 | |
| 
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	LFPDUX	A7,  AO,  INCM2
 | |
| 	LFPDUX	A8,  AO,  INCM2
 | |
| 
 | |
| 	fxsmul	   f6,  A1, f6
 | |
| 	fxcpnmsub  f2,  A1, f6,  f2
 | |
| 	fxcsnmsub  f5,  A2, f6,  f5
 | |
| 	fxcpnmsub  f1,  A2, f6,  f1
 | |
| 	fxcsnmsub  f4,  A3, f6,  f4
 | |
| 	fxcpnmsub  f0,  A3, f6,  f0
 | |
| 
 | |
| 	fxpmul	   f2,  A4, f2
 | |
| 	fxcsnmsub  f5,  A5, f2,  f5
 | |
| 	fxcpnmsub  f1,  A5, f2,  f1
 | |
| 	fxcsnmsub  f4,  A6, f2,  f4
 | |
| 	fxcpnmsub  f0,  A6, f2,  f0
 | |
| 
 | |
| 	fxsmul	   f5,  A7, f5
 | |
| 	fxcpnmsub  f1,  A7, f5,  f1
 | |
| 	fxcsnmsub  f4,  A8, f5,  f4
 | |
| 	fxcpnmsub  f0,  A8, f5,  f0
 | |
| 
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	add	AO,  AO,  INCM2
 | |
| 	LFPDUX	A1,  AO,  INCM2
 | |
| 	LFPDUX	A2,  AO,  INCM2
 | |
| 
 | |
| 	subi	AO,  AO, 6 * SIZE
 | |
| 	LFPDUX	A3,  AO,  INCM2
 | |
| 	subi	AO,  AO, 6 * SIZE
 | |
| 	LFPDUX	A4,  AO,  INCM2
 | |
| 
 | |
| 	addi	AO,  AO,  -2 * SIZE
 | |
| 
 | |
| 	fxpmul	   f1,  A1, f1
 | |
| 	fxcsnmsub  f4,  A2, f1,  f4
 | |
| 	fxcpnmsub  f0,  A2, f1,  f0
 | |
| 
 | |
| 	fxsmul	   f4,  A3, f4
 | |
| 	fxcpnmsub  f0,  A3, f4,  f0
 | |
| 
 | |
| 	fxpmul	f0,  A4, f0
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 
 | |
| 	fxpmul	   f0,  A1,  f0
 | |
| 	fxcsnmsub  f4,  A1, f0, f4
 | |
| 	fxcpnmsub  f1,  A2, f0, f1
 | |
| 	fxcsnmsub  f5,  A2, f0, f5
 | |
| 	fxcpnmsub  f2,  A3, f0, f2
 | |
| 	fxcsnmsub  f6,  A3, f0, f6
 | |
| 	fxcpnmsub  f3,  A4, f0, f3
 | |
| 	fxcsnmsub  f7,  A4, f0, f7
 | |
| 
 | |
| 	fxsmul	   f4,  A5,  f4
 | |
| 	fxcpnmsub  f1,  A6, f4,  f1
 | |
| 	fxcsnmsub  f5,  A6, f4,  f5
 | |
| 	fxcpnmsub  f2,  A7, f4,  f2
 | |
| 	fxcsnmsub  f6,  A7, f4,  f6
 | |
| 	fxcpnmsub  f3,  A8, f4,  f3
 | |
| 	fxcsnmsub  f7,  A8, f4,  f7
 | |
| 
 | |
| 	add	AO,  AO,  INC2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 
 | |
| 	add	AO,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 
 | |
| 	add	AO,  AO,  INC2
 | |
| 	add	AO,  AO,  INC2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 
 | |
| 	fxpmul	   f1,  A1,  f1
 | |
| 	fxcsnmsub  f5,  A1, f1, f5
 | |
| 	fxcpnmsub  f2,  A2, f1, f2
 | |
| 	fxcsnmsub  f6,  A2, f1, f6
 | |
| 	fxcpnmsub  f3,  A3, f1, f3
 | |
| 	fxcsnmsub  f7,  A3, f1, f7
 | |
| 
 | |
| 	fxsmul	   f5,  A4,  f5
 | |
| 	fxcpnmsub  f2,  A5, f5,  f2
 | |
| 	fxcsnmsub  f6,  A5, f5,  f6
 | |
| 	fxcpnmsub  f3,  A6, f5,  f3
 | |
| 	fxcsnmsub  f7,  A6, f5,  f7
 | |
| 
 | |
| 	fxpmul	   f2,  A7,  f2
 | |
| 	fxcsnmsub  f6,  A7, f2,  f6
 | |
| 	fxcpnmsub  f3,  A8, f2,  f3
 | |
| 	fxcsnmsub  f7,  A8, f2,  f7
 | |
| 
 | |
| 	add	AO,  AO,  INC2
 | |
| 	add	AO,  AO,  INC2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 
 | |
| 	addi	AO,  AO,  6 * SIZE
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	addi	AO,  AO,  6 * SIZE
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,  64 * SIZE
 | |
| 
 | |
| 	fxsmul	   f6,  A1,  f6
 | |
| 	fxcpnmsub  f3,  A2, f6,  f3
 | |
| 	fxcsnmsub  f7,  A2, f6,  f7
 | |
| 
 | |
| 	fxpmul	   f3,  A3,  f3
 | |
| 	fxcsnmsub  f7,  A3, f3,  f7
 | |
| 
 | |
| 	fxsmul	   f7,  A4,  f7
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFPDUX	A1,  BO,  INC2
 | |
| 	LFPDUX	A2,  BO,  INC2
 | |
| 
 | |
| 	subi	BO, BO, 4 * SIZE
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| 	fxpmul	f2,  A1,  f2
 | |
| 	fxpmul	f3,  A1,  f3
 | |
| 
 | |
| 	fxcsnmsub    f4,  A1, f0, f4
 | |
| 	fxcsnmsub    f5,  A1, f1, f5
 | |
| 	fxcsnmsub    f6,  A1, f2, f6
 | |
| 	fxcsnmsub    f7,  A1, f3, f7
 | |
| 
 | |
| 	fxsmul	f4,  A2,  f4
 | |
| 	fxsmul	f5,  A2,  f5
 | |
| 	fxsmul	f6,  A2,  f6
 | |
| 	fxsmul	f7,  A2,  f7
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LFPDUX	A2,  BO,  INC2
 | |
| 	LFPDUX	A1,  BO,  INC2
 | |
| 
 | |
| 	subi	BO, BO, 4 * SIZE
 | |
| 
 | |
| 	fxsmul	f4,  A1,  f4
 | |
| 	fxsmul	f5,  A1,  f5
 | |
| 	fxsmul	f6,  A1,  f6
 | |
| 	fxsmul	f7,  A1,  f7
 | |
| 
 | |
| 	fxcpnmsub    f0,  A1, f4,  f0
 | |
| 	fxcpnmsub    f1,  A1, f5,  f1
 | |
| 	fxcpnmsub    f2,  A1, f6,  f2
 | |
| 	fxcpnmsub    f3,  A1, f7,  f3
 | |
| 
 | |
| 	fxpmul	f0,  A2,  f0
 | |
| 	fxpmul	f1,  A2,  f1
 | |
| 	fxpmul	f2,  A2,  f2
 | |
| 	fxpmul	f3,  A2,  f3
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 8 * SIZE
 | |
| 	subi	CO2, CO2, 8 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC2
 | |
| 	STFPDUX	f4,  BO,  INC2
 | |
| 	STFPDUX	f1,  BO,  INC2
 | |
| 	STFPDUX	f5,  BO,  INC2
 | |
| 	STFPDUX	f2,  BO,  INC2
 | |
| 	STFPDUX	f6,  BO,  INC2
 | |
| 	STFPDUX	f3,  BO,  INC2
 | |
| 	STFPDUX	f7,  BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,  16 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFDUX	f4,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFDUX	f5,  CO1, INC
 | |
| 	STFDUX	f2,  CO1, INC
 | |
| 	STFDUX	f6,  CO1, INC
 | |
| 	STFDUX	f3,  CO1, INC
 | |
| 	STFDUX	f7,  CO1, INC
 | |
| 
 | |
| 	STFSDUX	f0,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 	STFSDUX	f1,  CO2, INC
 | |
| 	STFSDUX	f5,  CO2, INC
 | |
| 	STFSDUX	f2,  CO2, INC
 | |
| 	STFSDUX	f6,  CO2, INC
 | |
| 	STFSDUX	f3,  CO2, INC
 | |
| 	STFSDUX	f7,  CO2, INC
 | |
| #else
 | |
| 	STFPDUX	f0,  AO,  INC2
 | |
| 	STFPDUX	f1,  AO,  INC2
 | |
| 	STFPDUX	f2,  AO,  INC2
 | |
| 	STFPDUX	f3,  AO,  INC2
 | |
| 	STFPDUX	f4,  AO,  INC2
 | |
| 	STFPDUX	f5,  AO,  INC2
 | |
| 	STFPDUX	f6,  AO,  INC2
 | |
| 	STFPDUX	f7,  AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,  16 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| 	STFDUX	f2,  CO1, INC
 | |
| 	STFSDUX	f2,  CO1, INC
 | |
| 	STFDUX	f3,  CO1, INC
 | |
| 	STFSDUX	f3,  CO1, INC
 | |
| 
 | |
| 	STFDUX	f4,  CO2, INC
 | |
| 	STFSDUX	f4,  CO2, INC
 | |
| 	STFDUX	f5,  CO2, INC
 | |
| 	STFSDUX	f5,  CO2, INC
 | |
| 	STFDUX	f6,  CO2, INC
 | |
| 	STFSDUX	f6,  CO2, INC
 | |
| 	STFDUX	f7,  CO2, INC
 | |
| 	STFSDUX	f7,  CO2, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 8 * SIZE
 | |
| 	subi	CO2, CO2, 8 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 3 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 3 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 1 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 8
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 8
 | |
| #endif
 | |
| 
 | |
| 	addic.	I, I, -1
 | |
| 	li	r0, FZERO
 | |
| 
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	bgt+	.L51
 | |
| 	.align 4
 | |
| 
 | |
| .L89:	/* End of the current column panel: fall-through once the `bgt+ .L51` row-block loop is not taken. Moves B and KK on before .L90. */
 | |
| #ifdef LN
 | |
| 	slwi	r0, K, 1 + BASE_SHIFT	/* r0 = K << (1 + BASE_SHIFT); presumably the byte size of a K x 2 panel -- BASE_SHIFT is defined outside this view */
 | |
| 	add	B, B, r0	/* LN: B += r0 */
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	B,  BO, 2 * SIZE	/* LT/RN: B = BO + 2*SIZE (BO is kept 2*SIZE below the next element, cf. `addi BO, B, - 2 * SIZE`) */
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	addi	KK, KK, 2	/* RN: KK += 2 */
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	subi	KK, KK, 2	/* RT: KK -= 2 */
 | |
| #endif
 | |
| 	.align 4
 | |
| 
 | |
| .L90:	/* Set-up for the single remaining column, entered only when N is odd. */
 | |
| 	andi.	J, N,  1	/* J = N & 1, CR0 updated */
 | |
| 	beq	.L999	/* bit 0 of N clear: nothing left, jump to .L999 (defined outside this view) */
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 0 + BASE_SHIFT	/* r0 = K << BASE_SHIFT */
 | |
| 	sub	B, B, r0	/* RT: B -= r0 */
 | |
| 
 | |
| 	sub	C, C, LDC	/* RT: C -= LDC */
 | |
| #endif
 | |
| 
 | |
| 	mr	CO1, C	/* CO1 = C: output pointer for this column */
 | |
| 
 | |
| #ifdef LN
 | |
| 	add	KK, M, OFFSET	/* LN: KK = M + OFFSET */
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	mr	KK, OFFSET	/* LT: KK = OFFSET */
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| 	addi	AORIG, A, -2 * SIZE	/* LN/RT: AORIG = A - 2*SIZE; AO is derived from AORIG per tile later */
 | |
| #else
 | |
| 	addi	AO, A, -2 * SIZE	/* LT/RN: AO = A - 2*SIZE, biased for the update-form loads that add INC2 first */
 | |
| #endif
 | |
| #ifndef RT
 | |
| 	add	C,  CO1, LDC	/* non-RT: C = CO1 + LDC */
 | |
| #endif
 | |
| 
 | |
| 	li	r0, FZERO	/* r0 = FZERO, an offset constant defined outside this view */
 | |
| 	lfpsx	f0, SP, r0	/* load f0 from SP + FZERO; presumably a zero used to clear the accumulators (f0 is copied to f1..f3 below) -- TODO confirm */
 | |
| 
 | |
| 	andi.	I, M,  1	/* I = M & 1, CR0 updated */
 | |
| 	beq	.L100	/* bit 0 of M clear: skip the 1x1 tile and continue at .L100 */
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	BO,  B,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L124
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  0 + BASE_SHIFT
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 0 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 0 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK
 | |
| 
 | |
| 	addi	BO,  BO,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L124
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	bdz-	.L123
 | |
| 	.align 4
 | |
| 
 | |
| .L122:
 | |
| 	fpmadd	f0,  A1, B1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	fpmadd	f1,  A2, B2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	fpmadd	f2,  A3, B3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	fpmadd	f3,  A4, B4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	bdnz+	.L122
 | |
| 	.align 4
 | |
| 
 | |
| .L123:
 | |
| 	fpmadd	f0,  A1, B1, f0
 | |
| 	fpmadd	f1,  A2, B2, f1
 | |
| 	fpmadd	f2,  A3, B3, f2
 | |
| 	fpmadd	f3,  A4, B4, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L124:	/* 1x1 tile, K remainder: handles the iterations left after the main loop, whose trip count was (count >> 3). */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  7	/* LT/RN: r0 = KK & 7 */
 | |
| 	mtspr	CTR, r0	/* CTR = remainder count */
 | |
| 	ble+	.L128	/* no remainder (r0 == 0): go straight to the reduction at .L128 */
 | |
| #else
 | |
| 	andi.	r0, TEMP, 7	/* LN/RT: r0 = TEMP & 7, where TEMP was set to K - KK before the main loop */
 | |
| 	mtspr	CTR, r0	/* CTR = remainder count */
 | |
| 	ble+	.L128	/* no remainder (r0 == 0): go straight to the reduction at .L128 */
 | |
| #endif
 | |
| 
 | |
| 	LFDX	A1,  AO,  INC2	/* preload A1 from AO + INC2 (non-update form, AO unchanged) */
 | |
| 	LFDX	B1,  BO,  INC2	/* preload B1 from BO + INC2 (non-update form, BO unchanged) */
 | |
| 	add	AO, AO, INC	/* AO += INC; presumably one element -- INC is set outside this view */
 | |
| 	add	BO, BO, INC	/* BO += INC */
 | |
| 	bdz-	.L127	/* --CTR == 0: only one iteration, skip the loop body and drain at .L127 */
 | |
| 	.align 4
 | |
| 
 | |
| .L126:	/* Loop body: multiply-add the preloaded pair, then load the next pair. */
 | |
| 	fmadd	f0,  A1, B1, f0	/* f0 = A1 * B1 + f0 */
 | |
| 	LFDX	A1,  AO,  INC2	/* next A value */
 | |
| 	LFDX	B1,  BO,  INC2	/* next B value */
 | |
| 	add	AO, AO, INC	/* AO += INC */
 | |
| 	add	BO, BO, INC	/* BO += INC */
 | |
| 	bdnz+	.L126	/* --CTR != 0: repeat */
 | |
| 	.align 4
 | |
| 
 | |
| .L127:	/* Drain: consume the last pair loaded above. */
 | |
| 	fmadd	f0,  A1, B1, f0	/* f0 = A1 * B1 + f0 */
 | |
| 	.align 4
 | |
| 
 | |
| .L128:	/* 1x1 tile: collapse the accumulators f0..f3 to one scalar, subtract it from the stored value, scale it, write it back, then update pointers and KK. */
 | |
| 	fpadd	f0, f0, f1	/* f0 = f0 + f1 (paired add) */
 | |
| 	fpadd	f2, f2, f3	/* f2 = f2 + f3 (paired add) */
 | |
| 	fpadd	f0, f0, f2	/* f0 = f0 + f2: all four accumulators are now summed in f0 */
 | |
| 	fsmtp	f1, f0	/* NOTE(review): by mnemonic, copies the secondary half of f0 into the primary half of f1 -- confirm against the FPU manual */
 | |
| 
 | |
| 	fadd	f0, f0, f1	/* scalar add: f0 = f0 + f1, which folds the two halves if fsmtp behaves as noted */
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 1	/* r0 = KK - 1 */
 | |
| #else
 | |
| 	subi	r0, KK, 1	/* r0 = KK - 1 (same as the LN branch; for a 1x1 tile both variants step by one) */
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 0 + BASE_SHIFT	/* TEMP = r0 << BASE_SHIFT */
 | |
| 	slwi	r0,   r0, 0 + BASE_SHIFT	/* r0 = r0 << BASE_SHIFT (same value as TEMP here) */
 | |
| 	add	AO, AORIG, TEMP	/* AO = AORIG + TEMP */
 | |
| 	add	BO,  B,     r0	/* BO = B + r0 */
 | |
| 	addi	BO,  BO, - 2 * SIZE	/* BO -= 2*SIZE, matching the INC2 index used by the accesses below */
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LFDX	f16, BO,  INC2	/* LN/LT: f16 = value at BO + INC2 */
 | |
| 
 | |
| 	fsub	f0,  f16,  f0	/* f0 = f16 - f0 */
 | |
| #else
 | |
| 	LFDX	f16, AO,  INC2	/* RN/RT: f16 = value at AO + INC2 */
 | |
| 
 | |
| 	fsub	f0,  f16,  f0	/* f0 = f16 - f0 */
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	LFD	A1, (2 +  0) * SIZE(AO)	/* A1 = element at AO + 2*SIZE; presumably the pre-inverted diagonal entry, since it is multiplied rather than divided -- TODO confirm */
 | |
| 
 | |
| 	fmul	f0, A1, f0	/* f0 = A1 * f0 */
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	LFD	A1, (2 +  0) * SIZE(AO)	/* A1 = element at AO + 2*SIZE */
 | |
| 
 | |
| 	fmul	f0, A1, f0	/* f0 = A1 * f0 */
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFDX	A1,  BO,  INC2	/* A1 = element at BO + INC2 */
 | |
| 
 | |
| 	fmul	f0,  A1,  f0	/* f0 = A1 * f0 */
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LFDX	A1,  BO,  INC2	/* A1 = element at BO + INC2 */
 | |
| 
 | |
| 	fmul	f0,  A1,  f0	/* f0 = A1 * f0 */
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 1 * SIZE	/* LN: CO1 -= SIZE before the update-form store below */
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFDX	f0,  BO,  INC2	/* LN/LT: write the result back to BO + INC2, the slot f16 was read from */
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC	/* store f0 at CO1 + INC and update CO1 to that address */
 | |
| #else
 | |
| 	STFDX	f0,  AO,  INC2	/* RN/RT: write the result back to AO + INC2, the slot f16 was read from */
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC	/* store f0 at CO1 + INC and update CO1 to that address */
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 1 * SIZE	/* LN: CO1 -= SIZE again after the store */
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 0 + BASE_SHIFT	/* r0 = K << BASE_SHIFT */
 | |
| 	add	AORIG, AORIG, r0	/* RT: AORIG += r0 */
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK	/* TEMP = K - KK */
 | |
| 	slwi	r0,   TEMP, 0 + BASE_SHIFT	/* r0 = TEMP << BASE_SHIFT */
 | |
| 	slwi	TEMP, TEMP, 0 + BASE_SHIFT	/* TEMP = TEMP << BASE_SHIFT (same value as r0 here) */
 | |
| 	add	AO, AO, r0	/* AO += r0 */
 | |
| 	add	BO, BO, TEMP	/* BO += TEMP */
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 1	/* LT: KK += 1 */
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 1	/* LN: KK -= 1 */
 | |
| #endif
 | |
| 	li	r0, FZERO	/* r0 = FZERO offset */
 | |
| 	lfpsx	f0, SP, r0	/* reload f0 from SP + FZERO so the next tile starts from the same initial accumulator value */
 | |
| 	.align 4
 | |
| 
 | |
| .L100:
 | |
| 	andi.	I, M,  2
 | |
| 	beq	.L110
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	BO,  B,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L114
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  1 + BASE_SHIFT
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 1 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 0 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK
 | |
| 
 | |
| 	addi	BO,  BO,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0, TEMP,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L114
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	bdz-	.L113
 | |
| 	.align 4
 | |
| 
 | |
| .L112:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcsmadd	f1,  B1, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	fxcpmadd	f2,  B2, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B2, A4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	fxcpmadd	f0,  B3, A5, f0
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcsmadd	f1,  B3, A6, f1
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	fxcpmadd	f2,  B4, A7, f2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B4, A8, f3
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	bdnz+	.L112
 | |
| 	.align 4
 | |
| 
 | |
| .L113:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcsmadd	f1,  B1, A2, f1
 | |
| 	fxcpmadd	f2,  B2, A3, f2
 | |
| 	fxcsmadd	f3,  B2, A4, f3
 | |
| 	fxcpmadd	f0,  B3, A5, f0
 | |
| 	fxcsmadd	f1,  B3, A6, f1
 | |
| 	fxcpmadd	f2,  B4, A7, f2
 | |
| 	fxcsmadd	f3,  B4, A8, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L114:	/* 2x1 tile, K remainder: handles the iterations left after the main loop, whose trip count was (count >> 3). */
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  7	/* LT/RN: r0 = KK & 7 */
 | |
| 	mtspr	CTR, r0	/* CTR = remainder count */
 | |
| 	ble+	.L118	/* no remainder (r0 == 0): go straight to the reduction at .L118 */
 | |
| #else
 | |
| 	andi.	r0, TEMP, 7	/* LN/RT: r0 = TEMP & 7, where TEMP was set to K - KK before the main loop */
 | |
| 	mtspr	CTR, r0	/* CTR = remainder count */
 | |
| 	ble+	.L118	/* no remainder (r0 == 0): go straight to the reduction at .L118 */
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INC2	/* preload a pair into A1 from AO + INC2, updating AO */
 | |
| 	LFDX	B1,  BO,  INC2	/* preload one value into B1 from BO + INC2 (BO unchanged) */
 | |
| 	add	BO, BO, INC	/* BO += INC; presumably one element -- INC is set outside this view */
 | |
| 	bdz-	.L117	/* --CTR == 0: only one iteration, skip the loop body and drain at .L117 */
 | |
| 	.align 4
 | |
| 
 | |
| .L116:	/* Loop body: accumulate the preloaded operands, then load the next ones. */
 | |
| 	fxcpmadd	f0,  B1, A1, f0	/* NOTE(review): by mnemonic, f0 += (primary of B1) * A1 on both halves -- confirm against the FPU manual */
 | |
| 	LFPDUX	A1,  AO,  INC2	/* next A pair, AO updated */
 | |
| 	LFDX	B1,  BO,  INC2	/* next B value */
 | |
| 	add	BO, BO, INC	/* BO += INC */
 | |
| 	bdnz+	.L116	/* --CTR != 0: repeat */
 | |
| 	.align 4
 | |
| 
 | |
| .L117:	/* Drain: consume the last operands loaded above. */
 | |
| 	fxcpmadd	f0,  B1, A1, f0	/* final accumulate into f0 */
 | |
| 	.align 4
 | |
| 
 | |
| .L118:
 | |
| 	fpadd	f0, f0, f1
 | |
| 	fpadd	f2, f3, f2
 | |
| 	fpadd	f0, f0, f2
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 2
 | |
| #else
 | |
| 	subi	r0, KK, 1
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 1 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 0 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO,  B,     r0
 | |
| 	addi	BO,  BO, - 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LFPDX	f16, BO,  INC2
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| #else
 | |
| 	LFPDX	f16, AO,  INC2
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	fsmtp	f4, f0
 | |
| 
 | |
| 	LFD	A1, (2 +  3) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  2) * SIZE(AO)
 | |
| 	LFD	A3, (2 +  0) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f4, A1, f4
 | |
| 	fnmsub	f0, A2, f4, f0
 | |
| 	fmul	f0, A3, f0
 | |
| 	fsmfp	f0, f4
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	fsmtp	f4, f0
 | |
| 
 | |
| 	LFD	A1, (2 +  0) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  1) * SIZE(AO)
 | |
| 	LFD	A3, (2 +  3) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f0, A1, f0
 | |
| 	fnmsub	f4, A2, f0, f4
 | |
| 	fmul	f4, A3, f4
 | |
| 
 | |
| 	fsmfp	f0, f4
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFPDX	A1,  BO,  INC2
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LFPDX	A1,  BO,  INC2
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDX	f0,  BO,  INC2
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| #else
 | |
| 	STFPDX	f0,  AO,  INC2
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 1 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 1 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 0 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 2
 | |
| #endif
 | |
| 
 | |
| 	li	r0, FZERO
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
| .L110:
 | |
| 	andi.	I, M,  4
 | |
| 	beq	.L120
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	addi	BO,  B,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3, f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L104
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  2 + BASE_SHIFT
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 2 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 0 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK
 | |
| 
 | |
| 	addi	BO,  BO,  - 2 * SIZE
 | |
| 	fpmr	f1,  f0
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3, f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L104
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 
 | |
| 	bdz-	.L103
 | |
| 	.align 4
 | |
| 
 | |
| .L102:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B1, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B1, A4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A5, f0
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B2, A6, f1
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B2, A7, f2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B2, A8, f3
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B3, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B3, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B3, A4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B3,  BO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B4, A5, f0
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B4, A6, f1
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B4, A7, f2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B4, A8, f3
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	LFPDUX	B4,  BO,  INC2
 | |
| 	bdnz+	.L102
 | |
| 	.align 4
 | |
| 
 | |
| .L103:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B1, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B1, A4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A5, f0
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B2, A6, f1
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B2, A7, f2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B2, A8, f3
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B3, A1, f0
 | |
| 	fxcpmadd	f1,  B3, A2, f1
 | |
| 	fxcsmadd	f2,  B3, A3, f2
 | |
| 	fxcsmadd	f3,  B3, A4, f3
 | |
| 
 | |
| 	fxcpmadd	f0,  B4, A5, f0
 | |
| 	fxcpmadd	f1,  B4, A6, f1
 | |
| 	fxcsmadd	f2,  B4, A7, f2
 | |
| 	fxcsmadd	f3,  B4, A8, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L104:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  7
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L108
 | |
| #else
 | |
| 	andi.	r0, TEMP, 7
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L108
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFDX	B1,  BO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	add	BO, BO, INC
 | |
| 	bdz-	.L107
 | |
| 	.align 4
 | |
| 
 | |
| .L106:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	LFDX	B1,  BO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	add	BO, BO, INC
 | |
| 	bdnz+	.L106
 | |
| 	.align 4
 | |
| 
 | |
| .L107:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	.align 4
 | |
| 
 | |
| .L108:
 | |
| 	fpadd	f0, f0, f2
 | |
| 	fpadd	f1, f1, f3
 | |
| 
 | |
| #if defined(LN) || defined(RT)
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 4
 | |
| #else
 | |
| 	subi	r0, KK, 1
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 2 + BASE_SHIFT
 | |
| 	slwi	r0,   r0, 0 + BASE_SHIFT
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO,  B,     r0
 | |
| 	addi	BO,  BO, - 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	LFPDUX	f16, BO,  INC2
 | |
| 	LFPDUX	f17, BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,   4 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| #else
 | |
| 	LFPDUX	f16, AO,  INC2
 | |
| 	LFPDUX	f17, AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   4 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	fsmtp	f4, f0
 | |
| 	fsmtp	f5, f1
 | |
| 
 | |
| 	LFD	A1, (2 + 15) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 14) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 13) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 12) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f5, A1, f5
 | |
| 	fnmsub	f1, A2, f5, f1
 | |
| 	fnmsub	f4, A3, f5, f4
 | |
| 	fnmsub	f0, A4, f5, f0
 | |
| 
 | |
| 	LFD	A1, (2 + 10) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  9) * SIZE(AO)
 | |
| 	LFD	A3, (2 +  8) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f1, A1, f1
 | |
| 	fnmsub	f4, A2, f1, f4
 | |
| 	fnmsub	f0, A3, f1, f0
 | |
| 
 | |
| 	LFD	A1, (2 +  5) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  4) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f4, A1, f4
 | |
| 	fnmsub	f0, A2, f4, f0
 | |
| 
 | |
| 	LFD	A1, (2 +  0) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f0, A1, f0
 | |
| 
 | |
| 	fsmfp	f0, f4
 | |
| 	fsmfp	f1, f5
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	fsmtp	f4, f0
 | |
| 	fsmtp	f5, f1
 | |
| 
 | |
| 	LFD	A1, (2 +  0) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  1) * SIZE(AO)
 | |
| 	LFD	A3, (2 +  2) * SIZE(AO)
 | |
| 	LFD	A4, (2 +  3) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f0, A1, f0
 | |
| 	fnmsub	f4, A2, f0, f4
 | |
| 	fnmsub	f1, A3, f0, f1
 | |
| 	fnmsub	f5, A4, f0, f5
 | |
| 
 | |
| 	LFD	A1, (2 +  5) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  6) * SIZE(AO)
 | |
| 	LFD	A3, (2 +  7) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f4, A1, f4
 | |
| 	fnmsub	f1, A2, f4, f1
 | |
| 	fnmsub	f5, A3, f4, f5
 | |
| 
 | |
| 	LFD	A1, (2 + 10) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 11) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f1, A1, f1
 | |
| 	fnmsub	f5, A2, f1, f5
 | |
| 
 | |
| 	LFD	A1, (2 + 15) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f5, A1, f5
 | |
| 
 | |
| 	fsmfp	f0, f4
 | |
| 	fsmfp	f1, f5
 | |
| #endif
 | |
| 
 | |
| #ifdef RN
 | |
| 	LFPDX	A1,  BO,  INC2
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	LFPDX	A1,  BO,  INC2
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 4 * SIZE
 | |
| #endif
 | |
| 
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC2
 | |
| 	STFPDUX	f1,  BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,   4 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| #else
 | |
| 	STFPDUX	f0,  AO,  INC2
 | |
| 	STFPDUX	f1,  AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   4 * SIZE
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	CO1, CO1, 4 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT
 | |
| 	slwi	r0, K, 2 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)
 | |
| 	sub	TEMP, K, KK
 | |
| 	slwi	r0,   TEMP, 2 + BASE_SHIFT
 | |
| 	slwi	TEMP, TEMP, 0 + BASE_SHIFT
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT
 | |
| 	addi	KK, KK, 4
 | |
| #endif
 | |
| 
 | |
| #ifdef LN
 | |
| 	subi	KK, KK, 4
 | |
| #endif
 | |
| 
 | |
| 	li	r0, FZERO
 | |
| 	lfpsx	f0, SP, r0
 | |
| 	.align 4
 | |
| 
 | |
| .L120:
 | |
| 	srawi.	I, M,  3
 | |
| 	ble	.L129
 | |
| 	.align 4
 | |
| 
 | |
| .L91:
 | |
| #if defined(LT) || defined(RN)
 | |
|  	fpmr	f1,  f0
 | |
| 	addi	BO,  B,  - 2 * SIZE
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  KK,  2
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L94
 | |
| #else
 | |
| 
 | |
| #ifdef LN
 | |
| 	slwi	r0,   K,  3 + BASE_SHIFT
 | |
| 	sub	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| 	slwi	r0  , KK, 3 + BASE_SHIFT
 | |
| 	slwi	TEMP, KK, 0 + BASE_SHIFT
 | |
| 	add	AO, AORIG, r0
 | |
| 	add	BO, B,     TEMP
 | |
| 
 | |
| 	sub	TEMP, K, KK
 | |
| 
 | |
|  	fpmr	f1,  f0
 | |
| 	addi	BO,  BO,  - 2 * SIZE
 | |
| 	fpmr	f2,  f0
 | |
| 	fpmr	f3,  f0
 | |
| 
 | |
| 	srawi.	r0,  TEMP,  2
 | |
| 	mtspr	CTR, r0
 | |
| 	ble	.L94
 | |
| #endif
 | |
| 
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	bdz-	.L93
 | |
| 	.align 4
 | |
| 
 | |
| .L92:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	fxcsmadd	f0,  B1, A5, f0
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcsmadd	f1,  B1, A6, f1
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B1, A7, f2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B1, A8, f3
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	LFPDUX	B1,  BO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B2, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	fxcpmadd	f2,  B2, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B2, A4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	fxcsmadd	f0,  B2, A5, f0
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcsmadd	f1,  B2, A6, f1
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B2, A7, f2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B2, A8, f3
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 	LFPDUX	B2,  BO,  INC2
 | |
| 	bdnz+	.L92
 | |
| 	.align 4
 | |
| 
 | |
| .L93:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 
 | |
| 	fxcsmadd	f0,  B1, A5, f0
 | |
| 	LFPDUX	A5,  AO,  INC2
 | |
| 	fxcsmadd	f1,  B1, A6, f1
 | |
| 	LFPDUX	A6,  AO,  INC2
 | |
| 	fxcsmadd	f2,  B1, A7, f2
 | |
| 	LFPDUX	A7,  AO,  INC2
 | |
| 	fxcsmadd	f3,  B1, A8, f3
 | |
| 	LFPDUX	A8,  AO,  INC2
 | |
| 
 | |
| 	fxcpmadd	f0,  B2, A1, f0
 | |
| 	fxcpmadd	f1,  B2, A2, f1
 | |
| 	fxcpmadd	f2,  B2, A3, f2
 | |
| 	fxcpmadd	f3,  B2, A4, f3
 | |
| 
 | |
| 	fxcsmadd	f0,  B2, A5, f0
 | |
| 	fxcsmadd	f1,  B2, A6, f1
 | |
| 	fxcsmadd	f2,  B2, A7, f2
 | |
| 	fxcsmadd	f3,  B2, A8, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L94:
 | |
| #if defined(LT) || defined(RN)
 | |
| 	andi.	r0,  KK,  3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L98
 | |
| #else
 | |
| 	andi.	r0, TEMP, 3
 | |
| 	mtspr	CTR, r0
 | |
| 	ble+	.L98
 | |
| #endif
 | |
| 
 | |
| 	LFDX	B1,  BO,  INC2
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	add	BO, BO, INC
 | |
| 	bdz-	.L97
 | |
| 	.align 4
 | |
| 
 | |
| .L96:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	LFPDUX	A1,  AO,  INC2
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	LFPDUX	A2,  AO,  INC2
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	LFPDUX	A3,  AO,  INC2
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	LFDX	B1,  BO,  INC2
 | |
| 	LFPDUX	A4,  AO,  INC2
 | |
| 	add	BO, BO, INC
 | |
| 	bdnz+	.L96
 | |
| 	.align 4
 | |
| 
 | |
| .L97:
 | |
| 	fxcpmadd	f0,  B1, A1, f0
 | |
| 	fxcpmadd	f1,  B1, A2, f1
 | |
| 	fxcpmadd	f2,  B1, A3, f2
 | |
| 	fxcpmadd	f3,  B1, A4, f3
 | |
| 	.align 4
 | |
| 
 | |
| .L98:	/* f0-f3 hold the accumulated products (4 pairs = 8 values); reposition AO/BO if needed, then form rhs - accumulator */
 | |
| #if defined(LN) || defined(RT)	/* LN/RT: recompute AO and BO from AORIG / B and KK */
 | |
| #ifdef LN
 | |
| 	subi	r0, KK, 8	/* LN: r0 = KK - 8 */
 | |
| #else
 | |
| 	subi	r0, KK, 1	/* RT: r0 = KK - 1 */
 | |
| #endif
 | |
| 	slwi	TEMP, r0, 3 + BASE_SHIFT	/* TEMP = r0 * 8 << BASE_SHIFT: offset into A (8 values per k-step; assumes BASE_SHIFT = log2(SIZE) - TODO confirm) */
 | |
| 	slwi	r0,   r0, 0 + BASE_SHIFT	/* r0 = r0 << BASE_SHIFT: offset into B (1 value per k-step) */
 | |
| 	add	AO, AORIG, TEMP
 | |
| 	add	BO,  B,     r0
 | |
| 	addi	BO,  BO, - 2 * SIZE	/* bias BO down by 2 elements; presumably so a load at BO + INC2 hits element 0 - confirm INC2 == 2 * SIZE */
 | |
| #endif
 | |
| /* Load the 8 right-hand-side values into f16-f19 and subtract the accumulated products from them. */
 | |
| #if defined(LN) || defined(LT)	/* left-side variants read the values through BO */
 | |
| 	LFPDUX	f16, BO,  INC2
 | |
| 	LFPDUX	f17, BO,  INC2
 | |
| 	LFPDUX	f18, BO,  INC2
 | |
| 	LFPDUX	f19, BO,  INC2
 | |
| /* Step BO back 8 elements; this undoes the four update-form loads if INC2 == 2 * SIZE (TODO confirm). */
 | |
| 	subi	BO,  BO,   8 * SIZE
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0	/* f0 = f16 - f0 on both halves */
 | |
| 	fpsub	f1,  f17,  f1
 | |
| 	fpsub	f2,  f18,  f2
 | |
| 	fpsub	f3,  f19,  f3
 | |
| #else	/* RN/RT: identical sequence, but the values are read through AO */
 | |
| 	LFPDUX	f16, AO,  INC2
 | |
| 	LFPDUX	f17, AO,  INC2
 | |
| 	LFPDUX	f18, AO,  INC2
 | |
| 	LFPDUX	f19, AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   8 * SIZE	/* step AO back 8 elements after the loads */
 | |
| 
 | |
| 	fpsub	f0,  f16,  f0
 | |
| 	fpsub	f1,  f17,  f1
 | |
| 	fpsub	f2,  f18,  f2
 | |
| 	fpsub	f3,  f19,  f3
 | |
| #endif
 | |
| 
 | |
| #ifdef LN	/* LN: solve the 8 values from last to first against the 8x8 block addressed through AO */
 | |
| 	fsmtp	f4, f0	/* f4.primary = f0.secondary: unpack so every value has its own primary slot for the scalar ops */
 | |
| 	fsmtp	f5, f1
 | |
| 	fsmtp	f6, f2
 | |
| 	fsmtp	f7, f3
 | |
| /* Value order 0..7 is f0, f4, f1, f5, f2, f6, f3, f7. Entries 63..56 follow; the 2-element bias presumably offsets a pre-decremented AO - TODO confirm. */
 | |
| 	LFD	A1, (2 + 63) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 62) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 61) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 60) * SIZE(AO)
 | |
| 	LFD	A5, (2 + 59) * SIZE(AO)
 | |
| 	LFD	A6, (2 + 58) * SIZE(AO)
 | |
| 	LFD	A7, (2 + 57) * SIZE(AO)
 | |
| 	LFD	A8, (2 + 56) * SIZE(AO)
 | |
| /* fmul rather than fdiv: presumably the diagonal entry is stored already inverted - verify against the packing code. */
 | |
| 	fmul	f7, A1, f7	/* value 7 finished: f7 *= entry 63 */
 | |
| 	fnmsub	f3, A2, f7, f3	/* f3 -= A2 * f7; the six lines below do the same for the other open values */
 | |
| 	fnmsub	f6, A3, f7, f6
 | |
| 	fnmsub	f2, A4, f7, f2
 | |
| 	fnmsub	f5, A5, f7, f5
 | |
| 	fnmsub	f1, A6, f7, f1
 | |
| 	fnmsub	f4, A7, f7, f4
 | |
| 	fnmsub	f0, A8, f7, f0
 | |
| /* Entries 54..48: finish value 6 (f3), then remove it from the six values still open. */
 | |
| 	LFD	A1, (2 + 54) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 53) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 52) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 51) * SIZE(AO)
 | |
| 	LFD	A5, (2 + 50) * SIZE(AO)
 | |
| 	LFD	A6, (2 + 49) * SIZE(AO)
 | |
| 	LFD	A7, (2 + 48) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f3, A1, f3
 | |
| 	fnmsub	f6, A2, f3, f6
 | |
| 	fnmsub	f2, A3, f3, f2
 | |
| 	fnmsub	f5, A4, f3, f5
 | |
| 	fnmsub	f1, A5, f3, f1
 | |
| 	fnmsub	f4, A6, f3, f4
 | |
| 	fnmsub	f0, A7, f3, f0
 | |
| /* Entries 45..40: value 5 (f6). */
 | |
| 	LFD	A1, (2 + 45) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 44) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 43) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 42) * SIZE(AO)
 | |
| 	LFD	A5, (2 + 41) * SIZE(AO)
 | |
| 	LFD	A6, (2 + 40) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f6, A1, f6
 | |
| 	fnmsub	f2, A2, f6, f2
 | |
| 	fnmsub	f5, A3, f6, f5
 | |
| 	fnmsub	f1, A4, f6, f1
 | |
| 	fnmsub	f4, A5, f6, f4
 | |
| 	fnmsub	f0, A6, f6, f0
 | |
| /* Entries 36..32: value 4 (f2). */
 | |
| 	LFD	A1, (2 + 36) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 35) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 34) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 33) * SIZE(AO)
 | |
| 	LFD	A5, (2 + 32) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f2, A1, f2
 | |
| 	fnmsub	f5, A2, f2, f5
 | |
| 	fnmsub	f1, A3, f2, f1
 | |
| 	fnmsub	f4, A4, f2, f4
 | |
| 	fnmsub	f0, A5, f2, f0
 | |
| /* Entries 27..24: value 3 (f5). */
 | |
| 	LFD	A1, (2 + 27) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 26) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 25) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 24) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f5, A1, f5
 | |
| 	fnmsub	f1, A2, f5, f1
 | |
| 	fnmsub	f4, A3, f5, f4
 | |
| 	fnmsub	f0, A4, f5, f0
 | |
| /* Entries 18..16: value 2 (f1). */
 | |
| 	LFD	A1, (2 + 18) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 17) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 16) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f1, A1, f1
 | |
| 	fnmsub	f4, A2, f1, f4
 | |
| 	fnmsub	f0, A3, f1, f0
 | |
| /* Entries 9..8: value 1 (f4). */
 | |
| 	LFD	A1, (2 +  9) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  8) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f4, A1, f4
 | |
| 	fnmsub	f0, A2, f4, f0
 | |
| /* Entry 0: value 0 (f0). */
 | |
| 	LFD	A1, (2 +  0) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f0, A1, f0
 | |
| /* Re-pack: move the primaries of f4..f7 back into the secondary halves of f0..f3. */
 | |
| 	fsmfp	f0, f4
 | |
| 	fsmfp	f1, f5
 | |
| 	fsmfp	f2, f6
 | |
| 	fsmfp	f3, f7
 | |
| #endif
 | |
| 
 | |
| #ifdef LT	/* LT: solve the 8 values from first to last against the 8x8 block addressed through AO */
 | |
| 	fsmtp	f4, f0	/* f4.primary = f0.secondary: unpack so every value has its own primary slot for the scalar ops */
 | |
| 	fsmtp	f5, f1
 | |
| 	fsmtp	f6, f2
 | |
| 	fsmtp	f7, f3
 | |
| /* Value order 0..7 is f0, f4, f1, f5, f2, f6, f3, f7. Entries 0..7 follow; the 2-element bias presumably offsets a pre-decremented AO - TODO confirm. */
 | |
| 	LFD	A1, (2 +  0) * SIZE(AO)
 | |
| 	LFD	A2, (2 +  1) * SIZE(AO)
 | |
| 	LFD	A3, (2 +  2) * SIZE(AO)
 | |
| 	LFD	A4, (2 +  3) * SIZE(AO)
 | |
| 	LFD	A5, (2 +  4) * SIZE(AO)
 | |
| 	LFD	A6, (2 +  5) * SIZE(AO)
 | |
| 	LFD	A7, (2 +  6) * SIZE(AO)
 | |
| 	LFD	A8, (2 +  7) * SIZE(AO)
 | |
| /* fmul rather than fdiv: presumably the diagonal entry is stored already inverted - verify against the packing code. */
 | |
| 	fmul	f0, A1, f0	/* value 0 finished: f0 *= entry 0 */
 | |
| 	fnmsub	f4, A2, f0, f4	/* f4 -= A2 * f0; the six lines below do the same for the other open values */
 | |
| 	fnmsub	f1, A3, f0, f1
 | |
| 	fnmsub	f5, A4, f0, f5
 | |
| 	fnmsub	f2, A5, f0, f2
 | |
| 	fnmsub	f6, A6, f0, f6
 | |
| 	fnmsub	f3, A7, f0, f3
 | |
| 	fnmsub	f7, A8, f0, f7
 | |
| /* Entries 9..15: finish value 1 (f4), then remove it from the six values still open. */
 | |
| 	LFD	A1, (2 +  9) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 10) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 11) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 12) * SIZE(AO)
 | |
| 	LFD	A5, (2 + 13) * SIZE(AO)
 | |
| 	LFD	A6, (2 + 14) * SIZE(AO)
 | |
| 	LFD	A7, (2 + 15) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f4, A1, f4
 | |
| 	fnmsub	f1, A2, f4, f1
 | |
| 	fnmsub	f5, A3, f4, f5
 | |
| 	fnmsub	f2, A4, f4, f2
 | |
| 	fnmsub	f6, A5, f4, f6
 | |
| 	fnmsub	f3, A6, f4, f3
 | |
| 	fnmsub	f7, A7, f4, f7
 | |
| /* Entries 18..23: value 2 (f1). */
 | |
| 	LFD	A1, (2 + 18) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 19) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 20) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 21) * SIZE(AO)
 | |
| 	LFD	A5, (2 + 22) * SIZE(AO)
 | |
| 	LFD	A6, (2 + 23) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f1, A1, f1
 | |
| 	fnmsub	f5, A2, f1, f5
 | |
| 	fnmsub	f2, A3, f1, f2
 | |
| 	fnmsub	f6, A4, f1, f6
 | |
| 	fnmsub	f3, A5, f1, f3
 | |
| 	fnmsub	f7, A6, f1, f7
 | |
| /* Entries 27..31: value 3 (f5). */
 | |
| 	LFD	A1, (2 + 27) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 28) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 29) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 30) * SIZE(AO)
 | |
| 	LFD	A5, (2 + 31) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f5, A1, f5
 | |
| 	fnmsub	f2, A2, f5, f2
 | |
| 	fnmsub	f6, A3, f5, f6
 | |
| 	fnmsub	f3, A4, f5, f3
 | |
| 	fnmsub	f7, A5, f5, f7
 | |
| /* Entries 36..39: value 4 (f2). */
 | |
| 	LFD	A1, (2 + 36) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 37) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 38) * SIZE(AO)
 | |
| 	LFD	A4, (2 + 39) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f2, A1, f2
 | |
| 	fnmsub	f6, A2, f2, f6
 | |
| 	fnmsub	f3, A3, f2, f3
 | |
| 	fnmsub	f7, A4, f2, f7
 | |
| /* Entries 45..47: value 5 (f6). */
 | |
| 	LFD	A1, (2 + 45) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 46) * SIZE(AO)
 | |
| 	LFD	A3, (2 + 47) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f6, A1, f6
 | |
| 	fnmsub	f3, A2, f6, f3
 | |
| 	fnmsub	f7, A3, f6, f7
 | |
| /* Entries 54..55: value 6 (f3). */
 | |
| 	LFD	A1, (2 + 54) * SIZE(AO)
 | |
| 	LFD	A2, (2 + 55) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f3, A1, f3
 | |
| 	fnmsub	f7, A2, f3, f7
 | |
| /* Entry 63: value 7 (f7). */
 | |
| 	LFD	A1, (2 + 63) * SIZE(AO)
 | |
| 
 | |
| 	fmul	f7, A1, f7
 | |
| /* Re-pack: move the primaries of f4..f7 back into the secondary halves of f0..f3. */
 | |
| 	fsmfp	f0, f4
 | |
| 	fsmfp	f1, f5
 | |
| 	fsmfp	f2, f6
 | |
| 	fsmfp	f3, f7
 | |
| #endif
 | |
| 
 | |
| #ifdef RN	/* RN: scale all 8 values by one factor read through BO */
 | |
| 	LFPDX	A1,  BO,  INC2	/* A1 <- pair at BO + INC2 (BO not updated); only its primary half is used below */
 | |
| /* A multiply, not a divide: presumably the factor is an already-inverted diagonal entry - verify against the packing code. */
 | |
| 	fxpmul	f0,  A1,  f0	/* f0 = A1(primary half) * f0, on both halves */
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| 	fxpmul	f2,  A1,  f2
 | |
| 	fxpmul	f3,  A1,  f3
 | |
| #endif
 | |
| 
 | |
| #ifdef RT	/* RT: same instruction sequence as the RN case above */
 | |
| 	LFPDX	A1,  BO,  INC2
 | |
| 
 | |
| 	fxpmul	f0,  A1,  f0
 | |
| 	fxpmul	f1,  A1,  f1
 | |
| 	fxpmul	f2,  A1,  f2
 | |
| 	fxpmul	f3,  A1,  f3
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #ifdef LN	/* LN: step CO1 back 8 elements before the stores below */
 | |
| 	subi	CO1, CO1, 8 * SIZE
 | |
| #endif
 | |
| /* Write the 8 results twice: as pairs through BO (LN/LT) or AO (RN/RT), and element by element through CO1. */
 | |
| #if defined(LN) || defined(LT)
 | |
| 	STFPDUX	f0,  BO,  INC2	/* update-form pair stores: BO moves by INC2 per store */
 | |
| 	STFPDUX	f1,  BO,  INC2
 | |
| 	STFPDUX	f2,  BO,  INC2
 | |
| 	STFPDUX	f3,  BO,  INC2
 | |
| 
 | |
| 	subi	BO,  BO,   8 * SIZE	/* step BO back 8 elements (undoes the four stores if INC2 == 2 * SIZE - TODO confirm) */
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC	/* primary half of f0 -> CO1 + INC, CO1 updated */
 | |
| 	STFSDUX	f0,  CO1, INC	/* secondary half of f0 -> next element */
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| 	STFDUX	f2,  CO1, INC
 | |
| 	STFSDUX	f2,  CO1, INC
 | |
| 	STFDUX	f3,  CO1, INC
 | |
| 	STFSDUX	f3,  CO1, INC
 | |
| #else	/* RN/RT: identical, but the pair stores go through AO */
 | |
| 	STFPDUX	f0,  AO,  INC2
 | |
| 	STFPDUX	f1,  AO,  INC2
 | |
| 	STFPDUX	f2,  AO,  INC2
 | |
| 	STFPDUX	f3,  AO,  INC2
 | |
| 
 | |
| 	subi	AO,  AO,   8 * SIZE	/* step AO back 8 elements after the stores */
 | |
| 
 | |
| 	STFDUX	f0,  CO1, INC
 | |
| 	STFSDUX	f0,  CO1, INC
 | |
| 	STFDUX	f1,  CO1, INC
 | |
| 	STFSDUX	f1,  CO1, INC
 | |
| 	STFDUX	f2,  CO1, INC
 | |
| 	STFSDUX	f2,  CO1, INC
 | |
| 	STFDUX	f3,  CO1, INC
 | |
| 	STFSDUX	f3,  CO1, INC
 | |
| #endif
 | |
| 
 | |
| #ifdef LN	/* LN: step CO1 back 8 elements again after the eight element stores */
 | |
| 	subi	CO1, CO1, 8 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RT	/* RT: AORIG += K * 8 elements (K << (3 + BASE_SHIFT)) */
 | |
| 	slwi	r0, K, 3 + BASE_SHIFT
 | |
| 	add	AORIG, AORIG, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)	/* LT/RN: skip the k-steps this tile did not consume */
 | |
| 	sub	TEMP, K, KK	/* TEMP = K - KK */
 | |
| 	slwi	r0,   TEMP, 3 + BASE_SHIFT	/* offset for A: 8 values per k-step */
 | |
| 	slwi	TEMP, TEMP, 0 + BASE_SHIFT	/* offset for B: 1 value per k-step */
 | |
| 	add	AO, AO, r0
 | |
| 	add	BO, BO, TEMP
 | |
| #endif
 | |
| 
 | |
| #ifdef LT	/* LT: KK moves forward by the tile height */
 | |
| 	addi	KK, KK, 8
 | |
| #endif
 | |
| 
 | |
| #ifdef LN	/* LN: KK moves backward by the tile height */
 | |
| 	subi	KK, KK, 8
 | |
| #endif
 | |
| /* Loop control: CR0 is set by addic. and is left alone by li and lfpsx, so bgt tests I after the decrement. */
 | |
| 	addic.	I, I, -1	/* I = I - 1, CR0 set from the result */
 | |
| 	li	r0, FZERO
 | |
| 
 | |
| 	lfpsx	f0, SP, r0	/* reload f0 from SP + FZERO; presumably a stored zero that resets the accumulator - confirm at .L91 */
 | |
| 	bgt+	.L91	/* more tiles: back to .L91 (label is outside this chunk) */
 | |
| 	.align 4
 | |
| 
 | |
| .L129:	/* bookkeeping after the tile loop falls through: update B and KK according to the variant */
 | |
| #ifdef LN	/* LN: B += K elements (K << BASE_SHIFT) */
 | |
| 	slwi	r0, K, 0 + BASE_SHIFT
 | |
| 	add	B, B, r0
 | |
| #endif
 | |
| 
 | |
| #if defined(LT) || defined(RN)	/* LT/RN: B = BO + 2 elements; presumably removes the 2-element bias BO carries for update-form accesses - confirm */
 | |
| 	addi	B,  BO, 2 * SIZE
 | |
| #endif
 | |
| 
 | |
| #ifdef RN	/* RN: KK += 1 */
 | |
| 	addi	KK, KK, 1
 | |
| #endif
 | |
| 
 | |
| #ifdef RT	/* RT: KK -= 1 */
 | |
| 	subi	KK, KK, 1
 | |
| #endif
 | |
| 	.align 4
 | |
| 
 | |
| 
 | |
| .L999:	/* exit path: reload r14-r31 and f14-f31 from the stack frame, pop it and return; layout presumably mirrors the prologue (outside this chunk) */
 | |
| 	addi	SP, SP, 12	/* bias SP so the first lwzu (pre-increment by 4) reads the word at original SP + 16 */
 | |
| /* lwzu adds 4 to SP and then loads: 18 consecutive words go into r14..r31. */
 | |
| 	lwzu	r14,   4(SP)
 | |
| 	lwzu	r15,   4(SP)
 | |
| 
 | |
| 	lwzu	r16,   4(SP)
 | |
| 	lwzu	r17,   4(SP)
 | |
| 	lwzu	r18,   4(SP)
 | |
| 	lwzu	r19,   4(SP)
 | |
| 
 | |
| 	lwzu	r20,   4(SP)
 | |
| 	lwzu	r21,   4(SP)
 | |
| 	lwzu	r22,   4(SP)
 | |
| 	lwzu	r23,   4(SP)
 | |
| 
 | |
| 	lwzu	r24,   4(SP)
 | |
| 	lwzu	r25,   4(SP)
 | |
| 	lwzu	r26,   4(SP)
 | |
| 	lwzu	r27,   4(SP)
 | |
| 
 | |
| 	lwzu	r28,   4(SP)
 | |
| 	lwzu	r29,   4(SP)
 | |
| 	lwzu	r30,   4(SP)
 | |
| 	lwzu	r31,   4(SP)
 | |
| /* Remove the 12-byte bias: the next +16 update step then lands just past the last word loaded above. */
 | |
| 	subi	SP, SP, 12
 | |
| 	li	r0, 16	/* stride for the update-form pair loads: 16 bytes per register pair */
 | |
| 
 | |
| 	lfpdux	f31, SP, r0	/* SP += 16, then load a pair; f31 down to f14 are read in that order */
 | |
| 	lfpdux	f30, SP, r0
 | |
| 	lfpdux	f29, SP, r0
 | |
| 	lfpdux	f28, SP, r0
 | |
| 	lfpdux	f27, SP, r0
 | |
| 	lfpdux	f26, SP, r0
 | |
| 	lfpdux	f25, SP, r0
 | |
| 	lfpdux	f24, SP, r0
 | |
| 	lfpdux	f23, SP, r0
 | |
| 	lfpdux	f22, SP, r0
 | |
| 	lfpdux	f21, SP, r0
 | |
| 	lfpdux	f20, SP, r0
 | |
| 	lfpdux	f19, SP, r0
 | |
| 	lfpdux	f18, SP, r0
 | |
| 	lfpdux	f17, SP, r0
 | |
| 	lfpdux	f16, SP, r0
 | |
| 	lfpdux	f15, SP, r0
 | |
| 	lfpdux	f14, SP, r0
 | |
| 	addi	SP, SP, 16	/* step past the last pair; presumably SP is now back at its value on entry - confirm against the prologue */
 | |
| 	blr	/* return to caller */
 | |
| 
 | |
| /* EPILOGUE is a macro defined outside this chunk; presumably it emits the end-of-function directives. */
 | |
| 	EPILOGUE
 | |
| #endif
 |