666 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			666 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /*********************************************************************/
 | |
| /* Copyright 2009, 2010 The University of Texas at Austin.           */
 | |
| /* All rights reserved.                                              */
 | |
| /*                                                                   */
 | |
| /* Redistribution and use in source and binary forms, with or        */
 | |
| /* without modification, are permitted provided that the following   */
 | |
| /* conditions are met:                                               */
 | |
| /*                                                                   */
 | |
| /*   1. Redistributions of source code must retain the above         */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer.                                                  */
 | |
| /*                                                                   */
 | |
| /*   2. Redistributions in binary form must reproduce the above      */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer in the documentation and/or other materials       */
 | |
| /*      provided with the distribution.                              */
 | |
| /*                                                                   */
 | |
| /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | |
| /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | |
| /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | |
| /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | |
| /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | |
| /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | |
| /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | |
| /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | |
| /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | |
| /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | |
| /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | |
| /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | |
| /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | |
| /*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | |
| /*                                                                   */
 | |
| /* The views and conclusions contained in the software and           */
 | |
| /* documentation are those of the authors and should not be          */
 | |
| /* interpreted as representing official policies, either expressed   */
 | |
| /* or implied, of The University of Texas at Austin.                 */
 | |
| /*********************************************************************/
 | |
| 
 | |
| #define ASSEMBLER
 | |
| #include "common.h"
 | |
| 
 | |
| #define N	r3	/* element count; tested against 0 and used to derive the loop counts */
 | |
| #define X	r6	/* load cursor into the first vector */
 | |
| #define INCX	r7	/* stride of X; shifted left by BASE_SHIFT on entry */
 | |
| #define Y	r8	/* load cursor into the second vector */
 | |
| #define INCY	r9	/* stride of Y; shifted left by BASE_SHIFT on entry */
 | |
| /* Values derived inside the routine. */
 | |
| #define INCX2	r4	/* INCX + INCX (reduced by SIZE on the generic-stride path) */
 | |
| #define INCY2	r5	/* INCY + INCY (reduced by SIZE on the generic-stride path) */
 | |
| #define X2	r10	/* store cursor for X; r10 is also the scratch offset in the prologue and epilogue */
 | |
| #define Y2	r11	/* store cursor for Y */
 | |
| /* Floating-point registers holding data in flight between the two vectors. */
 | |
| #define A1	f0
 | |
| #define A2	f1
 | |
| #define A3	f2
 | |
| #define A4	f3
 | |
| #define A5	f4
 | |
| 
 | |
| #define B1	f5
 | |
| #define B2	f6
 | |
| #define B3	f7
 | |
| #define B4	f8
 | |
| #define B5	f9
 | |
| /* Temporaries of the mixed-alignment paths; f14-f16 are saved on entry and restored on exit. */
 | |
| #define T1	f10
 | |
| #define T2	f11
 | |
| #define T3	f12
 | |
| #define T4	f13
 | |
| #define T5	f14
 | |
| #define T6	f15
 | |
| #define T7	f16	/* not referenced by name anywhere in this routine */
 | |
| 
 | |
| 	PROLOGUE
 | |
| 	PROFCODE
 | |
| /* Save f14-f16 on the stack; each update-form store steps SP by r10 = -16. */
 | |
| 	li	r10, -16
 | |
| 
 | |
| 	stfpdux	f14, SP, r10
 | |
| 	stfpdux	f15, SP, r10
 | |
| 	stfpdux	f16, SP, r10
 | |
| /* Scale both strides by BASE_SHIFT and form the doubled strides. */
 | |
| 	slwi	INCX,  INCX, BASE_SHIFT
 | |
| 	slwi	INCY,  INCY, BASE_SHIFT
 | |
| 	add	INCX2, INCX, INCX
 | |
| 	add	INCY2, INCY, INCY
 | |
| /* Nothing to exchange when N <= 0; LL(999) still restores the saved registers. */
 | |
| 	cmpwi	cr0, N, 0
 | |
| 	ble	LL(999)
 | |
| /* Unless both scaled strides equal SIZE, use the generic-stride code at LL(100). */
 | |
| 	cmpwi	cr0, INCX, SIZE
 | |
| 	bne	LL(100)
 | |
| 	cmpwi	cr0, INCY, SIZE
 | |
| 	bne	LL(100)
 | |
| /* Bias the cursors back by one step: the accesses below add INCX2 / INCY2 before touching memory. */
 | |
| 	sub	X, X, INCX2
 | |
| 	sub	Y, Y, INCY2
 | |
| /* X2 / Y2 are the store cursors, X / Y the load cursors. */
 | |
| 	mr	X2, X
 | |
| 	mr	Y2, Y
 | |
| /* Dispatch on 2 * SIZE alignment: X unaligned -> LL(30), only Y unaligned -> LL(20), else fall into LL(10). */
 | |
| 	andi.	r0, X, 2 * SIZE - 1
 | |
| 	bne	LL(30)
 | |
| 	andi.	r0, Y, 2 * SIZE - 1
 | |
| 	bne	LL(20)
 | |
| 	.align 4
 | |
| 
 | |
| LL(10):  /* X : aligned     Y : aligned */
 | |
| /* Exchange whole pairs: A1-A4 carry data read from X, B1-B4 data read from Y. CTR = N / 4. */
 | |
| 	srawi.	r0, N, 2
 | |
| 	mtspr	CTR,  r0
 | |
| 	beq-	LL(15)
 | |
| /* Pipeline fill: read four pairs from each vector before the first store. */
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 	LFPDUX	A2,    X,  INCX2
 | |
| 	LFPDUX	B2,    Y,  INCY2
 | |
| 	LFPDUX	A3,    X,  INCX2
 | |
| 	LFPDUX	B3,    Y,  INCY2
 | |
| 	LFPDUX	A4,    X,  INCX2
 | |
| 	LFPDUX	B4,    Y,  INCY2
 | |
| 	bdz	LL(13)
 | |
| 	.align 4
 | |
| /* Steady state: store the four pairs already held and reload each register for the next pass. */
 | |
| LL(12):
 | |
| 	STFPDUX	B1,    X2, INCX2	/* X2 steps by the X stride, Y2 by the Y stride (both 2 * SIZE on this path) */
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 
 | |
| 	STFPDUX	B2,    X2, INCX2
 | |
| 	LFPDUX	B2,    Y,  INCY2
 | |
| 	STFPDUX	A2,    Y2, INCY2
 | |
| 	LFPDUX	A2,    X,  INCX2
 | |
| 
 | |
| 	STFPDUX	B3,    X2, INCX2
 | |
| 	LFPDUX	B3,    Y,  INCY2
 | |
| 	STFPDUX	A3,    Y2, INCY2
 | |
| 	LFPDUX	A3,    X,  INCX2
 | |
| 
 | |
| 	STFPDUX	B4,    X2, INCX2
 | |
| 	LFPDUX	B4,    Y,  INCY2
 | |
| 	STFPDUX	A4,    Y2, INCY2
 | |
| 	LFPDUX	A4,    X,  INCX2
 | |
| 	bdnz	LL(12)
 | |
| 	.align 4
 | |
| /* Drain: store the last four pairs without loading any further. */
 | |
| LL(13):
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	STFPDUX	B2,    X2, INCX2
 | |
| 	STFPDUX	A2,    Y2, INCY2
 | |
| 	STFPDUX	B3,    X2, INCX2
 | |
| 	STFPDUX	A3,    Y2, INCY2
 | |
| 	STFPDUX	B4,    X2, INCX2
 | |
| 	STFPDUX	A4,    Y2, INCY2
 | |
| 	.align 4
 | |
| /* Remainder: N & 3 elements are left, handled as an optional two followed by an optional one. */
 | |
| LL(15):
 | |
| 	andi.	r0,  N, 3
 | |
| 	beq	LL(999)
 | |
| 
 | |
| 	andi.	r0,  N, 2
 | |
| 	beq	LL(16)
 | |
| 
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 	LFPDUX	A2,    X,  INCX2
 | |
| 	LFPDUX	B2,    Y,  INCY2
 | |
| 
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	STFPDUX	B2,    X2, INCX2
 | |
| 	STFPDUX	A2,    Y2, INCY2
 | |
| 	.align 4
 | |
| /* Last single element, if N is odd. */
 | |
| LL(16):
 | |
| 	andi.	r0,  N, 1
 | |
| 	beq	LL(999)
 | |
| 
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	b  LL(999)
 | |
| 	.align 4
 | |
| 
 | |
| LL(20):  /* X : aligned     Y : unaligned */
 | |
| /* Peel one SIZE-sized value of Y so its cursors reach a 2 * SIZE boundary (assuming Y is SIZE-aligned); A1 / B1 keep the halves still pending. */
 | |
| 	LFXDUX	A1, X, INCX2
 | |
| 	LFDX	B1, Y, INCY2
 | |
| 
 | |
| 	STFSDX	A1, Y2, INCY2
 | |
| /* INCY equals SIZE on this path, so Y and Y2 move forward by a single value. */
 | |
| 	add	Y,  Y,  INCY
 | |
| 	add	Y2, Y2, INCY
 | |
| /* The peel above plus the tail at LL(29) complete one element, so N - 1 remain for the loops. */
 | |
| 	addi	N, N, -1
 | |
| 	cmpwi	cr0, N, 0
 | |
| 	ble	LL(29)
 | |
| 	.align 4
 | |
| /* CTR = N / 4; go straight to the remainder code when that is zero. */
 | |
| 	srawi.	r0, N, 2
 | |
| 	mtspr	CTR,  r0
 | |
| 	beq-	LL(25)
 | |
| /* Pipeline fill, part 1: two LFXDUX loads from each vector. NOTE(review): presumably the half-swapped pair load - confirm in common.h. */
 | |
| 	LFXDUX	T1,    X,  INCX2
 | |
| 	LFXDUX	T2,    Y,  INCY2
 | |
| 	LFXDUX	T3,    X,  INCX2
 | |
| 	LFXDUX	T4,    Y,  INCY2
 | |
| /* Pipeline fill, part 2: two LFPDUX loads from each vector, interleaved with merging the pending halves. */
 | |
| 	LFPDUX	A4,    X,  INCX2
 | |
| 	fsmr	A1, T1
 | |
| 	LFPDUX	B4,    Y,  INCY2
 | |
| 	fsmr	B1, T2
 | |
| 	LFPDUX	A5,    X,  INCX2
 | |
| 	fsmr	T1, T3
 | |
| 	LFPDUX	B5,    Y,  INCY2
 | |
| 	fsmr	T2, T4
 | |
| 	bdz	LL(23)
 | |
| 	.align 4
 | |
| /* Steady state: four stores to each vector and four loads from each vector per iteration. */
 | |
| LL(22):
 | |
| 	fxmr	T5, A4
 | |
| 	STFPDUX	A1,    Y2, INCY2	/* data that came from X goes out through Y2 */
 | |
| 	fxmr	T6, B4
 | |
| 	STFPDUX	B1,    X2, INCX2	/* data that came from Y goes out through X2 */
 | |
| 	fxmr	A1, A5
 | |
| 	STFPDUX	T1,    Y2, INCY2
 | |
| 	fxmr	B1, B5
 | |
| 	STFPDUX	T2,    X2, INCX2
 | |
| /* Merge halves for the next two stores while loading the next two pairs from each vector. */
 | |
| 	fsmr	T3, T5
 | |
| 	LFPDUX	A2,    X,  INCX2
 | |
| 	fsmr	T4, T6
 | |
|  	LFPDUX	B2,    Y,  INCY2
 | |
| 	fsmr	T5, A1
 | |
| 	LFPDUX	A3,    X,  INCX2
 | |
| 	fsmr	T6, B1
 | |
| 	LFPDUX	B3,    Y,  INCY2
 | |
| /* Third and fourth store to each vector, interleaved with preparing T1-T4 from the new loads. */
 | |
| 	fxmr	T1, A2
 | |
| 	STFPDUX	T3,    Y2, INCY2
 | |
| 	fxmr	T2, B2
 | |
| 	STFPDUX	T4,    X2, INCX2
 | |
| 	fxmr	T3, A3
 | |
| 	STFPDUX	T5,    Y2, INCY2
 | |
| 	fxmr	T4, B3
 | |
| 	STFPDUX	T6,    X2, INCX2
 | |
| /* Refill A4 / A5 and B4 / B5 for the next iteration, or for the drain at LL(23). */
 | |
| 	fsmr	A1, T1
 | |
| 	LFPDUX	A4,    X,  INCX2
 | |
| 	fsmr	B1, T2
 | |
| 	LFPDUX	B4,    Y,  INCY2
 | |
| 	fsmr	T1, T3
 | |
| 	LFPDUX	A5,    X,  INCX2
 | |
| 	fsmr	T2, T4
 | |
| 	LFPDUX	B5,    Y,  INCY2
 | |
| 	bdnz	LL(22)
 | |
| 	.align 4
 | |
| /* Drain: same stores as one loop iteration, without any further loads. */
 | |
| LL(23):
 | |
| 	fxmr	T5, A4
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	fxmr	T6, B4
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	fxmr	A1, A5
 | |
| 	STFPDUX	T1,    Y2, INCY2
 | |
| 	fxmr	B1, B5
 | |
| 	STFPDUX	T2,    X2, INCX2
 | |
| 
 | |
| 	fsmr	T3, T5
 | |
| 	fsmr	T4, T6
 | |
| 	fsmr	T5, A1
 | |
| 	fsmr	T6, B1
 | |
| /* A1 / B1 are left holding the pending halves expected by the code below. */
 | |
| 	STFPDUX	T3,    Y2, INCY2
 | |
| 	STFPDUX	T4,    X2, INCX2
 | |
| 	STFPDUX	T5,    Y2, INCY2
 | |
| 	STFPDUX	T6,    X2, INCX2
 | |
| 	.align 4
 | |
| /* Remainder: N & 3 elements are left, handled as an optional two followed by an optional one. */
 | |
| LL(25):
 | |
| 	andi.	r0,  N, 3
 | |
| 	beq	LL(29)
 | |
| 
 | |
| 	andi.	r0,  N, 2
 | |
| 	beq	LL(27)
 | |
| 
 | |
| 	LFXDUX	A2,    X,  INCX2
 | |
| 	LFXDUX	B2,    Y,  INCY2
 | |
| 	LFXDUX	A3,    X,  INCX2
 | |
| 	LFXDUX	B3,    Y,  INCY2
 | |
| /* Merge the pending halves in A1 / B1 with the newly loaded registers. */
 | |
| 	fsmr	A1, A2
 | |
| 	fsmr	B1, B2
 | |
| 	fsmr	A2, A3
 | |
| 	fsmr	B2, B3
 | |
| /* Store two pairs to each vector, then reload A1 / B1 from A3 / B3 as the new pending halves. */
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	STFPDUX	A2,    Y2, INCY2
 | |
| 	fpmr	A1, A3
 | |
| 	STFPDUX	B2,    X2, INCX2
 | |
| 	fpmr	B1, B3
 | |
| 	.align 4
 | |
| /* Last single element of the remainder, if N is odd. */
 | |
| LL(27):
 | |
| 	andi.	r0,  N, 1
 | |
| 	beq	LL(29)
 | |
| 
 | |
| 	LFXDUX	A2,    X,  INCX2
 | |
| 	LFXDUX	B2,    Y,  INCY2
 | |
| 	fsmr	A1, A2
 | |
| 	fsmr	B1, B2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	fpmr	A1, A2
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	fpmr	B1, B2
 | |
| 	.align 4
 | |
| /* Tail: finish the element opened by the peel - one more value of Y is read, A1 goes to Y2 and B1 to X2. */
 | |
| LL(29):
 | |
| 	LFSDX	B1,    Y,  INCY2
 | |
| 	STFDX	A1,    Y2, INCY2
 | |
| 	STFPDX	B1,    X2, INCX2
 | |
| 	b  LL(999)
 | |
| 	.align 4
 | |
| 
 | |
| 
 | |
| LL(30):  /* X : unaligned   Y : aligned */
 | |
| /* Reached whenever X is unaligned; if Y is unaligned as well, LL(40) handles it instead. */
 | |
| 	andi.	r0, Y, 2 * SIZE - 1
 | |
| 	bne	LL(40)
 | |
| /* Mirror of LL(20) with the vector roles exchanged: here the A registers are loaded from Y and the B registers from X. */
 | |
| 	LFXDUX	A1, Y, INCY2
 | |
| 	LFDX	B1, X, INCX2
 | |
| 
 | |
| 	STFSDX	A1, X2, INCX2
 | |
| /* INCX equals SIZE on this path, so X and X2 move forward by a single value. */
 | |
| 	add	X,  X,  INCX
 | |
| 	add	X2, X2, INCX
 | |
| /* The peel above plus the tail at LL(39) complete one element, so N - 1 remain for the loops. */
 | |
| 	addi	N, N, -1
 | |
| 	cmpwi	cr0, N, 0
 | |
| 	ble	LL(39)
 | |
| 	.align 4
 | |
| /* CTR = N / 4; go straight to the remainder code when that is zero. */
 | |
| 	srawi.	r0, N, 2
 | |
| 	mtspr	CTR,  r0
 | |
| 	beq-	LL(35)
 | |
| /* Pipeline fill, part 1: two LFXDUX loads from each vector. NOTE(review): presumably the half-swapped pair load - confirm in common.h. */
 | |
| 	LFXDUX	T1,    Y,  INCY2
 | |
| 	LFXDUX	T2,    X,  INCX2
 | |
| 	LFXDUX	T3,    Y,  INCY2
 | |
| 	LFXDUX	T4,    X,  INCX2
 | |
| /* Pipeline fill, part 2: two LFPDUX loads from each vector, interleaved with merging the pending halves. */
 | |
| 	LFPDUX	A4,    Y,  INCY2
 | |
| 	fsmr	A1, T1
 | |
| 	LFPDUX	B4,    X,  INCX2
 | |
| 	fsmr	B1, T2
 | |
| 	LFPDUX	A5,    Y,  INCY2
 | |
| 	fsmr	T1, T3
 | |
| 	LFPDUX	B5,    X,  INCX2
 | |
| 	fsmr	T2, T4
 | |
| 	bdz	LL(33)
 | |
| 	.align 4
 | |
| /* Steady state: four stores to each vector and four loads from each vector per iteration. */
 | |
| LL(32):
 | |
| 	fxmr	T5, A4
 | |
| 	STFPDUX	A1,    X2, INCX2	/* data that came from Y goes out through X2 */
 | |
| 	fxmr	T6, B4
 | |
| 	STFPDUX	B1,    Y2, INCY2	/* data that came from X goes out through Y2 */
 | |
| 	fxmr	A1, A5
 | |
| 	STFPDUX	T1,    X2, INCX2
 | |
| 	fxmr	B1, B5
 | |
| 	STFPDUX	T2,    Y2, INCY2
 | |
| /* Merge halves for the next two stores while loading the next two pairs from each vector. */
 | |
| 	fsmr	T3, T5
 | |
| 	LFPDUX	A2,    Y,  INCY2
 | |
| 	fsmr	T4, T6
 | |
|  	LFPDUX	B2,    X,  INCX2
 | |
| 	fsmr	T5, A1
 | |
| 	LFPDUX	A3,    Y,  INCY2
 | |
| 	fsmr	T6, B1
 | |
| 	LFPDUX	B3,    X,  INCX2
 | |
| /* Third and fourth store to each vector, interleaved with preparing T1-T4 from the new loads. */
 | |
| 	fxmr	T1, A2
 | |
| 	STFPDUX	T3,    X2, INCX2
 | |
| 	fxmr	T2, B2
 | |
| 	STFPDUX	T4,    Y2, INCY2
 | |
| 	fxmr	T3, A3
 | |
| 	STFPDUX	T5,    X2, INCX2
 | |
| 	fxmr	T4, B3
 | |
| 	STFPDUX	T6,    Y2, INCY2
 | |
| /* Refill A4 / A5 and B4 / B5 for the next iteration, or for the drain at LL(33). */
 | |
| 	fsmr	A1, T1
 | |
| 	LFPDUX	A4,    Y,  INCY2
 | |
| 	fsmr	B1, T2
 | |
| 	LFPDUX	B4,    X,  INCX2
 | |
| 	fsmr	T1, T3
 | |
| 	LFPDUX	A5,    Y,  INCY2
 | |
| 	fsmr	T2, T4
 | |
| 	LFPDUX	B5,    X,  INCX2
 | |
| 	bdnz	LL(32)
 | |
| 	.align 4
 | |
| /* Drain: same stores as one loop iteration, without any further loads. */
 | |
| LL(33):
 | |
| 	fxmr	T5, A4
 | |
| 	STFPDUX	A1,    X2, INCX2
 | |
| 	fxmr	T6, B4
 | |
| 	STFPDUX	B1,    Y2, INCY2
 | |
| 	fxmr	A1, A5
 | |
| 	STFPDUX	T1,    X2, INCX2
 | |
| 	fxmr	B1, B5
 | |
| 	STFPDUX	T2,    Y2, INCY2
 | |
| 
 | |
| 	fsmr	T3, T5
 | |
| 	fsmr	T4, T6
 | |
| 	fsmr	T5, A1
 | |
| 	fsmr	T6, B1
 | |
| /* A1 / B1 are left holding the pending halves expected by the code below. */
 | |
| 	STFPDUX	T3,    X2, INCX2
 | |
| 	STFPDUX	T4,    Y2, INCY2
 | |
| 	STFPDUX	T5,    X2, INCX2
 | |
| 	STFPDUX	T6,    Y2, INCY2
 | |
| 	.align 4
 | |
| /* Remainder: N & 3 elements are left, handled as an optional two followed by an optional one. */
 | |
| LL(35):
 | |
| 	andi.	r0,  N, 3
 | |
| 	beq	LL(39)
 | |
| 
 | |
| 	andi.	r0,  N, 2
 | |
| 	beq	LL(37)
 | |
| 
 | |
| 	LFXDUX	A2,    Y,  INCY2
 | |
| 	LFXDUX	B2,    X,  INCX2
 | |
| 	LFXDUX	A3,    Y,  INCY2
 | |
| 	LFXDUX	B3,    X,  INCX2
 | |
| /* Merge the pending halves in A1 / B1 with the newly loaded registers. */
 | |
| 	fsmr	A1, A2
 | |
| 	fsmr	B1, B2
 | |
| 	fsmr	A2, A3
 | |
| 	fsmr	B2, B3
 | |
| /* Store two pairs to each vector, then reload A1 / B1 from A3 / B3 as the new pending halves. */
 | |
| 	STFPDUX	A1,    X2, INCX2
 | |
| 	STFPDUX	B1,    Y2, INCY2
 | |
| 	STFPDUX	A2,    X2, INCX2
 | |
| 	fpmr	A1, A3
 | |
| 	STFPDUX	B2,    Y2, INCY2
 | |
| 	fpmr	B1, B3
 | |
| 	.align 4
 | |
| /* Last single element of the remainder, if N is odd. */
 | |
| LL(37):
 | |
| 	andi.	r0,  N, 1
 | |
| 	beq	LL(39)
 | |
| 
 | |
| 	LFXDUX	A2,    Y,  INCY2
 | |
| 	LFXDUX	B2,    X,  INCX2
 | |
| 	fsmr	A1, A2
 | |
| 	fsmr	B1, B2
 | |
| 	STFPDUX	A1,    X2, INCX2
 | |
| 	fpmr	A1, A2
 | |
| 	STFPDUX	B1,    Y2, INCY2
 | |
| 	fpmr	B1, B2
 | |
| 	.align 4
 | |
| /* Tail: finish the element opened by the peel - one more value of X is read, A1 goes to X2 and B1 to Y2. */
 | |
| LL(39):
 | |
| 	LFSDX	B1,    X,  INCX2
 | |
| 	STFDX	A1,    X2, INCX2
 | |
| 	STFPDX	B1,    Y2, INCY2
 | |
| 	b  LL(999)
 | |
| 	.align 4
 | |
| 
 | |
| LL(40):  /* X : unaligned   Y : unaligned */
 | |
| /* Peel: exchange the first SIZE-sized value of each vector, then move all four cursors forward by one value. */
 | |
| 	LFDX	A1, Y, INCY2
 | |
| 	LFDX	B1, X, INCX2
 | |
| 	add	X,  X,  INCX
 | |
| 	add	Y,  Y,  INCY
 | |
| /* N - 1 elements remain for the loops; the peel plus the tail at LL(49) complete one element. */
 | |
| 	addi	N, N, -1
 | |
| 	cmpwi	cr0, N, 0
 | |
| /* cr0 from the cmpwi above is consumed by the ble below; nothing in between is a record-form instruction. */
 | |
| 	STFDX	A1, X2, INCX2
 | |
| 	STFDX	B1, Y2, INCY2
 | |
| 	add	X2, X2, INCX
 | |
| 	add	Y2, Y2, INCY
 | |
| 	ble	LL(49)
 | |
| /* CTR = N / 4; from here the code has the same shape as the fully aligned path. */
 | |
| 	srawi.	r0, N, 2
 | |
| 	mtspr	CTR,  r0
 | |
| 	beq-	LL(45)
 | |
| /* Pipeline fill: read four pairs from each vector before the first store. */
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 	LFPDUX	A2,    X,  INCX2
 | |
| 	LFPDUX	B2,    Y,  INCY2
 | |
| 	LFPDUX	A3,    X,  INCX2
 | |
| 	LFPDUX	B3,    Y,  INCY2
 | |
| 	LFPDUX	A4,    X,  INCX2
 | |
| 	LFPDUX	B4,    Y,  INCY2
 | |
| 	bdz	LL(43)
 | |
| 	.align 4
 | |
| /* Steady state: store the four pairs already held and reload each register for the next pass. */
 | |
| LL(42):
 | |
| 	STFPDUX	B1,    X2, INCX2	/* X2 steps by the X stride, Y2 by the Y stride (both 2 * SIZE on this path) */
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 
 | |
| 	STFPDUX	B2,    X2, INCX2
 | |
| 	LFPDUX	B2,    Y,  INCY2
 | |
| 	STFPDUX	A2,    Y2, INCY2
 | |
| 	LFPDUX	A2,    X,  INCX2
 | |
| 
 | |
| 	STFPDUX	B3,    X2, INCX2
 | |
| 	LFPDUX	B3,    Y,  INCY2
 | |
| 	STFPDUX	A3,    Y2, INCY2
 | |
| 	LFPDUX	A3,    X,  INCX2
 | |
| 
 | |
| 	STFPDUX	B4,    X2, INCX2
 | |
| 	LFPDUX	B4,    Y,  INCY2
 | |
| 	STFPDUX	A4,    Y2, INCY2
 | |
| 	LFPDUX	A4,    X,  INCX2
 | |
| 	bdnz	LL(42)
 | |
| 	.align 4
 | |
| /* Drain: store the last four pairs without loading any further. */
 | |
| LL(43):
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	STFPDUX	B2,    X2, INCX2
 | |
| 	STFPDUX	A2,    Y2, INCY2
 | |
| 	STFPDUX	B3,    X2, INCX2
 | |
| 	STFPDUX	A3,    Y2, INCY2
 | |
| 	STFPDUX	B4,    X2, INCX2
 | |
| 	STFPDUX	A4,    Y2, INCY2
 | |
| 	.align 4
 | |
| /* Remainder: N & 3 elements are left, handled as an optional two followed by an optional one. */
 | |
| LL(45):
 | |
| 	andi.	r0,  N, 3
 | |
| 	beq	LL(49)
 | |
| 
 | |
| 	andi.	r0,  N, 2
 | |
| 	beq	LL(46)
 | |
| 
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 	LFPDUX	A2,    X,  INCX2
 | |
| 	LFPDUX	B2,    Y,  INCY2
 | |
| 
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	STFPDUX	B2,    X2, INCX2
 | |
| 	STFPDUX	A2,    Y2, INCY2
 | |
| 	.align 4
 | |
| /* Last single pair of the remainder, if N is odd. */
 | |
| LL(46):
 | |
| 	andi.	r0,  N, 1
 | |
| 	beq	LL(49)
 | |
| 
 | |
| 	LFPDUX	A1,    X,  INCX2
 | |
| 	LFPDUX	B1,    Y,  INCY2
 | |
| 
 | |
| 	STFPDUX	B1,    X2, INCX2
 | |
| 	STFPDUX	A1,    Y2, INCY2
 | |
| 	.align 4
 | |
| /* Tail: exchange the final SIZE-sized value of each vector. */
 | |
| LL(49):
 | |
| 	LFDX	A1, Y, INCY2
 | |
| 	LFDX	B1, X, INCX2
 | |
| 	STFDX	A1, X2, INCX2
 | |
| 	STFDX	B1, Y2, INCY2
 | |
| 	b  LL(999)
 | |
| 	.align 4
 | |
| 
 | |
| LL(100):	/* generic path: at least one scaled stride differs from SIZE */
 | |
| 	subi	INCX2, INCX2, SIZE
 | |
| 	subi	INCY2, INCY2, SIZE
 | |
| /* From here INCX / INCY = SIZE reach an element's second value; INCX2 / INCY2 (reduced by SIZE above) step on to the next element. */
 | |
| 	li	INCX, SIZE
 | |
| 	li	INCY, SIZE
 | |
| /* Bias the cursors so the first update-form access lands on the original X / Y. */
 | |
| 	sub	X, X, INCX2
 | |
| 	sub	Y, Y, INCY2
 | |
| /* X2 / Y2 are the store cursors, X / Y the load cursors. */
 | |
| 	mr	X2, X
 | |
| 	mr	Y2, Y
 | |
| /* CTR = N / 2: two elements (four values per vector) per iteration. */
 | |
| 	srawi.	r0, N, 1
 | |
| 	mtspr	CTR,  r0
 | |
| 	beq-	LL(115)
 | |
| /* Pipeline fill: read two elements from each vector before the first store. */
 | |
| 	LFDUX	A1,   X,  INCX2
 | |
| 	LFDUX	B1,   Y,  INCY2
 | |
| 	LFDUX	A2,   X,  INCX
 | |
| 	LFDUX	B2,   Y,  INCY
 | |
| 	LFDUX	A3,   X,  INCX2
 | |
| 	LFDUX	B3,   Y,  INCY2
 | |
| 	LFDUX	A4,   X,  INCX
 | |
| 	LFDUX	B4,   Y,  INCY
 | |
| 	bdz	LL(113)
 | |
| 	.align 4
 | |
| /* Steady state: store the values already held (Y data to X2, X data to Y2) and reload each register. */
 | |
| LL(112):
 | |
| 	STFDUX	B1,   X2, INCX2
 | |
| 	LFDUX	B1,   Y,  INCY2
 | |
| 	STFDUX	A1,   Y2, INCY2
 | |
| 	LFDUX	A1,   X,  INCX2
 | |
| 
 | |
| 	STFDUX	B2,   X2, INCX
 | |
| 	LFDUX	B2,   Y,  INCY
 | |
| 	STFDUX	A2,   Y2, INCY
 | |
| 	LFDUX	A2,   X,  INCX
 | |
| 
 | |
| 	STFDUX	B3,   X2, INCX2
 | |
| 	LFDUX	B3,   Y,  INCY2
 | |
| 	STFDUX	A3,   Y2, INCY2
 | |
| 	LFDUX	A3,   X,  INCX2
 | |
| 
 | |
| 	STFDUX	B4,   X2, INCX
 | |
| 	LFDUX	B4,   Y,  INCY
 | |
| 	STFDUX	A4,   Y2, INCY
 | |
| 	LFDUX	A4,   X,  INCX
 | |
| 	bdnz	LL(112)
 | |
| 	.align 4
 | |
| /* Drain: store the last two elements without loading any further. */
 | |
| LL(113):
 | |
| 	STFDUX	B1,   X2, INCX2
 | |
| 	STFDUX	A1,   Y2, INCY2
 | |
| 	STFDUX	B2,   X2, INCX
 | |
| 	STFDUX	A2,   Y2, INCY
 | |
| 
 | |
| 	STFDUX	B3,   X2, INCX2
 | |
| 	STFDUX	A3,   Y2, INCY2
 | |
| 	STFDUX	B4,   X2, INCX
 | |
| 	STFDUX	A4,   Y2, INCY
 | |
| 	.align 4
 | |
| /* Remainder: one last element when N is odd. */
 | |
| LL(115):
 | |
| 	andi.	r0,  N, 1
 | |
| 	beq	LL(999)
 | |
| 
 | |
| 	LFDUX	A1,   X,  INCX2
 | |
| 	LFDUX	A2,   X,  INCX
 | |
| 	LFDUX	B1,   Y,  INCY2
 | |
| 	LFDUX	B2,   Y,  INCY
 | |
| 
 | |
| 	STFDUX	B1,   X2, INCX2
 | |
| 	STFDUX	B2,   X2, INCX
 | |
| 	STFDUX	A1,   Y2, INCY2
 | |
| 	STFDUX	A2,   Y2, INCY
 | |
| 	.align 4
 | |
| 
 | |
| LL(999):	/* common exit: restore f16, f15, f14 and return */
 | |
| 	li	r10, 16
 | |
| 	addi	SP, SP,  -16
 | |
| /* Each update-form load steps SP by r10 = 16 before reading, hence the -16 bias above; registers come back in reverse save order. */
 | |
| 	lfpdux	f16, SP, r10
 | |
| 	lfpdux	f15, SP, r10
 | |
| 	lfpdux	f14, SP, r10
 | |
| /* Undo the last step; SP is back at the value it had before the prologue stores. */
 | |
| 	addi	SP, SP,  16
 | |
| 	blr
 | |
| 
 | |
| 	EPILOGUE
 |