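/* Listing metadata: 1218 lines, 22 KiB. The listing tagged this file "ArmAsm", but the code below is PowerPC assembly using the FP2 paired-FPU instructions. */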
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Register roles for the packing routine below.
 *
 * M, N, A, LDA and B are read on entry without being initialised in this
 * file, i.e. they are the routine's inputs.
 */
#define	M	r3	/* elements per source column (drives the inner loops) */
#define	N	r4	/* number of source columns (handled in groups of 8/4/2/1) */
#define	A	r5	/* source cursor; moved forward one column group at a time */
#define	LDA	r6	/* distance between consecutive source columns */
#define	B	r7	/* destination cursor; written strictly sequentially */

/* Per-column read cursors, columns 1-4 of the current group. */
#define AO1	r8
#define AO2	r9
#define AO3	r10
#define AO4	r11

#define J	r12	/* countdown of remaining 8-column groups */

/* Per-column read cursors, columns 5-8 (spilled/reloaded by the routine). */
#define AO5	r26
#define AO6	r27
#define AO7	r28
#define AO8	r29
#define INC	r30	/* index stride of one element:  1 * SIZE */
#define INC2	r31	/* index stride of two elements: 2 * SIZE */

/* Data registers c01..c32 map one-to-one onto f0..f31. */
#define c01	f0
#define c02	f1
#define c03	f2
#define c04	f3
#define c05	f4
#define c06	f5
#define c07	f6
#define c08	f7
#define c09	f8
#define c10	f9
#define c11	f10
#define c12	f11
#define c13	f12
#define c14	f13
#define c15	f14
#define c16	f15

#define c17	f16
#define c18	f17
#define c19	f18
#define c20	f19
#define c21	f20
#define c22	f21
#define c23	f22
#define c24	f23
#define c25	f24
#define c26	f25
#define c27	f26
#define c28	f27
#define c29	f28
#define c30	f29
#define c31	f30
#define c32	f31

/*
 * Selector operands for fpsel. NOTE: these are the same physical registers
 * as c31 / c32, so any code that loads data into c31 or c32 destroys the
 * selectors; only code paths that never execute fpsel afterwards may do so.
 */
#define sel_p	f30
#define sel_s	f31

/*
 * Packing copy: reads N columns of M elements each from A (columns LDA
 * apart) and writes them sequentially to B.  Columns are consumed in
 * groups of 8, then 4, 2 and 1 (bits of N); within a group the output is
 * written one source row at a time.
 *
 * In:   M (r3), N (r4), A (r5), LDA (r6), B (r7)
 * Out:  nothing is placed in a return register by this code.
 * Uses: r0, r8-r12, CTR, cr0, f0-f13 freely; f14-f31 and r26-r31 are
 *       spilled here and reloaded at .L999.
 *
 * PROLOGUE, PROFCODE, SP, SIZE, BASE_SHIFT and the upper-case load/store
 * mnemonics are macros from common.h; their expansions are not visible in
 * this file.
 */
	PROLOGUE
	PROFCODE

	/* Spill f14-f31, one 16-byte pair per store; SP moves down 16 each time. */
	li	r0, -16

	stfpdux	f14, SP, r0
	stfpdux	f15, SP, r0
	stfpdux	f16, SP, r0
	stfpdux	f17, SP, r0
	stfpdux	f18, SP, r0
	stfpdux	f19, SP, r0
	stfpdux	f20, SP, r0
	stfpdux	f21, SP, r0
	stfpdux	f22, SP, r0
	stfpdux	f23, SP, r0
	stfpdux	f24, SP, r0
	stfpdux	f25, SP, r0
	stfpdux	f26, SP, r0
	stfpdux	f27, SP, r0
	stfpdux	f28, SP, r0
	stfpdux	f29, SP, r0
	stfpdux	f30, SP, r0
	stfpdux	f31, SP, r0

	/* Spill r31..r26 (INC2, INC, AO8..AO5), 4 bytes each. */
	stwu	r31, -4(SP)
	stwu	r30, -4(SP)
	stwu	r29, -4(SP)
	stwu	r28, -4(SP)

	stwu	r27, -4(SP)
	stwu	r26, -4(SP)

	/* 0x3f800000 and 0xbf800000 are the single-precision bit patterns of +1.0 and -1.0. */
	lis	r9, 0x3f80
	lis	r10, 0xbf80

	/* After these four pushes the words at SP read, low to high: +1, -1, -1, +1. */
	stwu	r9, -4(SP)
	stwu	r10, -4(SP)
	stwu	r10, -4(SP)
	stwu	r9, -4(SP)

	/* Scale LDA by 2^BASE_SHIFT (presumably elements -> bytes; BASE_SHIFT comes from common.h). */
	slwi	LDA, LDA, BASE_SHIFT

	/* sel_p = (+1, -1): fpsel with it takes the primary half from its 3rd operand, the secondary half from its 4th. */
	li	r0, 0
	lfpsux	sel_p, SP, r0
	/* sel_s = (-1, +1): the opposite choice.  This update-form load leaves SP 8 bytes higher; .L999 accounts for it. */
	li	r0, 8
	lfpsux	sel_s, SP, r0

	/* Nothing to copy when M <= 0 or N <= 0; still go through .L999 to undo the spills. */
	cmpwi	cr0, M, 0
	ble-	.L999
	cmpwi	cr0, N, 0
	ble-	.L999

	li	INC, 1 * SIZE
	li	INC2, 2 * SIZE

	/* Pre-bias B by one store stride: every store below is indexed by INC2 and (except the final single-element store) advances B first. */
	subi	B, B, 2 * SIZE

	/* Paired loads are only used when both A and LDA are multiples of 2 * SIZE; otherwise take the element-wise path at .L100. */
	andi.	r0, A, 2 * SIZE - 1
	bne	.L100
	andi.	r0, LDA, 2 * SIZE - 1
	bne	.L100

	/* Aligned path: pre-bias A the same way, then J = N / 8 column groups. */
	subi	A, A, 2 * SIZE
	srawi.	J, N, 3
	ble	.L20
	.align 4

/*
 * Aligned path, 8 columns per group (runs J = N / 8 times).
 *
 * Expects: A pre-biased by -2 * SIZE, B pre-biased by -2 * SIZE,
 *          sel_p = (+1, -1), sel_s = (-1, +1), INC2 = 2 * SIZE.
 *
 * Register-pair convention relied on here (FP2 semantics as I read them;
 * the upper-case mnemonics are common.h macros): LFPDUX loads two
 * consecutive elements as (primary, secondary), LFXDUX loads them swapped,
 * STFPDUX / STFXDUX store straight / swapped, and all four advance the
 * address register by the index first.  Odd columns use the straight
 * load, even columns the swapped one, so one fpsel per column pair yields
 * "same row, two adjacent columns":
 *     fpsel d, sel_p, odd, even  ->  row r   of both columns (store straight)
 *     fpsel d, sel_s, odd, even  ->  row r+1 of both columns, halves
 *                                    swapped (store swapped to fix order)
 */
.L11:
	/* One cursor per column; A steps past the whole 8-column group. */
	mr	AO1, A
	add	AO2, A, LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	AO5, AO4, LDA
	add	AO6, AO5, LDA
	add	AO7, AO6, LDA
	add	AO8, AO7, LDA
	add	A, AO8, LDA

	/* Main loop handles 4 rows per iteration (two pairs per column). */
	srawi.	r0, M, 2
	mtspr	CTR, r0
	ble	.L15
	.align 4

.L12:
	/* Rows 0-1 of all eight columns. */
	LFPDUX	c01, AO1, INC2
	LFXDUX	c02, AO2, INC2
	LFPDUX	c03, AO3, INC2
	LFXDUX	c04, AO4, INC2

	LFPDUX	c05, AO5, INC2
	LFXDUX	c06, AO6, INC2
	LFPDUX	c07, AO7, INC2
	LFXDUX	c08, AO8, INC2

	/* Rows 2-3, interleaved with the selects for rows 0-1. */
	LFPDUX	c09, AO1, INC2
	LFXDUX	c10, AO2, INC2
	LFPDUX	c11, AO3, INC2
	LFXDUX	c12, AO4, INC2
	fpsel	c17, sel_p, c01, c02

	LFPDUX	c13, AO5, INC2
	fpsel	c18, sel_p, c03, c04
	LFXDUX	c14, AO6, INC2
	fpsel	c19, sel_p, c05, c06
	LFPDUX	c15, AO7, INC2
	fpsel	c20, sel_p, c07, c08
	LFXDUX	c16, AO8, INC2
	fpsel	c21, sel_s, c01, c02

	/* Row 0 out (c17-c20), then row 1 out (c21-c24, swapped stores). */
	fpsel	c22, sel_s, c03, c04
	STFPDUX	c17, B, INC2
	fpsel	c23, sel_s, c05, c06
	STFPDUX	c18, B, INC2
	fpsel	c24, sel_s, c07, c08
	STFPDUX	c19, B, INC2

	fpsel	c01, sel_p, c09, c10
	STFPDUX	c20, B, INC2
	fpsel	c02, sel_p, c11, c12
	STFXDUX	c21, B, INC2
	fpsel	c03, sel_p, c13, c14
	STFXDUX	c22, B, INC2
	fpsel	c04, sel_p, c15, c16
	STFXDUX	c23, B, INC2

	/* Row 2 out (c01-c04), then row 3 out (c05-c08, swapped stores). */
	fpsel	c05, sel_s, c09, c10
	STFXDUX	c24, B, INC2
	fpsel	c06, sel_s, c11, c12
	STFPDUX	c01, B, INC2
	fpsel	c07, sel_s, c13, c14
	STFPDUX	c02, B, INC2
	fpsel	c08, sel_s, c15, c16
	STFPDUX	c03, B, INC2

	STFPDUX	c04, B, INC2
	STFXDUX	c05, B, INC2
	STFXDUX	c06, B, INC2
	STFXDUX	c07, B, INC2
	STFXDUX	c08, B, INC2
	bdnz	.L12
	.align 4

.L15:
	/* Remainder rows: M mod 4.  andi. never yields a negative value, so ble here means "zero". */
	andi.	r0, M, 3
	ble	.L19

	/* Two leftover rows. */
	andi.	r0, M, 2
	beq	.L17

	LFPDUX	c01, AO1, INC2
	LFXDUX	c02, AO2, INC2
	LFPDUX	c03, AO3, INC2
	LFXDUX	c04, AO4, INC2

	LFPDUX	c05, AO5, INC2
	fpsel	c09, sel_p, c01, c02
	LFXDUX	c06, AO6, INC2
	fpsel	c10, sel_p, c03, c04
	LFPDUX	c07, AO7, INC2
	fpsel	c11, sel_p, c05, c06
	LFXDUX	c08, AO8, INC2
	fpsel	c12, sel_p, c07, c08

	fpsel	c13, sel_s, c01, c02
	fpsel	c14, sel_s, c03, c04
	STFPDUX	c09, B, INC2
	fpsel	c15, sel_s, c05, c06
	STFPDUX	c10, B, INC2
	fpsel	c16, sel_s, c07, c08
	STFPDUX	c11, B, INC2

	STFPDUX	c12, B, INC2
	STFXDUX	c13, B, INC2
	STFXDUX	c14, B, INC2
	STFXDUX	c15, B, INC2
	STFXDUX	c16, B, INC2
	.align 4

.L17:
	/* One leftover row: odd columns fill the primary halves, even columns the secondary halves. */
	andi.	r0, M, 1
	beq	.L19

	LFDUX	c01, AO1, INC2
	LFDUX	c02, AO3, INC2
	LFDUX	c03, AO5, INC2
	LFDUX	c04, AO7, INC2

	LFSDUX	c01, AO2, INC2
	LFSDUX	c02, AO4, INC2
	LFSDUX	c03, AO6, INC2
	LFSDUX	c04, AO8, INC2

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	.align 4

.L19:
	/* Next 8-column group, if any. */
	addic.	J, J, -1
	bgt	.L11
	.align 4

/*
 * Aligned path, one group of 4 columns (taken when bit 2 of N is set).
 *
 * Same pre-biased A / B and sel_p / sel_s selectors as the 8-column code:
 * odd columns are loaded straight (LFPDUX), even columns swapped (LFXDUX),
 * fpsel with sel_p gives the first row of a pair for two adjacent columns
 * (stored with STFPDUX), fpsel with sel_s the second row with halves
 * swapped (stored with STFXDUX).  Output order is row by row, 4 elements
 * per row.
 */
.L20:
	/* andi. never yields a negative value, so ble here means "bit clear". */
	andi.	J, N, 4
	ble	.L30
	.align 4
.L21:
	/* (.L21 is not a branch target anywhere in this file; executed at most once.) */
	mr	AO1, A
	add	AO2, A, LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	A, AO4, LDA

	/* Main loop handles 8 rows per iteration (four pairs per column). */
	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L25
	.align 4

.L22:
	/* Rows 0-1, 2-3, 4-5 of the four columns. */
	LFPDUX	c01, AO1, INC2
	LFXDUX	c02, AO2, INC2
	LFPDUX	c03, AO3, INC2
	LFXDUX	c04, AO4, INC2

	LFPDUX	c05, AO1, INC2
	LFXDUX	c06, AO2, INC2
	LFPDUX	c07, AO3, INC2
	LFXDUX	c08, AO4, INC2

	LFPDUX	c09, AO1, INC2
	LFXDUX	c10, AO2, INC2
	LFPDUX	c11, AO3, INC2
	LFXDUX	c12, AO4, INC2
	fpsel	c17, sel_p, c01, c02

	/* Rows 6-7, interleaved with the selects for rows 0-3. */
	LFPDUX	c13, AO1, INC2
	fpsel	c18, sel_p, c03, c04
	LFXDUX	c14, AO2, INC2
	fpsel	c19, sel_s, c01, c02
	LFPDUX	c15, AO3, INC2
	fpsel	c20, sel_s, c03, c04
	LFXDUX	c16, AO4, INC2
	fpsel	c21, sel_p, c05, c06

	/* Stores alternate: two straight (even row), two swapped (odd row). */
	fpsel	c22, sel_p, c07, c08
	STFPDUX	c17, B, INC2
	fpsel	c23, sel_s, c05, c06
	STFPDUX	c18, B, INC2
	fpsel	c24, sel_s, c07, c08
	STFXDUX	c19, B, INC2

	fpsel	c01, sel_p, c09, c10
	STFXDUX	c20, B, INC2
	fpsel	c02, sel_p, c11, c12
	STFPDUX	c21, B, INC2
	fpsel	c03, sel_s, c09, c10
	STFPDUX	c22, B, INC2
	fpsel	c04, sel_s, c11, c12
	STFXDUX	c23, B, INC2

	fpsel	c05, sel_p, c13, c14
	STFXDUX	c24, B, INC2
	fpsel	c06, sel_p, c15, c16
	STFPDUX	c01, B, INC2
	fpsel	c07, sel_s, c13, c14
	STFPDUX	c02, B, INC2
	fpsel	c08, sel_s, c15, c16
	STFXDUX	c03, B, INC2

	STFXDUX	c04, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFXDUX	c07, B, INC2
	STFXDUX	c08, B, INC2
	bdnz	.L22
	.align 4

.L25:
	/* Remainder rows: M mod 8. */
	andi.	r0, M, 7
	ble	.L30

	/* Four leftover rows. */
	andi.	r0, M, 4
	beq	.L26

	LFPDUX	c01, AO1, INC2
	LFXDUX	c02, AO2, INC2
	LFPDUX	c03, AO3, INC2
	LFXDUX	c04, AO4, INC2

	LFPDUX	c05, AO1, INC2
	fpsel	c09, sel_p, c01, c02
	LFXDUX	c06, AO2, INC2
	fpsel	c10, sel_p, c03, c04
	LFPDUX	c07, AO3, INC2
	fpsel	c11, sel_s, c01, c02
	LFXDUX	c08, AO4, INC2
	fpsel	c12, sel_s, c03, c04

	fpsel	c13, sel_p, c05, c06
	fpsel	c14, sel_p, c07, c08
	STFPDUX	c09, B, INC2
	fpsel	c15, sel_s, c05, c06
	STFPDUX	c10, B, INC2
	fpsel	c16, sel_s, c07, c08
	STFXDUX	c11, B, INC2

	STFXDUX	c12, B, INC2
	STFPDUX	c13, B, INC2
	STFPDUX	c14, B, INC2
	STFXDUX	c15, B, INC2
	STFXDUX	c16, B, INC2
	.align 4

.L26:
	/* Two leftover rows. */
	andi.	r0, M, 2
	beq	.L27

	LFPDUX	c01, AO1, INC2
	LFXDUX	c02, AO2, INC2
	LFPDUX	c03, AO3, INC2
	LFXDUX	c04, AO4, INC2

	fpsel	c05, sel_p, c01, c02
	fpsel	c06, sel_p, c03, c04
	fpsel	c07, sel_s, c01, c02
	fpsel	c08, sel_s, c03, c04

	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFXDUX	c07, B, INC2
	STFXDUX	c08, B, INC2
	.align 4

.L27:
	/* One leftover row: load one element per column, then merge neighbours into pairs. */
	andi.	r0, M, 1
	beq	.L30

	LFDUX	c01, AO1, INC2
	LFDUX	c02, AO2, INC2
	LFDUX	c03, AO3, INC2
	LFDUX	c04, AO4, INC2

	/* fsmfp d, s: copy the primary half of s into the secondary half of d. */
	fsmfp	c01, c02
	fsmfp	c03, c04

	STFPDUX	c01, B, INC2
	STFPDUX	c03, B, INC2
	.align 4


/*
 * Aligned path, one group of 2 columns (taken when bit 1 of N is set).
 *
 * Column 1 is loaded straight (LFPDUX), column 2 swapped (LFXDUX).  For
 * each loaded pair, fpsel with sel_p produces the first row of both
 * columns (STFPDUX) and fpsel with sel_s the second row with halves
 * swapped (STFXDUX), so B receives 2 elements per source row.
 * Expects the pre-biased A / B and the sel_p / sel_s selectors set up at
 * function entry.
 */
.L30:
	/* andi. never yields a negative value, so ble here means "bit clear". */
	andi.	J, N, 2
	ble	.L40

	mr	AO1, A
	add	AO2, A, LDA
	add	A, AO2, LDA

	/* Main loop handles 8 rows per iteration. */
	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L35
	.align 4

.L32:
	LFPDUX	c01, AO1, INC2
	LFXDUX	c05, AO2, INC2
	LFPDUX	c02, AO1, INC2
	LFXDUX	c06, AO2, INC2

	LFPDUX	c03, AO1, INC2
	fpsel	c09, sel_p, c01, c05
	LFXDUX	c07, AO2, INC2
	fpsel	c10, sel_s, c01, c05
	LFPDUX	c04, AO1, INC2
	fpsel	c11, sel_p, c02, c06
	LFXDUX	c08, AO2, INC2
	fpsel	c12, sel_s, c02, c06

	fpsel	c13, sel_p, c03, c07
	fpsel	c14, sel_s, c03, c07
	STFPDUX	c09, B, INC2
	fpsel	c15, sel_p, c04, c08
	STFXDUX	c10, B, INC2
	fpsel	c16, sel_s, c04, c08
	STFPDUX	c11, B, INC2
	STFXDUX	c12, B, INC2

	STFPDUX	c13, B, INC2
	STFXDUX	c14, B, INC2
	STFPDUX	c15, B, INC2
	STFXDUX	c16, B, INC2
	bdnz	.L32
	.align 4

.L35:
	/* Remainder rows: M mod 8. */
	andi.	r0, M, 7
	ble	.L40

	/* Four leftover rows. */
	andi.	r0, M, 4
	beq	.L36

	LFPDUX	c01, AO1, INC2
	LFXDUX	c03, AO2, INC2
	LFPDUX	c02, AO1, INC2
	LFXDUX	c04, AO2, INC2

	fpsel	c05, sel_p, c01, c03
	fpsel	c06, sel_s, c01, c03
	fpsel	c07, sel_p, c02, c04
	fpsel	c08, sel_s, c02, c04

	STFPDUX	c05, B, INC2
	STFXDUX	c06, B, INC2
	STFPDUX	c07, B, INC2
	STFXDUX	c08, B, INC2
	.align 4

.L36:
	/* Two leftover rows. */
	andi.	r0, M, 2
	beq	.L37

	LFPDUX	c01, AO1, INC2
	LFXDUX	c02, AO2, INC2

	fpsel	c03, sel_p, c01, c02
	fpsel	c04, sel_s, c01, c02

	STFPDUX	c03, B, INC2
	STFXDUX	c04, B, INC2
	.align 4

.L37:
	/* One leftover row: one element from each column, merged into a single pair. */
	andi.	r0, M, 1
	beq	.L40

	LFDUX	c01, AO1, INC2
	LFDUX	c02, AO2, INC2

	/* fsmfp d, s: copy the primary half of s into the secondary half of d. */
	fsmfp	c01, c02
	STFPDUX	c01, B, INC2
	.align 4

/*
 * Aligned path, final single column (taken when bit 0 of N is set).
 *
 * With only one column there is nothing to interleave: pairs are loaded
 * and stored unchanged, i.e. a straight copy of M elements.  Expects the
 * pre-biased A / B set up at function entry.  Every exit goes to .L999.
 */
.L40:
	/* andi. never yields a negative value, so ble here means "bit clear". */
	andi.	J, N, 1
	ble	.L999

	mr	AO1, A

	/* Main loop copies 8 elements (4 pairs) per iteration. */
	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L45
	.align 4

.L42:
	LFPDUX	c01, AO1, INC2
	LFPDUX	c02, AO1, INC2
	LFPDUX	c03, AO1, INC2
	LFPDUX	c04, AO1, INC2

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	bdnz	.L42
	.align 4

.L45:
	/* Remainder: M mod 8. */
	andi.	r0, M, 7
	ble	.L999

	/* Four leftover elements. */
	andi.	r0, M, 4
	beq	.L46

	LFPDUX	c01, AO1, INC2
	LFPDUX	c02, AO1, INC2

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	.align 4

.L46:
	/* Two leftover elements. */
	andi.	r0, M, 2
	beq	.L47

	LFPDUX	c01, AO1, INC2
	STFPDUX	c01, B, INC2
	.align 4

.L47:
	/* One leftover element; the non-update forms are used, so AO1 and B are left as they are. */
	andi.	r0, M, 1
	beq	.L999

	LFDX	c01, AO1, INC2
	STFDX	c01, B, INC2
	/* Skip over the unaligned-path code that follows. */
	b	.L999
	.align 4


/*
 * Unaligned path, 8 columns per group (runs N / 8 times).
 *
 * Reached when A or LDA is not a multiple of 2 * SIZE, so no paired loads
 * are used.  Each output pair is assembled from two single-element
 * loads: LFDUX fills the primary half from an odd column, LFSDUX fills
 * the secondary half of the same register from the following even
 * column.  Register cNN for source row r and column pair k is chosen so
 * that storing c01, c02, ... in numeric order emits row after row,
 * 8 elements per row.  Stores are still paired (STFPDUX, stride INC2).
 *
 * NOTE: the main loop loads data into c31 and c32, which are the same
 * registers as sel_p and sel_s.  That is harmless only because no fpsel
 * is executed anywhere on the unaligned path.
 *
 * Expects: B pre-biased by -2 * SIZE, INC = 1 * SIZE, INC2 = 2 * SIZE.
 */
.L100:
	/* Pre-bias A by one element: the loads below are indexed by INC and advance the cursor first. */
	subi	A, A, 1 * SIZE
	srawi.	J, N, 3
	ble	.L120
	.align 4
.L111:
	/* One cursor per column; A steps past the whole 8-column group. */
	mr	AO1, A
	add	AO2, A, LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	AO5, AO4, LDA
	add	AO6, AO5, LDA
	add	AO7, AO6, LDA
	add	AO8, AO7, LDA
	add	A, AO8, LDA

	/* Main loop handles 8 rows per iteration and uses all 32 FP registers. */
	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L115
	.align 4

.L112:
	/* Columns 1/2 -> c01, c05, ..., c29 (rows 0..7). */
	LFDUX	c01, AO1, INC
	LFDUX	c05, AO1, INC
	LFDUX	c09, AO1, INC
	LFDUX	c13, AO1, INC

	LFDUX	c17, AO1, INC
	LFDUX	c21, AO1, INC
	LFDUX	c25, AO1, INC
	LFDUX	c29, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO2, INC
	LFSDUX	c09, AO2, INC
	LFSDUX	c13, AO2, INC

	LFSDUX	c17, AO2, INC
	LFSDUX	c21, AO2, INC
	LFSDUX	c25, AO2, INC
	LFSDUX	c29, AO2, INC

	/* Columns 3/4 -> c02, c06, ..., c30. */
	LFDUX	c02, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c10, AO3, INC
	LFDUX	c14, AO3, INC

	LFDUX	c18, AO3, INC
	LFDUX	c22, AO3, INC
	LFDUX	c26, AO3, INC
	LFDUX	c30, AO3, INC

	LFSDUX	c02, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c10, AO4, INC
	LFSDUX	c14, AO4, INC

	LFSDUX	c18, AO4, INC
	LFSDUX	c22, AO4, INC
	LFSDUX	c26, AO4, INC
	LFSDUX	c30, AO4, INC

	/* Columns 5/6 -> c03, c07, ..., c31 (c31 overwrites sel_p). */
	LFDUX	c03, AO5, INC
	LFDUX	c07, AO5, INC
	LFDUX	c11, AO5, INC
	LFDUX	c15, AO5, INC

	LFDUX	c19, AO5, INC
	LFDUX	c23, AO5, INC
	LFDUX	c27, AO5, INC
	LFDUX	c31, AO5, INC

	LFSDUX	c03, AO6, INC
	LFSDUX	c07, AO6, INC
	LFSDUX	c11, AO6, INC
	LFSDUX	c15, AO6, INC

	LFSDUX	c19, AO6, INC
	LFSDUX	c23, AO6, INC
	LFSDUX	c27, AO6, INC
	LFSDUX	c31, AO6, INC

	/* Columns 7/8 -> c04, c08, ..., c32 (c32 overwrites sel_s). */
	LFDUX	c04, AO7, INC
	LFDUX	c08, AO7, INC
	LFDUX	c12, AO7, INC
	LFDUX	c16, AO7, INC

	LFDUX	c20, AO7, INC
	LFDUX	c24, AO7, INC
	LFDUX	c28, AO7, INC
	LFDUX	c32, AO7, INC

	LFSDUX	c04, AO8, INC
	LFSDUX	c08, AO8, INC
	LFSDUX	c12, AO8, INC
	LFSDUX	c16, AO8, INC

	LFSDUX	c20, AO8, INC
	LFSDUX	c24, AO8, INC
	LFSDUX	c28, AO8, INC
	LFSDUX	c32, AO8, INC

	/* Emit rows 0..7; four pair-stores per row. */
	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c08, B, INC2

	STFPDUX	c09, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c12, B, INC2
	STFPDUX	c13, B, INC2
	STFPDUX	c14, B, INC2
	STFPDUX	c15, B, INC2
	STFPDUX	c16, B, INC2

	STFPDUX	c17, B, INC2
	STFPDUX	c18, B, INC2
	STFPDUX	c19, B, INC2
	STFPDUX	c20, B, INC2
	STFPDUX	c21, B, INC2
	STFPDUX	c22, B, INC2
	STFPDUX	c23, B, INC2
	STFPDUX	c24, B, INC2

	STFPDUX	c25, B, INC2
	STFPDUX	c26, B, INC2
	STFPDUX	c27, B, INC2
	STFPDUX	c28, B, INC2
	STFPDUX	c29, B, INC2
	STFPDUX	c30, B, INC2
	STFPDUX	c31, B, INC2
	STFPDUX	c32, B, INC2
	bdnz	.L112
	.align 4

.L115:
	/* Remainder rows: M mod 8.  andi. never yields a negative value, so ble here means "zero". */
	andi.	r0, M, 7
	ble	.L119

	/* Four leftover rows (c01..c16). */
	andi.	r0, M, 4
	beq	.L116

	LFDUX	c01, AO1, INC
	LFDUX	c05, AO1, INC
	LFDUX	c09, AO1, INC
	LFDUX	c13, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO2, INC
	LFSDUX	c09, AO2, INC
	LFSDUX	c13, AO2, INC

	LFDUX	c02, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c10, AO3, INC
	LFDUX	c14, AO3, INC

	LFSDUX	c02, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c10, AO4, INC
	LFSDUX	c14, AO4, INC

	LFDUX	c03, AO5, INC
	LFDUX	c07, AO5, INC
	LFDUX	c11, AO5, INC
	LFDUX	c15, AO5, INC

	LFSDUX	c03, AO6, INC
	LFSDUX	c07, AO6, INC
	LFSDUX	c11, AO6, INC
	LFSDUX	c15, AO6, INC

	LFDUX	c04, AO7, INC
	LFDUX	c08, AO7, INC
	LFDUX	c12, AO7, INC
	LFDUX	c16, AO7, INC

	LFSDUX	c04, AO8, INC
	LFSDUX	c08, AO8, INC
	LFSDUX	c12, AO8, INC
	LFSDUX	c16, AO8, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c08, B, INC2

	STFPDUX	c09, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c12, B, INC2
	STFPDUX	c13, B, INC2
	STFPDUX	c14, B, INC2
	STFPDUX	c15, B, INC2
	STFPDUX	c16, B, INC2
	.align 4

.L116:
	/* Two leftover rows (c01..c08). */
	andi.	r0, M, 2
	beq	.L117

	LFDUX	c01, AO1, INC
	LFDUX	c05, AO1, INC
	LFDUX	c02, AO3, INC
	LFDUX	c06, AO3, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO2, INC
	LFSDUX	c02, AO4, INC
	LFSDUX	c06, AO4, INC

	LFDUX	c03, AO5, INC
	LFDUX	c07, AO5, INC
	LFDUX	c04, AO7, INC
	LFDUX	c08, AO7, INC

	LFSDUX	c03, AO6, INC
	LFSDUX	c07, AO6, INC
	LFSDUX	c04, AO8, INC
	LFSDUX	c08, AO8, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c08, B, INC2
	.align 4

.L117:
	/* One leftover row (c01..c04). */
	andi.	r0, M, 1
	beq	.L119

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO3, INC
	LFDUX	c03, AO5, INC
	LFDUX	c04, AO7, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO4, INC
	LFSDUX	c03, AO6, INC
	LFSDUX	c04, AO8, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	.align 4

.L119:
	/* Next 8-column group, if any. */
	addic.	J, J, -1
	bgt	.L111
	.align 4

/*
 * Unaligned path, one group of 4 columns (taken when bit 2 of N is set).
 *
 * Element-wise loads only: LFDUX puts an element of column 1 (or 3) in
 * the primary half, LFSDUX puts the same-row element of column 2 (or 4)
 * in the secondary half.  Columns 1/2 use c01-c04 and c09-c12, columns
 * 3/4 use c05-c08 and c13-c16; the stores interleave the two sets so B
 * receives 4 elements per source row.
 * Expects A pre-biased by -1 * SIZE and B by -2 * SIZE.
 */
.L120:
	/* andi. never yields a negative value, so ble here means "bit clear". */
	andi.	J, N, 4
	ble	.L130
	.align 4
.L121:
	/* (.L121 is not a branch target anywhere in this file; executed at most once.) */
	mr	AO1, A
	add	AO2, A, LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	A, AO4, LDA

	/* Main loop handles 8 rows per iteration. */
	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L125
	.align 4

.L122:
	/* Columns 1/2, rows 0-3 then rows 4-7. */
	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	LFDUX	c09, AO1, INC
	LFDUX	c10, AO1, INC
	LFDUX	c11, AO1, INC
	LFDUX	c12, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC

	LFSDUX	c09, AO2, INC
	LFSDUX	c10, AO2, INC
	LFSDUX	c11, AO2, INC
	LFSDUX	c12, AO2, INC

	/* Columns 3/4, rows 0-3 then rows 4-7. */
	LFDUX	c05, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c07, AO3, INC
	LFDUX	c08, AO3, INC

	LFDUX	c13, AO3, INC
	LFDUX	c14, AO3, INC
	LFDUX	c15, AO3, INC
	LFDUX	c16, AO3, INC

	LFSDUX	c05, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c07, AO4, INC
	LFSDUX	c08, AO4, INC

	LFSDUX	c13, AO4, INC
	LFSDUX	c14, AO4, INC
	LFSDUX	c15, AO4, INC
	LFSDUX	c16, AO4, INC

	/* Per row: columns 1/2 pair, then columns 3/4 pair. */
	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c08, B, INC2

	STFPDUX	c09, B, INC2
	STFPDUX	c13, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c14, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c15, B, INC2
	STFPDUX	c12, B, INC2
	STFPDUX	c16, B, INC2
	bdnz	.L122
	.align 4

.L125:
	/* Remainder rows: M mod 8. */
	andi.	r0, M, 7
	ble	.L130

	/* Four leftover rows. */
	andi.	r0, M, 4
	beq	.L126

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC

	LFDUX	c05, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c07, AO3, INC
	LFDUX	c08, AO3, INC

	LFSDUX	c05, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c07, AO4, INC
	LFSDUX	c08, AO4, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c08, B, INC2
	.align 4

.L126:
	/* Two leftover rows. */
	andi.	r0, M, 2
	beq	.L127

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC

	LFDUX	c05, AO3, INC
	LFDUX	c06, AO3, INC

	LFSDUX	c05, AO4, INC
	LFSDUX	c06, AO4, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c06, B, INC2
	.align 4

.L127:
	/* One leftover row. */
	andi.	r0, M, 1
	beq	.L130

	LFDUX	c01, AO1, INC
	LFDUX	c05, AO3, INC

	/* NOTE(review): purpose of these two nops is not evident from the code; presumably issue-slot padding. Kept as-is. */
	nop
	nop

	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO4, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	.align 4


/*
 * Unaligned path, one group of 2 columns (taken when bit 1 of N is set).
 *
 * For every source row, LFDUX loads the column-1 element into the primary
 * half and LFSDUX loads the column-2 element into the secondary half of
 * the same register; one STFPDUX then writes that row's two elements.
 * Expects A pre-biased by -1 * SIZE and B by -2 * SIZE.
 */
.L130:
	/* andi. never yields a negative value, so ble here means "bit clear". */
	andi.	J, N, 2
	ble	.L140

	mr	AO1, A
	add	AO2, A, LDA
	add	A, AO2, LDA

	/* Main loop handles 8 rows per iteration. */
	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L135
	.align 4

.L132:
	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	LFDUX	c09, AO1, INC
	LFDUX	c10, AO1, INC
	LFDUX	c11, AO1, INC
	LFDUX	c12, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC

	LFSDUX	c09, AO2, INC
	LFSDUX	c10, AO2, INC
	LFSDUX	c11, AO2, INC
	LFSDUX	c12, AO2, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2

	STFPDUX	c09, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c12, B, INC2
	bdnz	.L132
	.align 4

.L135:
	/* Remainder rows: M mod 8. */
	andi.	r0, M, 7
	ble	.L140

	/* Four leftover rows. */
	andi.	r0, M, 4
	beq	.L136

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	.align 4

.L136:
	/* Two leftover rows. */
	andi.	r0, M, 2
	beq	.L137

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	.align 4

.L137:
	/* One leftover row: both elements land in primary halves, then get merged. */
	andi.	r0, M, 1
	beq	.L140

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO2, INC

	/* fsmfp d, s: copy the primary half of s into the secondary half of d. */
	fsmfp	c01, c02
	STFPDUX	c01, B, INC2
	.align 4

/*
 * Unaligned path, final single column (taken when bit 0 of N is set).
 *
 * A straight copy of M elements.  Because the source may not be
 * pair-aligned, elements are loaded one at a time into primary halves
 * and neighbouring elements are merged with fsmfp (copy the primary half
 * of the second operand into the secondary half of the first) before
 * each paired store.
 * Expects A pre-biased by -1 * SIZE and B by -2 * SIZE.  Falls through
 * into .L999 at the end.
 */
.L140:
	/* andi. never yields a negative value, so ble here means "bit clear". */
	andi.	J, N, 1
	ble	.L999

	mr	AO1, A

	/* Main loop copies 8 elements per iteration. */
	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L145
	.align 4

.L142:
	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	LFDUX	c05, AO1, INC
	LFDUX	c06, AO1, INC
	LFDUX	c07, AO1, INC
	LFDUX	c08, AO1, INC

	fsmfp	c01, c02
	fsmfp	c03, c04
	fsmfp	c05, c06
	fsmfp	c07, c08

	STFPDUX	c01, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c07, B, INC2
	bdnz	.L142
	.align 4

.L145:
	/* Remainder: M mod 8. */
	andi.	r0, M, 7
	ble	.L999

	/* Four leftover elements. */
	andi.	r0, M, 4
	beq	.L146

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	fsmfp	c01, c02
	fsmfp	c03, c04

	STFPDUX	c01, B, INC2
	STFPDUX	c03, B, INC2
	.align 4

.L146:
	/* Two leftover elements. */
	andi.	r0, M, 2
	beq	.L147

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC

	fsmfp	c01, c02
	STFPDUX	c01, B, INC2
	.align 4

.L147:
	/* One leftover element; the non-update forms are used, so AO1 and B are left as they are. */
	andi.	r0, M, 1
	beq	.L999

	LFDX	c01, AO1, INC
	STFDX	c01, B, INC2
	.align 4

/*
 * Common exit: reload the registers spilled at function entry and return.
 *
 * Stack picture on arrival (X = address where f31 was spilled): the entry
 * code pushed r31..r26 at X-4 .. X-24 and four constant words at
 * X-28 .. X-40, and the second selector load then moved SP up by 8, so
 * SP = X-32 here.  No return value is set by this code.
 */
.L999:
	/* SP = X-28, so that the first pre-incrementing lwzu reads r26 from X-24. */
	addi	SP, SP, 4

	lwzu	r26, 4(SP)
	lwzu	r27, 4(SP)

	lwzu	r28, 4(SP)
	lwzu	r29, 4(SP)
	lwzu	r30, 4(SP)
	lwzu	r31, 4(SP)

	/* SP is X-4 now; back up to X-16 so the first +16 update-form load lands on X. */
	subi	SP, SP, 12
	li	r0, 16

	/* Reload the FP pairs in the reverse of the order they were spilled. */
	lfpdux	f31, SP, r0
	lfpdux	f30, SP, r0
	lfpdux	f29, SP, r0
	lfpdux	f28, SP, r0
	lfpdux	f27, SP, r0
	lfpdux	f26, SP, r0
	lfpdux	f25, SP, r0
	lfpdux	f24, SP, r0
	lfpdux	f23, SP, r0
	lfpdux	f22, SP, r0
	lfpdux	f21, SP, r0
	lfpdux	f20, SP, r0
	lfpdux	f19, SP, r0
	lfpdux	f18, SP, r0
	lfpdux	f17, SP, r0
	lfpdux	f16, SP, r0
	lfpdux	f15, SP, r0
	lfpdux	f14, SP, r0
	/* SP points at the f14 slot; step over it to restore the entry value of SP. */
	addi	SP, SP, 16
	blr

	EPILOGUE