Renamed the assembly labels with a `.L` prefix so that they are hidden (assembler-local symbols).
Added license information.
This commit is contained in:
parent
0afc76fd65
commit
085f215257
|
@ -1,3 +1,38 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||||
|
* BLASTEST : OK
|
||||||
|
* CTEST : OK
|
||||||
|
* TEST : OK
|
||||||
|
* LAPACK-TEST : OK
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
|
@ -218,11 +253,11 @@
|
||||||
|
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble L999_H1
|
ble .L999_H1
|
||||||
cmpwi cr0, N, 0
|
cmpwi cr0, N, 0
|
||||||
ble L999_H1
|
ble .L999_H1
|
||||||
cmpwi cr0, K, 0
|
cmpwi cr0, K, 0
|
||||||
ble L999_H1
|
ble .L999_H1
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi ALPHA, SP, 296
|
addi ALPHA, SP, 296
|
||||||
|
@ -241,7 +276,7 @@
|
||||||
|
|
||||||
#include "dgemm_logic_16x4_power8.S"
|
#include "dgemm_logic_16x4_power8.S"
|
||||||
|
|
||||||
L999:
|
.L999:
|
||||||
addi r3, 0, 0
|
addi r3, 0, 0
|
||||||
|
|
||||||
lfd f14, 0(SP)
|
lfd f14, 0(SP)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,38 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||||
|
* BLASTEST : OK
|
||||||
|
* CTEST : OK
|
||||||
|
* TEST : OK
|
||||||
|
* LAPACK-TEST : OK
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
/*********************************************************************
|
/*********************************************************************
|
||||||
* Macros for N=4, M=16 *
|
* Macros for N=4, M=16 *
|
||||||
*********************************************************************/
|
*********************************************************************/
|
||||||
|
|
|
@ -1,3 +1,38 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||||
|
* BLASTEST : OK
|
||||||
|
* CTEST : OK
|
||||||
|
* TEST : OK
|
||||||
|
* LAPACK-TEST : OK
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
|
@ -228,11 +263,11 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble L999_H1
|
ble .L999_H1
|
||||||
cmpwi cr0, N, 0
|
cmpwi cr0, N, 0
|
||||||
ble L999_H1
|
ble .L999_H1
|
||||||
cmpwi cr0, K, 0
|
cmpwi cr0, K, 0
|
||||||
ble L999_H1
|
ble .L999_H1
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
addi ALPHA, SP, 296
|
addi ALPHA, SP, 296
|
||||||
|
@ -251,7 +286,7 @@
|
||||||
|
|
||||||
#include "dtrmm_logic_16x4_power8.S"
|
#include "dtrmm_logic_16x4_power8.S"
|
||||||
|
|
||||||
L999:
|
.L999:
|
||||||
addi r3, 0, 0
|
addi r3, 0, 0
|
||||||
|
|
||||||
lfd f14, 0(SP)
|
lfd f14, 0(SP)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,38 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||||
|
* BLASTEST : OK
|
||||||
|
* CTEST : OK
|
||||||
|
* TEST : OK
|
||||||
|
* LAPACK-TEST : OK
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
|
@ -233,11 +268,11 @@
|
||||||
#include "zgemm_macros_8x2_power8.S"
|
#include "zgemm_macros_8x2_power8.S"
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble L999
|
ble .L999
|
||||||
cmpwi cr0, N, 0
|
cmpwi cr0, N, 0
|
||||||
ble L999
|
ble .L999
|
||||||
cmpwi cr0, K, 0
|
cmpwi cr0, K, 0
|
||||||
ble L999
|
ble .L999
|
||||||
|
|
||||||
slwi LDC, LDC, ZBASE_SHIFT
|
slwi LDC, LDC, ZBASE_SHIFT
|
||||||
li PRE, 256
|
li PRE, 256
|
||||||
|
@ -260,7 +295,7 @@
|
||||||
|
|
||||||
#include "zgemm_logic_8x2_power8.S"
|
#include "zgemm_logic_8x2_power8.S"
|
||||||
|
|
||||||
L999:
|
.L999:
|
||||||
addi r3, 0, 0
|
addi r3, 0, 0
|
||||||
|
|
||||||
lfd f14, 0(SP)
|
lfd f14, 0(SP)
|
||||||
|
|
|
@ -1,25 +1,25 @@
|
||||||
srawi. J, N, 1
|
srawi. J, N, 1
|
||||||
ble ZGEMM_L2_END
|
ble .LZGEMM_L2_END
|
||||||
|
|
||||||
ZGEMM_L2_BEGIN:
|
.LZGEMM_L2_BEGIN:
|
||||||
|
|
||||||
mr CO, C
|
mr CO, C
|
||||||
mr AO, A
|
mr AO, A
|
||||||
slwi T1, LDC , 1
|
slwi T1, LDC , 1
|
||||||
add C, C, T1
|
add C, C, T1
|
||||||
srawi. I, M, 3
|
srawi. I, M, 3
|
||||||
ble ZGEMM_L2x8_END
|
ble .LZGEMM_L2x8_END
|
||||||
|
|
||||||
ZGEMM_L2x8_BEGIN:
|
.LZGEMM_L2x8_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L2x8_SUB0
|
ble .LZGEMM_L2x8_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L2x8_SUB4
|
ble .LZGEMM_L2x8_SUB4
|
||||||
|
|
||||||
ZGEMM_L2x8_LOOP_START:
|
.LZGEMM_L2x8_LOOP_START:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
LOAD2x8_1
|
LOAD2x8_1
|
||||||
|
@ -42,11 +42,11 @@ ZGEMM_L2x8_LOOP_START:
|
||||||
KERNEL2x8_2
|
KERNEL2x8_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L2x8_LOOP_END
|
ble .LZGEMM_L2x8_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L2x8_LOOP:
|
.LZGEMM_L2x8_LOOP:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL2x8_1
|
KERNEL2x8_1
|
||||||
|
@ -67,9 +67,9 @@ ZGEMM_L2x8_LOOP:
|
||||||
KERNEL2x8_2
|
KERNEL2x8_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x8_LOOP
|
bgt .LZGEMM_L2x8_LOOP
|
||||||
|
|
||||||
ZGEMM_L2x8_LOOP_END:
|
.LZGEMM_L2x8_LOOP_END:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL2x8_1
|
KERNEL2x8_1
|
||||||
|
@ -88,9 +88,9 @@ ZGEMM_L2x8_LOOP_END:
|
||||||
KERNEL2x8_1
|
KERNEL2x8_1
|
||||||
KERNEL2x8_E2
|
KERNEL2x8_E2
|
||||||
|
|
||||||
b ZGEMM_L2x8_SUB1
|
b .LZGEMM_L2x8_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x8_SUB4:
|
.LZGEMM_L2x8_SUB4:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL2x8_SUBI1
|
KERNEL2x8_SUBI1
|
||||||
|
@ -106,53 +106,53 @@ ZGEMM_L2x8_SUB4:
|
||||||
KERNEL2x8_SUB1
|
KERNEL2x8_SUB1
|
||||||
KERNEL2x8_SUB1
|
KERNEL2x8_SUB1
|
||||||
|
|
||||||
b ZGEMM_L2x8_SUB1
|
b .LZGEMM_L2x8_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x8_SUB0:
|
.LZGEMM_L2x8_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL2x8_SUBI1
|
KERNEL2x8_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L2x8_SAVE
|
ble .LZGEMM_L2x8_SAVE
|
||||||
b ZGEMM_L2x8_SUB2
|
b .LZGEMM_L2x8_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x8_SUB1:
|
.LZGEMM_L2x8_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L2x8_SAVE
|
ble .LZGEMM_L2x8_SAVE
|
||||||
|
|
||||||
ZGEMM_L2x8_SUB2:
|
.LZGEMM_L2x8_SUB2:
|
||||||
|
|
||||||
KERNEL2x8_SUB1
|
KERNEL2x8_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x8_SUB2
|
bgt .LZGEMM_L2x8_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x8_SAVE:
|
.LZGEMM_L2x8_SAVE:
|
||||||
|
|
||||||
SAVE2x8
|
SAVE2x8
|
||||||
|
|
||||||
addic. I, I, -1
|
addic. I, I, -1
|
||||||
bgt ZGEMM_L2x8_BEGIN
|
bgt .LZGEMM_L2x8_BEGIN
|
||||||
|
|
||||||
ZGEMM_L2x8_END:
|
.LZGEMM_L2x8_END:
|
||||||
|
|
||||||
ZGEMM_L2x4_BEGIN:
|
.LZGEMM_L2x4_BEGIN:
|
||||||
|
|
||||||
andi. T2, M, 7
|
andi. T2, M, 7
|
||||||
ble ZGEMM_L2x1_END
|
ble .LZGEMM_L2x1_END
|
||||||
|
|
||||||
andi. T1, M, 4
|
andi. T1, M, 4
|
||||||
ble ZGEMM_L2x4_END
|
ble .LZGEMM_L2x4_END
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L2x4_SUB0
|
ble .LZGEMM_L2x4_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L2x4_SUB4
|
ble .LZGEMM_L2x4_SUB4
|
||||||
|
|
||||||
ZGEMM_L2x4_LOOP_START:
|
.LZGEMM_L2x4_LOOP_START:
|
||||||
|
|
||||||
LOAD2x4_1
|
LOAD2x4_1
|
||||||
KERNEL2x4_I1
|
KERNEL2x4_I1
|
||||||
|
@ -166,11 +166,11 @@ ZGEMM_L2x4_LOOP_START:
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L2x4_LOOP_END
|
ble .LZGEMM_L2x4_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L2x4_LOOP:
|
.LZGEMM_L2x4_LOOP:
|
||||||
|
|
||||||
KERNEL2x4_1
|
KERNEL2x4_1
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
@ -183,9 +183,9 @@ ZGEMM_L2x4_LOOP:
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x4_LOOP
|
bgt .LZGEMM_L2x4_LOOP
|
||||||
|
|
||||||
ZGEMM_L2x4_LOOP_END:
|
.LZGEMM_L2x4_LOOP_END:
|
||||||
|
|
||||||
KERNEL2x4_1
|
KERNEL2x4_1
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
@ -197,9 +197,9 @@ ZGEMM_L2x4_LOOP_END:
|
||||||
KERNEL2x4_1
|
KERNEL2x4_1
|
||||||
KERNEL2x4_E2
|
KERNEL2x4_E2
|
||||||
|
|
||||||
b ZGEMM_L2x4_SUB1
|
b .LZGEMM_L2x4_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x4_SUB4:
|
.LZGEMM_L2x4_SUB4:
|
||||||
|
|
||||||
KERNEL2x4_SUBI1
|
KERNEL2x4_SUBI1
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
|
@ -211,48 +211,48 @@ ZGEMM_L2x4_SUB4:
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
|
|
||||||
b ZGEMM_L2x4_SUB1
|
b .LZGEMM_L2x4_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x4_SUB0:
|
.LZGEMM_L2x4_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL2x4_SUBI1
|
KERNEL2x4_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L2x4_SAVE
|
ble .LZGEMM_L2x4_SAVE
|
||||||
b ZGEMM_L2x4_SUB2
|
b .LZGEMM_L2x4_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x4_SUB1:
|
.LZGEMM_L2x4_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L2x4_SAVE
|
ble .LZGEMM_L2x4_SAVE
|
||||||
|
|
||||||
ZGEMM_L2x4_SUB2:
|
.LZGEMM_L2x4_SUB2:
|
||||||
|
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x4_SUB2
|
bgt .LZGEMM_L2x4_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x4_SAVE:
|
.LZGEMM_L2x4_SAVE:
|
||||||
|
|
||||||
SAVE2x4
|
SAVE2x4
|
||||||
|
|
||||||
ZGEMM_L2x4_END:
|
.LZGEMM_L2x4_END:
|
||||||
|
|
||||||
ZGEMM_L2x2_BEGIN:
|
.LZGEMM_L2x2_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
andi. T1, M, 2
|
andi. T1, M, 2
|
||||||
ble ZGEMM_L2x2_END
|
ble .LZGEMM_L2x2_END
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L2x2_SUB0
|
ble .LZGEMM_L2x2_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L2x2_SUB4
|
ble .LZGEMM_L2x2_SUB4
|
||||||
|
|
||||||
ZGEMM_L2x2_LOOP_START:
|
.LZGEMM_L2x2_LOOP_START:
|
||||||
|
|
||||||
LOAD2x2_1
|
LOAD2x2_1
|
||||||
KERNEL2x2_I1
|
KERNEL2x2_I1
|
||||||
|
@ -266,11 +266,11 @@ ZGEMM_L2x2_LOOP_START:
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L2x2_LOOP_END
|
ble .LZGEMM_L2x2_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L2x2_LOOP:
|
.LZGEMM_L2x2_LOOP:
|
||||||
|
|
||||||
KERNEL2x2_1
|
KERNEL2x2_1
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
@ -283,9 +283,9 @@ ZGEMM_L2x2_LOOP:
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x2_LOOP
|
bgt .LZGEMM_L2x2_LOOP
|
||||||
|
|
||||||
ZGEMM_L2x2_LOOP_END:
|
.LZGEMM_L2x2_LOOP_END:
|
||||||
|
|
||||||
KERNEL2x2_1
|
KERNEL2x2_1
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
@ -297,9 +297,9 @@ ZGEMM_L2x2_LOOP_END:
|
||||||
KERNEL2x2_1
|
KERNEL2x2_1
|
||||||
KERNEL2x2_E2
|
KERNEL2x2_E2
|
||||||
|
|
||||||
b ZGEMM_L2x2_SUB1
|
b .LZGEMM_L2x2_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x2_SUB4:
|
.LZGEMM_L2x2_SUB4:
|
||||||
|
|
||||||
KERNEL2x2_SUBI1
|
KERNEL2x2_SUBI1
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
|
@ -311,48 +311,48 @@ ZGEMM_L2x2_SUB4:
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
|
|
||||||
b ZGEMM_L2x2_SUB1
|
b .LZGEMM_L2x2_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x2_SUB0:
|
.LZGEMM_L2x2_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL2x2_SUBI1
|
KERNEL2x2_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L2x2_SAVE
|
ble .LZGEMM_L2x2_SAVE
|
||||||
b ZGEMM_L2x2_SUB2
|
b .LZGEMM_L2x2_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x2_SUB1:
|
.LZGEMM_L2x2_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L2x2_SAVE
|
ble .LZGEMM_L2x2_SAVE
|
||||||
|
|
||||||
ZGEMM_L2x2_SUB2:
|
.LZGEMM_L2x2_SUB2:
|
||||||
|
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x2_SUB2
|
bgt .LZGEMM_L2x2_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x2_SAVE:
|
.LZGEMM_L2x2_SAVE:
|
||||||
|
|
||||||
SAVE2x2
|
SAVE2x2
|
||||||
|
|
||||||
ZGEMM_L2x2_END:
|
.LZGEMM_L2x2_END:
|
||||||
|
|
||||||
ZGEMM_L2x1_BEGIN:
|
.LZGEMM_L2x1_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
andi. T1, M, 1
|
andi. T1, M, 1
|
||||||
ble ZGEMM_L2x1_END
|
ble .LZGEMM_L2x1_END
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L2x1_SUB0
|
ble .LZGEMM_L2x1_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L2x1_SUB4
|
ble .LZGEMM_L2x1_SUB4
|
||||||
|
|
||||||
ZGEMM_L2x1_LOOP_START:
|
.LZGEMM_L2x1_LOOP_START:
|
||||||
|
|
||||||
LOAD2x1_1
|
LOAD2x1_1
|
||||||
KERNEL2x1_I1
|
KERNEL2x1_I1
|
||||||
|
@ -366,11 +366,11 @@ ZGEMM_L2x1_LOOP_START:
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L2x1_LOOP_END
|
ble .LZGEMM_L2x1_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L2x1_LOOP:
|
.LZGEMM_L2x1_LOOP:
|
||||||
|
|
||||||
KERNEL2x1_1
|
KERNEL2x1_1
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
@ -383,9 +383,9 @@ ZGEMM_L2x1_LOOP:
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x1_LOOP
|
bgt .LZGEMM_L2x1_LOOP
|
||||||
|
|
||||||
ZGEMM_L2x1_LOOP_END:
|
.LZGEMM_L2x1_LOOP_END:
|
||||||
|
|
||||||
KERNEL2x1_1
|
KERNEL2x1_1
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
@ -397,9 +397,9 @@ ZGEMM_L2x1_LOOP_END:
|
||||||
KERNEL2x1_1
|
KERNEL2x1_1
|
||||||
KERNEL2x1_E2
|
KERNEL2x1_E2
|
||||||
|
|
||||||
b ZGEMM_L2x1_SUB1
|
b .LZGEMM_L2x1_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x1_SUB4:
|
.LZGEMM_L2x1_SUB4:
|
||||||
|
|
||||||
KERNEL2x1_SUBI1
|
KERNEL2x1_SUBI1
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
|
@ -411,72 +411,72 @@ ZGEMM_L2x1_SUB4:
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
|
|
||||||
b ZGEMM_L2x1_SUB1
|
b .LZGEMM_L2x1_SUB1
|
||||||
|
|
||||||
ZGEMM_L2x1_SUB0:
|
.LZGEMM_L2x1_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL2x1_SUBI1
|
KERNEL2x1_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L2x1_SAVE
|
ble .LZGEMM_L2x1_SAVE
|
||||||
b ZGEMM_L2x1_SUB2
|
b .LZGEMM_L2x1_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x1_SUB1:
|
.LZGEMM_L2x1_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L2x1_SAVE
|
ble .LZGEMM_L2x1_SAVE
|
||||||
|
|
||||||
ZGEMM_L2x1_SUB2:
|
.LZGEMM_L2x1_SUB2:
|
||||||
|
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L2x1_SUB2
|
bgt .LZGEMM_L2x1_SUB2
|
||||||
|
|
||||||
ZGEMM_L2x1_SAVE:
|
.LZGEMM_L2x1_SAVE:
|
||||||
|
|
||||||
SAVE2x1
|
SAVE2x1
|
||||||
|
|
||||||
ZGEMM_L2x1_END:
|
.LZGEMM_L2x1_END:
|
||||||
|
|
||||||
slwi T1, K, 5
|
slwi T1, K, 5
|
||||||
add B, B, T1
|
add B, B, T1
|
||||||
|
|
||||||
addic. J, J, -1
|
addic. J, J, -1
|
||||||
bgt ZGEMM_L2_BEGIN
|
bgt .LZGEMM_L2_BEGIN
|
||||||
|
|
||||||
andi. T2, N, 1
|
andi. T2, N, 1
|
||||||
ble L999
|
ble .L999
|
||||||
|
|
||||||
ZGEMM_L2_END:
|
.LZGEMM_L2_END:
|
||||||
|
|
||||||
b ZGEMM_L1_BEGIN
|
b .LZGEMM_L1_BEGIN
|
||||||
|
|
||||||
L999_H1:
|
.L999_H1:
|
||||||
|
|
||||||
b L999
|
b .L999
|
||||||
|
|
||||||
ZGEMM_L1_BEGIN:
|
.LZGEMM_L1_BEGIN:
|
||||||
|
|
||||||
andi. T1, N, 1
|
andi. T1, N, 1
|
||||||
ble ZGEMM_L1_END
|
ble .LZGEMM_L1_END
|
||||||
mr CO, C
|
mr CO, C
|
||||||
mr AO, A
|
mr AO, A
|
||||||
srawi. I, M, 3
|
srawi. I, M, 3
|
||||||
ble ZGEMM_L1x8_END
|
ble .LZGEMM_L1x8_END
|
||||||
|
|
||||||
ZGEMM_L1x8_BEGIN:
|
.LZGEMM_L1x8_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L1x8_SUB0
|
ble .LZGEMM_L1x8_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L1x8_SUB4
|
ble .LZGEMM_L1x8_SUB4
|
||||||
|
|
||||||
ZGEMM_L1x8_LOOP_START:
|
.LZGEMM_L1x8_LOOP_START:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
LOAD1x8_1
|
LOAD1x8_1
|
||||||
|
@ -499,11 +499,11 @@ ZGEMM_L1x8_LOOP_START:
|
||||||
KERNEL1x8_2
|
KERNEL1x8_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L1x8_LOOP_END
|
ble .LZGEMM_L1x8_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L1x8_LOOP:
|
.LZGEMM_L1x8_LOOP:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL1x8_1
|
KERNEL1x8_1
|
||||||
|
@ -524,9 +524,9 @@ ZGEMM_L1x8_LOOP:
|
||||||
KERNEL1x8_2
|
KERNEL1x8_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x8_LOOP
|
bgt .LZGEMM_L1x8_LOOP
|
||||||
|
|
||||||
ZGEMM_L1x8_LOOP_END:
|
.LZGEMM_L1x8_LOOP_END:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL1x8_1
|
KERNEL1x8_1
|
||||||
|
@ -545,9 +545,9 @@ ZGEMM_L1x8_LOOP_END:
|
||||||
KERNEL1x8_1
|
KERNEL1x8_1
|
||||||
KERNEL1x8_E2
|
KERNEL1x8_E2
|
||||||
|
|
||||||
b ZGEMM_L1x8_SUB1
|
b .LZGEMM_L1x8_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x8_SUB4:
|
.LZGEMM_L1x8_SUB4:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL1x8_SUBI1
|
KERNEL1x8_SUBI1
|
||||||
|
@ -563,53 +563,53 @@ ZGEMM_L1x8_SUB4:
|
||||||
KERNEL1x8_SUB1
|
KERNEL1x8_SUB1
|
||||||
KERNEL1x8_SUB1
|
KERNEL1x8_SUB1
|
||||||
|
|
||||||
b ZGEMM_L1x8_SUB1
|
b .LZGEMM_L1x8_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x8_SUB0:
|
.LZGEMM_L1x8_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL1x8_SUBI1
|
KERNEL1x8_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L1x8_SAVE
|
ble .LZGEMM_L1x8_SAVE
|
||||||
b ZGEMM_L1x8_SUB2
|
b .LZGEMM_L1x8_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x8_SUB1:
|
.LZGEMM_L1x8_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L1x8_SAVE
|
ble .LZGEMM_L1x8_SAVE
|
||||||
|
|
||||||
ZGEMM_L1x8_SUB2:
|
.LZGEMM_L1x8_SUB2:
|
||||||
|
|
||||||
KERNEL1x8_SUB1
|
KERNEL1x8_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x8_SUB2
|
bgt .LZGEMM_L1x8_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x8_SAVE:
|
.LZGEMM_L1x8_SAVE:
|
||||||
|
|
||||||
SAVE1x8
|
SAVE1x8
|
||||||
|
|
||||||
addic. I, I, -1
|
addic. I, I, -1
|
||||||
bgt ZGEMM_L1x8_BEGIN
|
bgt .LZGEMM_L1x8_BEGIN
|
||||||
|
|
||||||
ZGEMM_L1x8_END:
|
.LZGEMM_L1x8_END:
|
||||||
|
|
||||||
ZGEMM_L1x4_BEGIN:
|
.LZGEMM_L1x4_BEGIN:
|
||||||
|
|
||||||
andi. T2, M, 7
|
andi. T2, M, 7
|
||||||
ble ZGEMM_L1x1_END
|
ble .LZGEMM_L1x1_END
|
||||||
|
|
||||||
andi. T1, M, 4
|
andi. T1, M, 4
|
||||||
ble ZGEMM_L1x4_END
|
ble .LZGEMM_L1x4_END
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L1x4_SUB0
|
ble .LZGEMM_L1x4_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L1x4_SUB4
|
ble .LZGEMM_L1x4_SUB4
|
||||||
|
|
||||||
ZGEMM_L1x4_LOOP_START:
|
.LZGEMM_L1x4_LOOP_START:
|
||||||
|
|
||||||
LOAD1x4_1
|
LOAD1x4_1
|
||||||
KERNEL1x4_I1
|
KERNEL1x4_I1
|
||||||
|
@ -623,11 +623,11 @@ ZGEMM_L1x4_LOOP_START:
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L1x4_LOOP_END
|
ble .LZGEMM_L1x4_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L1x4_LOOP:
|
.LZGEMM_L1x4_LOOP:
|
||||||
|
|
||||||
KERNEL1x4_1
|
KERNEL1x4_1
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
@ -640,9 +640,9 @@ ZGEMM_L1x4_LOOP:
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x4_LOOP
|
bgt .LZGEMM_L1x4_LOOP
|
||||||
|
|
||||||
ZGEMM_L1x4_LOOP_END:
|
.LZGEMM_L1x4_LOOP_END:
|
||||||
|
|
||||||
KERNEL1x4_1
|
KERNEL1x4_1
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
@ -654,9 +654,9 @@ ZGEMM_L1x4_LOOP_END:
|
||||||
KERNEL1x4_1
|
KERNEL1x4_1
|
||||||
KERNEL1x4_E2
|
KERNEL1x4_E2
|
||||||
|
|
||||||
b ZGEMM_L1x4_SUB1
|
b .LZGEMM_L1x4_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x4_SUB4:
|
.LZGEMM_L1x4_SUB4:
|
||||||
|
|
||||||
KERNEL1x4_SUBI1
|
KERNEL1x4_SUBI1
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
|
@ -668,48 +668,48 @@ ZGEMM_L1x4_SUB4:
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
|
|
||||||
b ZGEMM_L1x4_SUB1
|
b .LZGEMM_L1x4_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x4_SUB0:
|
.LZGEMM_L1x4_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL1x4_SUBI1
|
KERNEL1x4_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L1x4_SAVE
|
ble .LZGEMM_L1x4_SAVE
|
||||||
b ZGEMM_L1x4_SUB2
|
b .LZGEMM_L1x4_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x4_SUB1:
|
.LZGEMM_L1x4_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L1x4_SAVE
|
ble .LZGEMM_L1x4_SAVE
|
||||||
|
|
||||||
ZGEMM_L1x4_SUB2:
|
.LZGEMM_L1x4_SUB2:
|
||||||
|
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x4_SUB2
|
bgt .LZGEMM_L1x4_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x4_SAVE:
|
.LZGEMM_L1x4_SAVE:
|
||||||
|
|
||||||
SAVE1x4
|
SAVE1x4
|
||||||
|
|
||||||
ZGEMM_L1x4_END:
|
.LZGEMM_L1x4_END:
|
||||||
|
|
||||||
ZGEMM_L1x2_BEGIN:
|
.LZGEMM_L1x2_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
andi. T1, M, 2
|
andi. T1, M, 2
|
||||||
ble ZGEMM_L1x2_END
|
ble .LZGEMM_L1x2_END
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L1x2_SUB0
|
ble .LZGEMM_L1x2_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L1x2_SUB4
|
ble .LZGEMM_L1x2_SUB4
|
||||||
|
|
||||||
ZGEMM_L1x2_LOOP_START:
|
.LZGEMM_L1x2_LOOP_START:
|
||||||
|
|
||||||
LOAD1x2_1
|
LOAD1x2_1
|
||||||
KERNEL1x2_I1
|
KERNEL1x2_I1
|
||||||
|
@ -723,11 +723,11 @@ ZGEMM_L1x2_LOOP_START:
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L1x2_LOOP_END
|
ble .LZGEMM_L1x2_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L1x2_LOOP:
|
.LZGEMM_L1x2_LOOP:
|
||||||
|
|
||||||
KERNEL1x2_1
|
KERNEL1x2_1
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
@ -740,9 +740,9 @@ ZGEMM_L1x2_LOOP:
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x2_LOOP
|
bgt .LZGEMM_L1x2_LOOP
|
||||||
|
|
||||||
ZGEMM_L1x2_LOOP_END:
|
.LZGEMM_L1x2_LOOP_END:
|
||||||
|
|
||||||
KERNEL1x2_1
|
KERNEL1x2_1
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
@ -754,9 +754,9 @@ ZGEMM_L1x2_LOOP_END:
|
||||||
KERNEL1x2_1
|
KERNEL1x2_1
|
||||||
KERNEL1x2_E2
|
KERNEL1x2_E2
|
||||||
|
|
||||||
b ZGEMM_L1x2_SUB1
|
b .LZGEMM_L1x2_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x2_SUB4:
|
.LZGEMM_L1x2_SUB4:
|
||||||
|
|
||||||
KERNEL1x2_SUBI1
|
KERNEL1x2_SUBI1
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
|
@ -768,48 +768,48 @@ ZGEMM_L1x2_SUB4:
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
|
|
||||||
b ZGEMM_L1x2_SUB1
|
b .LZGEMM_L1x2_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x2_SUB0:
|
.LZGEMM_L1x2_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL1x2_SUBI1
|
KERNEL1x2_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L1x2_SAVE
|
ble .LZGEMM_L1x2_SAVE
|
||||||
b ZGEMM_L1x2_SUB2
|
b .LZGEMM_L1x2_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x2_SUB1:
|
.LZGEMM_L1x2_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L1x2_SAVE
|
ble .LZGEMM_L1x2_SAVE
|
||||||
|
|
||||||
ZGEMM_L1x2_SUB2:
|
.LZGEMM_L1x2_SUB2:
|
||||||
|
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x2_SUB2
|
bgt .LZGEMM_L1x2_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x2_SAVE:
|
.LZGEMM_L1x2_SAVE:
|
||||||
|
|
||||||
SAVE1x2
|
SAVE1x2
|
||||||
|
|
||||||
ZGEMM_L1x2_END:
|
.LZGEMM_L1x2_END:
|
||||||
|
|
||||||
ZGEMM_L1x1_BEGIN:
|
.LZGEMM_L1x1_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
andi. T1, M, 1
|
andi. T1, M, 1
|
||||||
ble ZGEMM_L1x1_END
|
ble .LZGEMM_L1x1_END
|
||||||
mr BO, B
|
mr BO, B
|
||||||
srawi. L, K, 3
|
srawi. L, K, 3
|
||||||
ble ZGEMM_L1x1_SUB0
|
ble .LZGEMM_L1x1_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZGEMM_L1x1_SUB4
|
ble .LZGEMM_L1x1_SUB4
|
||||||
|
|
||||||
ZGEMM_L1x1_LOOP_START:
|
.LZGEMM_L1x1_LOOP_START:
|
||||||
|
|
||||||
LOAD1x1_1
|
LOAD1x1_1
|
||||||
KERNEL1x1_I1
|
KERNEL1x1_I1
|
||||||
|
@ -823,11 +823,11 @@ ZGEMM_L1x1_LOOP_START:
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZGEMM_L1x1_LOOP_END
|
ble .LZGEMM_L1x1_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZGEMM_L1x1_LOOP:
|
.LZGEMM_L1x1_LOOP:
|
||||||
|
|
||||||
KERNEL1x1_1
|
KERNEL1x1_1
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
@ -840,9 +840,9 @@ ZGEMM_L1x1_LOOP:
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x1_LOOP
|
bgt .LZGEMM_L1x1_LOOP
|
||||||
|
|
||||||
ZGEMM_L1x1_LOOP_END:
|
.LZGEMM_L1x1_LOOP_END:
|
||||||
|
|
||||||
KERNEL1x1_1
|
KERNEL1x1_1
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
@ -854,9 +854,9 @@ ZGEMM_L1x1_LOOP_END:
|
||||||
KERNEL1x1_1
|
KERNEL1x1_1
|
||||||
KERNEL1x1_E2
|
KERNEL1x1_E2
|
||||||
|
|
||||||
b ZGEMM_L1x1_SUB1
|
b .LZGEMM_L1x1_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x1_SUB4:
|
.LZGEMM_L1x1_SUB4:
|
||||||
|
|
||||||
KERNEL1x1_SUBI1
|
KERNEL1x1_SUBI1
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
|
@ -868,34 +868,34 @@ ZGEMM_L1x1_SUB4:
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
|
|
||||||
b ZGEMM_L1x1_SUB1
|
b .LZGEMM_L1x1_SUB1
|
||||||
|
|
||||||
ZGEMM_L1x1_SUB0:
|
.LZGEMM_L1x1_SUB0:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
|
|
||||||
KERNEL1x1_SUBI1
|
KERNEL1x1_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZGEMM_L1x1_SAVE
|
ble .LZGEMM_L1x1_SAVE
|
||||||
b ZGEMM_L1x1_SUB2
|
b .LZGEMM_L1x1_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x1_SUB1:
|
.LZGEMM_L1x1_SUB1:
|
||||||
|
|
||||||
andi. L, K, 7
|
andi. L, K, 7
|
||||||
ble ZGEMM_L1x1_SAVE
|
ble .LZGEMM_L1x1_SAVE
|
||||||
|
|
||||||
ZGEMM_L1x1_SUB2:
|
.LZGEMM_L1x1_SUB2:
|
||||||
|
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZGEMM_L1x1_SUB2
|
bgt .LZGEMM_L1x1_SUB2
|
||||||
|
|
||||||
ZGEMM_L1x1_SAVE:
|
.LZGEMM_L1x1_SAVE:
|
||||||
|
|
||||||
SAVE1x1
|
SAVE1x1
|
||||||
|
|
||||||
ZGEMM_L1x1_END:
|
.LZGEMM_L1x1_END:
|
||||||
|
|
||||||
ZGEMM_L1_END:
|
.LZGEMM_L1_END:
|
||||||
|
|
|
@ -1,3 +1,39 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||||
|
* BLASTEST : OK
|
||||||
|
* CTEST : OK
|
||||||
|
* TEST : OK
|
||||||
|
* LAPACK-TEST : OK
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
||||||
|
|
||||||
#define XSFADD_R1 xsadddp
|
#define XSFADD_R1 xsadddp
|
||||||
|
|
|
@ -1,3 +1,38 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||||
|
* BLASTEST : OK
|
||||||
|
* CTEST : OK
|
||||||
|
* TEST : OK
|
||||||
|
* LAPACK-TEST : OK
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
|
@ -239,11 +274,11 @@
|
||||||
#include "zgemm_macros_8x2_power8.S"
|
#include "zgemm_macros_8x2_power8.S"
|
||||||
|
|
||||||
cmpwi cr0, M, 0
|
cmpwi cr0, M, 0
|
||||||
ble L999
|
ble .L999
|
||||||
cmpwi cr0, N, 0
|
cmpwi cr0, N, 0
|
||||||
ble L999
|
ble .L999
|
||||||
cmpwi cr0, K, 0
|
cmpwi cr0, K, 0
|
||||||
ble L999
|
ble .L999
|
||||||
|
|
||||||
slwi LDC, LDC, ZBASE_SHIFT
|
slwi LDC, LDC, ZBASE_SHIFT
|
||||||
li PRE, 256
|
li PRE, 256
|
||||||
|
@ -266,7 +301,7 @@
|
||||||
|
|
||||||
#include "ztrmm_logic_8x2_power8.S"
|
#include "ztrmm_logic_8x2_power8.S"
|
||||||
|
|
||||||
L999:
|
.L999:
|
||||||
addi r3, 0, 0
|
addi r3, 0, 0
|
||||||
|
|
||||||
lfd f14, 0(SP)
|
lfd f14, 0(SP)
|
||||||
|
|
|
@ -1,7 +1,43 @@
|
||||||
srawi. J, N, 1
|
/***************************************************************************
|
||||||
ble ZTRMM_L2_END
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
ZTRMM_L2_BEGIN:
|
/**************************************************************************************
|
||||||
|
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||||
|
* BLASTEST : OK
|
||||||
|
* CTEST : OK
|
||||||
|
* TEST : OK
|
||||||
|
* LAPACK-TEST : OK
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
srawi. J, N, 1
|
||||||
|
ble .LZTRMM_L2_END
|
||||||
|
|
||||||
|
.LZTRMM_L2_BEGIN:
|
||||||
|
|
||||||
mr CO, C
|
mr CO, C
|
||||||
mr AO, A
|
mr AO, A
|
||||||
|
@ -13,9 +49,9 @@ ZTRMM_L2_BEGIN:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
srawi. I, M, 3
|
srawi. I, M, 3
|
||||||
ble ZTRMM_L2x8_END
|
ble .LZTRMM_L2x8_END
|
||||||
|
|
||||||
ZTRMM_L2x8_BEGIN:
|
.LZTRMM_L2x8_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
|
@ -42,11 +78,11 @@ ZTRMM_L2x8_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L2x8_SUB0
|
ble .LZTRMM_L2x8_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L2x8_SUB4
|
ble .LZTRMM_L2x8_SUB4
|
||||||
|
|
||||||
ZTRMM_L2x8_LOOP_START:
|
.LZTRMM_L2x8_LOOP_START:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
LOAD2x8_1
|
LOAD2x8_1
|
||||||
|
@ -69,11 +105,11 @@ ZTRMM_L2x8_LOOP_START:
|
||||||
KERNEL2x8_2
|
KERNEL2x8_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L2x8_LOOP_END
|
ble .LZTRMM_L2x8_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L2x8_LOOP:
|
.LZTRMM_L2x8_LOOP:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL2x8_1
|
KERNEL2x8_1
|
||||||
|
@ -94,9 +130,9 @@ ZTRMM_L2x8_LOOP:
|
||||||
KERNEL2x8_2
|
KERNEL2x8_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x8_LOOP
|
bgt .LZTRMM_L2x8_LOOP
|
||||||
|
|
||||||
ZTRMM_L2x8_LOOP_END:
|
.LZTRMM_L2x8_LOOP_END:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL2x8_1
|
KERNEL2x8_1
|
||||||
|
@ -115,9 +151,9 @@ ZTRMM_L2x8_LOOP_END:
|
||||||
KERNEL2x8_1
|
KERNEL2x8_1
|
||||||
KERNEL2x8_E2
|
KERNEL2x8_E2
|
||||||
|
|
||||||
b ZTRMM_L2x8_SUB1
|
b .LZTRMM_L2x8_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x8_SUB4:
|
.LZTRMM_L2x8_SUB4:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL2x8_SUBI1
|
KERNEL2x8_SUBI1
|
||||||
|
@ -133,31 +169,31 @@ ZTRMM_L2x8_SUB4:
|
||||||
KERNEL2x8_SUB1
|
KERNEL2x8_SUB1
|
||||||
KERNEL2x8_SUB1
|
KERNEL2x8_SUB1
|
||||||
|
|
||||||
b ZTRMM_L2x8_SUB1
|
b .LZTRMM_L2x8_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x8_SUB0:
|
.LZTRMM_L2x8_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL2x8_SUBI1
|
KERNEL2x8_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L2x8_SAVE
|
ble .LZTRMM_L2x8_SAVE
|
||||||
b ZTRMM_L2x8_SUB2
|
b .LZTRMM_L2x8_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x8_SUB1:
|
.LZTRMM_L2x8_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L2x8_SAVE
|
ble .LZTRMM_L2x8_SAVE
|
||||||
|
|
||||||
ZTRMM_L2x8_SUB2:
|
.LZTRMM_L2x8_SUB2:
|
||||||
|
|
||||||
KERNEL2x8_SUB1
|
KERNEL2x8_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x8_SUB2
|
bgt .LZTRMM_L2x8_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x8_SAVE:
|
.LZTRMM_L2x8_SAVE:
|
||||||
|
|
||||||
SAVE2x8
|
SAVE2x8
|
||||||
|
|
||||||
|
@ -175,16 +211,16 @@ ZTRMM_L2x8_SAVE:
|
||||||
|
|
||||||
|
|
||||||
addic. I, I, -1
|
addic. I, I, -1
|
||||||
bgt ZTRMM_L2x8_BEGIN
|
bgt .LZTRMM_L2x8_BEGIN
|
||||||
|
|
||||||
ZTRMM_L2x8_END:
|
.LZTRMM_L2x8_END:
|
||||||
|
|
||||||
ZTRMM_L2x4_BEGIN:
|
.LZTRMM_L2x4_BEGIN:
|
||||||
andi. T2, M, 7
|
andi. T2, M, 7
|
||||||
ble ZTRMM_L2x1_END
|
ble .LZTRMM_L2x1_END
|
||||||
|
|
||||||
andi. T1, M, 4
|
andi. T1, M, 4
|
||||||
ble ZTRMM_L2x4_END
|
ble .LZTRMM_L2x4_END
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
mr BO, B // B -> BO
|
mr BO, B // B -> BO
|
||||||
|
@ -210,11 +246,11 @@ ZTRMM_L2x4_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L2x4_SUB0
|
ble .LZTRMM_L2x4_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L2x4_SUB4
|
ble .LZTRMM_L2x4_SUB4
|
||||||
|
|
||||||
ZTRMM_L2x4_LOOP_START:
|
.LZTRMM_L2x4_LOOP_START:
|
||||||
|
|
||||||
LOAD2x4_1
|
LOAD2x4_1
|
||||||
KERNEL2x4_I1
|
KERNEL2x4_I1
|
||||||
|
@ -228,11 +264,11 @@ ZTRMM_L2x4_LOOP_START:
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L2x4_LOOP_END
|
ble .LZTRMM_L2x4_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L2x4_LOOP:
|
.LZTRMM_L2x4_LOOP:
|
||||||
|
|
||||||
KERNEL2x4_1
|
KERNEL2x4_1
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
@ -245,9 +281,9 @@ ZTRMM_L2x4_LOOP:
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x4_LOOP
|
bgt .LZTRMM_L2x4_LOOP
|
||||||
|
|
||||||
ZTRMM_L2x4_LOOP_END:
|
.LZTRMM_L2x4_LOOP_END:
|
||||||
|
|
||||||
KERNEL2x4_1
|
KERNEL2x4_1
|
||||||
KERNEL2x4_2
|
KERNEL2x4_2
|
||||||
|
@ -259,9 +295,9 @@ ZTRMM_L2x4_LOOP_END:
|
||||||
KERNEL2x4_1
|
KERNEL2x4_1
|
||||||
KERNEL2x4_E2
|
KERNEL2x4_E2
|
||||||
|
|
||||||
b ZTRMM_L2x4_SUB1
|
b .LZTRMM_L2x4_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x4_SUB4:
|
.LZTRMM_L2x4_SUB4:
|
||||||
|
|
||||||
KERNEL2x4_SUBI1
|
KERNEL2x4_SUBI1
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
|
@ -273,31 +309,31 @@ ZTRMM_L2x4_SUB4:
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
|
|
||||||
b ZTRMM_L2x4_SUB1
|
b .LZTRMM_L2x4_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x4_SUB0:
|
.LZTRMM_L2x4_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL2x4_SUBI1
|
KERNEL2x4_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L2x4_SAVE
|
ble .LZTRMM_L2x4_SAVE
|
||||||
b ZTRMM_L2x4_SUB2
|
b .LZTRMM_L2x4_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x4_SUB1:
|
.LZTRMM_L2x4_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L2x4_SAVE
|
ble .LZTRMM_L2x4_SAVE
|
||||||
|
|
||||||
ZTRMM_L2x4_SUB2:
|
.LZTRMM_L2x4_SUB2:
|
||||||
|
|
||||||
KERNEL2x4_SUB1
|
KERNEL2x4_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x4_SUB2
|
bgt .LZTRMM_L2x4_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x4_SAVE:
|
.LZTRMM_L2x4_SAVE:
|
||||||
|
|
||||||
SAVE2x4
|
SAVE2x4
|
||||||
|
|
||||||
|
@ -314,12 +350,12 @@ ZTRMM_L2x4_SAVE:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
ZTRMM_L2x4_END:
|
.LZTRMM_L2x4_END:
|
||||||
|
|
||||||
ZTRMM_L2x2_BEGIN:
|
.LZTRMM_L2x2_BEGIN:
|
||||||
|
|
||||||
andi. T1, M, 2
|
andi. T1, M, 2
|
||||||
ble ZTRMM_L2x2_END
|
ble .LZTRMM_L2x2_END
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
mr BO, B // B -> BO
|
mr BO, B // B -> BO
|
||||||
|
@ -345,11 +381,11 @@ ZTRMM_L2x2_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L2x2_SUB0
|
ble .LZTRMM_L2x2_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L2x2_SUB4
|
ble .LZTRMM_L2x2_SUB4
|
||||||
|
|
||||||
ZTRMM_L2x2_LOOP_START:
|
.LZTRMM_L2x2_LOOP_START:
|
||||||
|
|
||||||
LOAD2x2_1
|
LOAD2x2_1
|
||||||
KERNEL2x2_I1
|
KERNEL2x2_I1
|
||||||
|
@ -363,11 +399,11 @@ ZTRMM_L2x2_LOOP_START:
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L2x2_LOOP_END
|
ble .LZTRMM_L2x2_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L2x2_LOOP:
|
.LZTRMM_L2x2_LOOP:
|
||||||
|
|
||||||
KERNEL2x2_1
|
KERNEL2x2_1
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
@ -380,9 +416,9 @@ ZTRMM_L2x2_LOOP:
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x2_LOOP
|
bgt .LZTRMM_L2x2_LOOP
|
||||||
|
|
||||||
ZTRMM_L2x2_LOOP_END:
|
.LZTRMM_L2x2_LOOP_END:
|
||||||
|
|
||||||
KERNEL2x2_1
|
KERNEL2x2_1
|
||||||
KERNEL2x2_2
|
KERNEL2x2_2
|
||||||
|
@ -394,9 +430,9 @@ ZTRMM_L2x2_LOOP_END:
|
||||||
KERNEL2x2_1
|
KERNEL2x2_1
|
||||||
KERNEL2x2_E2
|
KERNEL2x2_E2
|
||||||
|
|
||||||
b ZTRMM_L2x2_SUB1
|
b .LZTRMM_L2x2_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x2_SUB4:
|
.LZTRMM_L2x2_SUB4:
|
||||||
|
|
||||||
KERNEL2x2_SUBI1
|
KERNEL2x2_SUBI1
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
|
@ -408,31 +444,31 @@ ZTRMM_L2x2_SUB4:
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
|
|
||||||
b ZTRMM_L2x2_SUB1
|
b .LZTRMM_L2x2_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x2_SUB0:
|
.LZTRMM_L2x2_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL2x2_SUBI1
|
KERNEL2x2_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L2x2_SAVE
|
ble .LZTRMM_L2x2_SAVE
|
||||||
b ZTRMM_L2x2_SUB2
|
b .LZTRMM_L2x2_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x2_SUB1:
|
.LZTRMM_L2x2_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L2x2_SAVE
|
ble .LZTRMM_L2x2_SAVE
|
||||||
|
|
||||||
ZTRMM_L2x2_SUB2:
|
.LZTRMM_L2x2_SUB2:
|
||||||
|
|
||||||
KERNEL2x2_SUB1
|
KERNEL2x2_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x2_SUB2
|
bgt .LZTRMM_L2x2_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x2_SAVE:
|
.LZTRMM_L2x2_SAVE:
|
||||||
|
|
||||||
SAVE2x2
|
SAVE2x2
|
||||||
|
|
||||||
|
@ -449,12 +485,12 @@ ZTRMM_L2x2_SAVE:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
ZTRMM_L2x2_END:
|
.LZTRMM_L2x2_END:
|
||||||
|
|
||||||
ZTRMM_L2x1_BEGIN:
|
.LZTRMM_L2x1_BEGIN:
|
||||||
|
|
||||||
andi. T1, M, 1
|
andi. T1, M, 1
|
||||||
ble ZTRMM_L2x1_END
|
ble .LZTRMM_L2x1_END
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
mr BO, B // B -> BO
|
mr BO, B // B -> BO
|
||||||
|
@ -480,11 +516,11 @@ ZTRMM_L2x1_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L2x1_SUB0
|
ble .LZTRMM_L2x1_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L2x1_SUB4
|
ble .LZTRMM_L2x1_SUB4
|
||||||
|
|
||||||
ZTRMM_L2x1_LOOP_START:
|
.LZTRMM_L2x1_LOOP_START:
|
||||||
|
|
||||||
LOAD2x1_1
|
LOAD2x1_1
|
||||||
KERNEL2x1_I1
|
KERNEL2x1_I1
|
||||||
|
@ -498,11 +534,11 @@ ZTRMM_L2x1_LOOP_START:
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L2x1_LOOP_END
|
ble .LZTRMM_L2x1_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L2x1_LOOP:
|
.LZTRMM_L2x1_LOOP:
|
||||||
|
|
||||||
KERNEL2x1_1
|
KERNEL2x1_1
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
@ -515,9 +551,9 @@ ZTRMM_L2x1_LOOP:
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x1_LOOP
|
bgt .LZTRMM_L2x1_LOOP
|
||||||
|
|
||||||
ZTRMM_L2x1_LOOP_END:
|
.LZTRMM_L2x1_LOOP_END:
|
||||||
|
|
||||||
KERNEL2x1_1
|
KERNEL2x1_1
|
||||||
KERNEL2x1_2
|
KERNEL2x1_2
|
||||||
|
@ -529,9 +565,9 @@ ZTRMM_L2x1_LOOP_END:
|
||||||
KERNEL2x1_1
|
KERNEL2x1_1
|
||||||
KERNEL2x1_E2
|
KERNEL2x1_E2
|
||||||
|
|
||||||
b ZTRMM_L2x1_SUB1
|
b .LZTRMM_L2x1_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x1_SUB4:
|
.LZTRMM_L2x1_SUB4:
|
||||||
|
|
||||||
KERNEL2x1_SUBI1
|
KERNEL2x1_SUBI1
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
|
@ -543,31 +579,31 @@ ZTRMM_L2x1_SUB4:
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
|
|
||||||
b ZTRMM_L2x1_SUB1
|
b .LZTRMM_L2x1_SUB1
|
||||||
|
|
||||||
ZTRMM_L2x1_SUB0:
|
.LZTRMM_L2x1_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL2x1_SUBI1
|
KERNEL2x1_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L2x1_SAVE
|
ble .LZTRMM_L2x1_SAVE
|
||||||
b ZTRMM_L2x1_SUB2
|
b .LZTRMM_L2x1_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x1_SUB1:
|
.LZTRMM_L2x1_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L2x1_SAVE
|
ble .LZTRMM_L2x1_SAVE
|
||||||
|
|
||||||
ZTRMM_L2x1_SUB2:
|
.LZTRMM_L2x1_SUB2:
|
||||||
|
|
||||||
KERNEL2x1_SUB1
|
KERNEL2x1_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L2x1_SUB2
|
bgt .LZTRMM_L2x1_SUB2
|
||||||
|
|
||||||
ZTRMM_L2x1_SAVE:
|
.LZTRMM_L2x1_SAVE:
|
||||||
|
|
||||||
SAVE2x1
|
SAVE2x1
|
||||||
|
|
||||||
|
@ -584,7 +620,7 @@ ZTRMM_L2x1_SAVE:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
ZTRMM_L2x1_END:
|
.LZTRMM_L2x1_END:
|
||||||
|
|
||||||
slwi T1, K, 5
|
slwi T1, K, 5
|
||||||
add B, B, T1
|
add B, B, T1
|
||||||
|
@ -595,23 +631,23 @@ ZTRMM_L2x1_END:
|
||||||
|
|
||||||
|
|
||||||
addic. J, J, -1
|
addic. J, J, -1
|
||||||
bgt ZTRMM_L2_BEGIN
|
bgt .LZTRMM_L2_BEGIN
|
||||||
|
|
||||||
andi. T2, N, 1
|
andi. T2, N, 1
|
||||||
ble L999
|
ble .L999
|
||||||
|
|
||||||
ZTRMM_L2_END:
|
.LZTRMM_L2_END:
|
||||||
|
|
||||||
b ZTRMM_L1_BEGIN
|
b .LZTRMM_L1_BEGIN
|
||||||
|
|
||||||
L999_H1:
|
.L999_H1:
|
||||||
|
|
||||||
b L999
|
b .L999
|
||||||
|
|
||||||
ZTRMM_L1_BEGIN:
|
.LZTRMM_L1_BEGIN:
|
||||||
|
|
||||||
andi. T1, N, 1
|
andi. T1, N, 1
|
||||||
ble ZTRMM_L1_END
|
ble .LZTRMM_L1_END
|
||||||
mr CO, C
|
mr CO, C
|
||||||
mr AO, A
|
mr AO, A
|
||||||
|
|
||||||
|
@ -620,9 +656,9 @@ ZTRMM_L1_BEGIN:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
srawi. I, M, 3
|
srawi. I, M, 3
|
||||||
ble ZTRMM_L1x8_END
|
ble .LZTRMM_L1x8_END
|
||||||
|
|
||||||
ZTRMM_L1x8_BEGIN:
|
.LZTRMM_L1x8_BEGIN:
|
||||||
|
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
|
@ -649,11 +685,11 @@ ZTRMM_L1x8_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L1x8_SUB0
|
ble .LZTRMM_L1x8_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L1x8_SUB4
|
ble .LZTRMM_L1x8_SUB4
|
||||||
|
|
||||||
ZTRMM_L1x8_LOOP_START:
|
.LZTRMM_L1x8_LOOP_START:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
LOAD1x8_1
|
LOAD1x8_1
|
||||||
|
@ -676,11 +712,11 @@ ZTRMM_L1x8_LOOP_START:
|
||||||
KERNEL1x8_2
|
KERNEL1x8_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L1x8_LOOP_END
|
ble .LZTRMM_L1x8_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L1x8_LOOP:
|
.LZTRMM_L1x8_LOOP:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL1x8_1
|
KERNEL1x8_1
|
||||||
|
@ -701,9 +737,9 @@ ZTRMM_L1x8_LOOP:
|
||||||
KERNEL1x8_2
|
KERNEL1x8_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x8_LOOP
|
bgt .LZTRMM_L1x8_LOOP
|
||||||
|
|
||||||
ZTRMM_L1x8_LOOP_END:
|
.LZTRMM_L1x8_LOOP_END:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL1x8_1
|
KERNEL1x8_1
|
||||||
|
@ -722,9 +758,9 @@ ZTRMM_L1x8_LOOP_END:
|
||||||
KERNEL1x8_1
|
KERNEL1x8_1
|
||||||
KERNEL1x8_E2
|
KERNEL1x8_E2
|
||||||
|
|
||||||
b ZTRMM_L1x8_SUB1
|
b .LZTRMM_L1x8_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x8_SUB4:
|
.LZTRMM_L1x8_SUB4:
|
||||||
|
|
||||||
dcbt AO, PRE
|
dcbt AO, PRE
|
||||||
KERNEL1x8_SUBI1
|
KERNEL1x8_SUBI1
|
||||||
|
@ -740,31 +776,31 @@ ZTRMM_L1x8_SUB4:
|
||||||
KERNEL1x8_SUB1
|
KERNEL1x8_SUB1
|
||||||
KERNEL1x8_SUB1
|
KERNEL1x8_SUB1
|
||||||
|
|
||||||
b ZTRMM_L1x8_SUB1
|
b .LZTRMM_L1x8_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x8_SUB0:
|
.LZTRMM_L1x8_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL1x8_SUBI1
|
KERNEL1x8_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L1x8_SAVE
|
ble .LZTRMM_L1x8_SAVE
|
||||||
b ZTRMM_L1x8_SUB2
|
b .LZTRMM_L1x8_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x8_SUB1:
|
.LZTRMM_L1x8_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L1x8_SAVE
|
ble .LZTRMM_L1x8_SAVE
|
||||||
|
|
||||||
ZTRMM_L1x8_SUB2:
|
.LZTRMM_L1x8_SUB2:
|
||||||
|
|
||||||
KERNEL1x8_SUB1
|
KERNEL1x8_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x8_SUB2
|
bgt .LZTRMM_L1x8_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x8_SAVE:
|
.LZTRMM_L1x8_SAVE:
|
||||||
|
|
||||||
SAVE1x8
|
SAVE1x8
|
||||||
|
|
||||||
|
@ -782,16 +818,16 @@ ZTRMM_L1x8_SAVE:
|
||||||
|
|
||||||
|
|
||||||
addic. I, I, -1
|
addic. I, I, -1
|
||||||
bgt ZTRMM_L1x8_BEGIN
|
bgt .LZTRMM_L1x8_BEGIN
|
||||||
|
|
||||||
ZTRMM_L1x8_END:
|
.LZTRMM_L1x8_END:
|
||||||
|
|
||||||
ZTRMM_L1x4_BEGIN:
|
.LZTRMM_L1x4_BEGIN:
|
||||||
andi. T2, M, 7
|
andi. T2, M, 7
|
||||||
ble ZTRMM_L1x1_END
|
ble .LZTRMM_L1x1_END
|
||||||
|
|
||||||
andi. T1, M, 4
|
andi. T1, M, 4
|
||||||
ble ZTRMM_L1x4_END
|
ble .LZTRMM_L1x4_END
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
mr BO, B // B -> BO
|
mr BO, B // B -> BO
|
||||||
|
@ -817,11 +853,11 @@ ZTRMM_L1x4_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L1x4_SUB0
|
ble .LZTRMM_L1x4_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L1x4_SUB4
|
ble .LZTRMM_L1x4_SUB4
|
||||||
|
|
||||||
ZTRMM_L1x4_LOOP_START:
|
.LZTRMM_L1x4_LOOP_START:
|
||||||
|
|
||||||
LOAD1x4_1
|
LOAD1x4_1
|
||||||
KERNEL1x4_I1
|
KERNEL1x4_I1
|
||||||
|
@ -835,11 +871,11 @@ ZTRMM_L1x4_LOOP_START:
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L1x4_LOOP_END
|
ble .LZTRMM_L1x4_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L1x4_LOOP:
|
.LZTRMM_L1x4_LOOP:
|
||||||
|
|
||||||
KERNEL1x4_1
|
KERNEL1x4_1
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
@ -852,9 +888,9 @@ ZTRMM_L1x4_LOOP:
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x4_LOOP
|
bgt .LZTRMM_L1x4_LOOP
|
||||||
|
|
||||||
ZTRMM_L1x4_LOOP_END:
|
.LZTRMM_L1x4_LOOP_END:
|
||||||
|
|
||||||
KERNEL1x4_1
|
KERNEL1x4_1
|
||||||
KERNEL1x4_2
|
KERNEL1x4_2
|
||||||
|
@ -866,9 +902,9 @@ ZTRMM_L1x4_LOOP_END:
|
||||||
KERNEL1x4_1
|
KERNEL1x4_1
|
||||||
KERNEL1x4_E2
|
KERNEL1x4_E2
|
||||||
|
|
||||||
b ZTRMM_L1x4_SUB1
|
b .LZTRMM_L1x4_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x4_SUB4:
|
.LZTRMM_L1x4_SUB4:
|
||||||
|
|
||||||
KERNEL1x4_SUBI1
|
KERNEL1x4_SUBI1
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
|
@ -880,31 +916,31 @@ ZTRMM_L1x4_SUB4:
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
|
|
||||||
b ZTRMM_L1x4_SUB1
|
b .LZTRMM_L1x4_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x4_SUB0:
|
.LZTRMM_L1x4_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL1x4_SUBI1
|
KERNEL1x4_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L1x4_SAVE
|
ble .LZTRMM_L1x4_SAVE
|
||||||
b ZTRMM_L1x4_SUB2
|
b .LZTRMM_L1x4_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x4_SUB1:
|
.LZTRMM_L1x4_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L1x4_SAVE
|
ble .LZTRMM_L1x4_SAVE
|
||||||
|
|
||||||
ZTRMM_L1x4_SUB2:
|
.LZTRMM_L1x4_SUB2:
|
||||||
|
|
||||||
KERNEL1x4_SUB1
|
KERNEL1x4_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x4_SUB2
|
bgt .LZTRMM_L1x4_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x4_SAVE:
|
.LZTRMM_L1x4_SAVE:
|
||||||
|
|
||||||
SAVE1x4
|
SAVE1x4
|
||||||
|
|
||||||
|
@ -921,12 +957,12 @@ ZTRMM_L1x4_SAVE:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
ZTRMM_L1x4_END:
|
.LZTRMM_L1x4_END:
|
||||||
|
|
||||||
ZTRMM_L1x2_BEGIN:
|
.LZTRMM_L1x2_BEGIN:
|
||||||
|
|
||||||
andi. T1, M, 2
|
andi. T1, M, 2
|
||||||
ble ZTRMM_L1x2_END
|
ble .LZTRMM_L1x2_END
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
mr BO, B // B -> BO
|
mr BO, B // B -> BO
|
||||||
|
@ -952,11 +988,11 @@ ZTRMM_L1x2_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L1x2_SUB0
|
ble .LZTRMM_L1x2_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L1x2_SUB4
|
ble .LZTRMM_L1x2_SUB4
|
||||||
|
|
||||||
ZTRMM_L1x2_LOOP_START:
|
.LZTRMM_L1x2_LOOP_START:
|
||||||
|
|
||||||
LOAD1x2_1
|
LOAD1x2_1
|
||||||
KERNEL1x2_I1
|
KERNEL1x2_I1
|
||||||
|
@ -970,11 +1006,11 @@ ZTRMM_L1x2_LOOP_START:
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L1x2_LOOP_END
|
ble .LZTRMM_L1x2_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L1x2_LOOP:
|
.LZTRMM_L1x2_LOOP:
|
||||||
|
|
||||||
KERNEL1x2_1
|
KERNEL1x2_1
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
@ -987,9 +1023,9 @@ ZTRMM_L1x2_LOOP:
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x2_LOOP
|
bgt .LZTRMM_L1x2_LOOP
|
||||||
|
|
||||||
ZTRMM_L1x2_LOOP_END:
|
.LZTRMM_L1x2_LOOP_END:
|
||||||
|
|
||||||
KERNEL1x2_1
|
KERNEL1x2_1
|
||||||
KERNEL1x2_2
|
KERNEL1x2_2
|
||||||
|
@ -1001,9 +1037,9 @@ ZTRMM_L1x2_LOOP_END:
|
||||||
KERNEL1x2_1
|
KERNEL1x2_1
|
||||||
KERNEL1x2_E2
|
KERNEL1x2_E2
|
||||||
|
|
||||||
b ZTRMM_L1x2_SUB1
|
b .LZTRMM_L1x2_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x2_SUB4:
|
.LZTRMM_L1x2_SUB4:
|
||||||
|
|
||||||
KERNEL1x2_SUBI1
|
KERNEL1x2_SUBI1
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
|
@ -1015,31 +1051,31 @@ ZTRMM_L1x2_SUB4:
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
|
|
||||||
b ZTRMM_L1x2_SUB1
|
b .LZTRMM_L1x2_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x2_SUB0:
|
.LZTRMM_L1x2_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL1x2_SUBI1
|
KERNEL1x2_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L1x2_SAVE
|
ble .LZTRMM_L1x2_SAVE
|
||||||
b ZTRMM_L1x2_SUB2
|
b .LZTRMM_L1x2_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x2_SUB1:
|
.LZTRMM_L1x2_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L1x2_SAVE
|
ble .LZTRMM_L1x2_SAVE
|
||||||
|
|
||||||
ZTRMM_L1x2_SUB2:
|
.LZTRMM_L1x2_SUB2:
|
||||||
|
|
||||||
KERNEL1x2_SUB1
|
KERNEL1x2_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x2_SUB2
|
bgt .LZTRMM_L1x2_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x2_SAVE:
|
.LZTRMM_L1x2_SAVE:
|
||||||
|
|
||||||
SAVE1x2
|
SAVE1x2
|
||||||
|
|
||||||
|
@ -1056,12 +1092,12 @@ ZTRMM_L1x2_SAVE:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
ZTRMM_L1x2_END:
|
.LZTRMM_L1x2_END:
|
||||||
|
|
||||||
ZTRMM_L1x1_BEGIN:
|
.LZTRMM_L1x1_BEGIN:
|
||||||
|
|
||||||
andi. T1, M, 1
|
andi. T1, M, 1
|
||||||
ble ZTRMM_L1x1_END
|
ble .LZTRMM_L1x1_END
|
||||||
|
|
||||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||||
mr BO, B // B -> BO
|
mr BO, B // B -> BO
|
||||||
|
@ -1087,11 +1123,11 @@ ZTRMM_L1x1_BEGIN:
|
||||||
mr KKK, T1
|
mr KKK, T1
|
||||||
mr K1, T1
|
mr K1, T1
|
||||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||||
ble ZTRMM_L1x1_SUB0
|
ble .LZTRMM_L1x1_SUB0
|
||||||
cmpwi cr0, L, 1
|
cmpwi cr0, L, 1
|
||||||
ble ZTRMM_L1x1_SUB4
|
ble .LZTRMM_L1x1_SUB4
|
||||||
|
|
||||||
ZTRMM_L1x1_LOOP_START:
|
.LZTRMM_L1x1_LOOP_START:
|
||||||
|
|
||||||
LOAD1x1_1
|
LOAD1x1_1
|
||||||
KERNEL1x1_I1
|
KERNEL1x1_I1
|
||||||
|
@ -1105,11 +1141,11 @@ ZTRMM_L1x1_LOOP_START:
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
|
||||||
addic. L, L, -2
|
addic. L, L, -2
|
||||||
ble ZTRMM_L1x1_LOOP_END
|
ble .LZTRMM_L1x1_LOOP_END
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
ZTRMM_L1x1_LOOP:
|
.LZTRMM_L1x1_LOOP:
|
||||||
|
|
||||||
KERNEL1x1_1
|
KERNEL1x1_1
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
@ -1122,9 +1158,9 @@ ZTRMM_L1x1_LOOP:
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x1_LOOP
|
bgt .LZTRMM_L1x1_LOOP
|
||||||
|
|
||||||
ZTRMM_L1x1_LOOP_END:
|
.LZTRMM_L1x1_LOOP_END:
|
||||||
|
|
||||||
KERNEL1x1_1
|
KERNEL1x1_1
|
||||||
KERNEL1x1_2
|
KERNEL1x1_2
|
||||||
|
@ -1136,9 +1172,9 @@ ZTRMM_L1x1_LOOP_END:
|
||||||
KERNEL1x1_1
|
KERNEL1x1_1
|
||||||
KERNEL1x1_E2
|
KERNEL1x1_E2
|
||||||
|
|
||||||
b ZTRMM_L1x1_SUB1
|
b .LZTRMM_L1x1_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x1_SUB4:
|
.LZTRMM_L1x1_SUB4:
|
||||||
|
|
||||||
KERNEL1x1_SUBI1
|
KERNEL1x1_SUBI1
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
|
@ -1150,31 +1186,31 @@ ZTRMM_L1x1_SUB4:
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
|
|
||||||
b ZTRMM_L1x1_SUB1
|
b .LZTRMM_L1x1_SUB1
|
||||||
|
|
||||||
ZTRMM_L1x1_SUB0:
|
.LZTRMM_L1x1_SUB0:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
|
|
||||||
KERNEL1x1_SUBI1
|
KERNEL1x1_SUBI1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
ble ZTRMM_L1x1_SAVE
|
ble .LZTRMM_L1x1_SAVE
|
||||||
b ZTRMM_L1x1_SUB2
|
b .LZTRMM_L1x1_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x1_SUB1:
|
.LZTRMM_L1x1_SUB1:
|
||||||
|
|
||||||
andi. L, K1, 7 // K1 & 7 -> L
|
andi. L, K1, 7 // K1 & 7 -> L
|
||||||
ble ZTRMM_L1x1_SAVE
|
ble .LZTRMM_L1x1_SAVE
|
||||||
|
|
||||||
ZTRMM_L1x1_SUB2:
|
.LZTRMM_L1x1_SUB2:
|
||||||
|
|
||||||
KERNEL1x1_SUB1
|
KERNEL1x1_SUB1
|
||||||
|
|
||||||
addic. L, L, -1
|
addic. L, L, -1
|
||||||
bgt ZTRMM_L1x1_SUB2
|
bgt .LZTRMM_L1x1_SUB2
|
||||||
|
|
||||||
ZTRMM_L1x1_SAVE:
|
.LZTRMM_L1x1_SAVE:
|
||||||
|
|
||||||
SAVE1x1
|
SAVE1x1
|
||||||
|
|
||||||
|
@ -1191,11 +1227,11 @@ ZTRMM_L1x1_SAVE:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
ZTRMM_L1x1_END:
|
.LZTRMM_L1x1_END:
|
||||||
|
|
||||||
#if !defined(LEFT)
|
#if !defined(LEFT)
|
||||||
addi KK, KK, 1 // KK += Number of values in B
|
addi KK, KK, 1 // KK += Number of values in B
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
ZTRMM_L1_END:
|
.LZTRMM_L1_END:
|
||||||
|
|
Loading…
Reference in New Issue