Modified assembly label names so that they are hidden.
Added license information.
This commit is contained in:
parent
0afc76fd65
commit
085f215257
|
@ -1,3 +1,38 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
|
@ -218,11 +253,11 @@
|
|||
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble L999_H1
|
||||
ble .L999_H1
|
||||
cmpwi cr0, N, 0
|
||||
ble L999_H1
|
||||
ble .L999_H1
|
||||
cmpwi cr0, K, 0
|
||||
ble L999_H1
|
||||
ble .L999_H1
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi ALPHA, SP, 296
|
||||
|
@ -241,7 +276,7 @@
|
|||
|
||||
#include "dgemm_logic_16x4_power8.S"
|
||||
|
||||
L999:
|
||||
.L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,38 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************
|
||||
* Macros for N=4, M=16 *
|
||||
*********************************************************************/
|
||||
|
|
|
@ -1,3 +1,38 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
|
@ -228,11 +263,11 @@
|
|||
#endif
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble L999_H1
|
||||
ble .L999_H1
|
||||
cmpwi cr0, N, 0
|
||||
ble L999_H1
|
||||
ble .L999_H1
|
||||
cmpwi cr0, K, 0
|
||||
ble L999_H1
|
||||
ble .L999_H1
|
||||
|
||||
#ifdef __64BIT__
|
||||
addi ALPHA, SP, 296
|
||||
|
@ -251,7 +286,7 @@
|
|||
|
||||
#include "dtrmm_logic_16x4_power8.S"
|
||||
|
||||
L999:
|
||||
.L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,38 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
|
@ -233,11 +268,11 @@
|
|||
#include "zgemm_macros_8x2_power8.S"
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble L999
|
||||
ble .L999
|
||||
cmpwi cr0, N, 0
|
||||
ble L999
|
||||
ble .L999
|
||||
cmpwi cr0, K, 0
|
||||
ble L999
|
||||
ble .L999
|
||||
|
||||
slwi LDC, LDC, ZBASE_SHIFT
|
||||
li PRE, 256
|
||||
|
@ -260,7 +295,7 @@
|
|||
|
||||
#include "zgemm_logic_8x2_power8.S"
|
||||
|
||||
L999:
|
||||
.L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
|
@ -1,25 +1,25 @@
|
|||
srawi. J, N, 1
|
||||
ble ZGEMM_L2_END
|
||||
ble .LZGEMM_L2_END
|
||||
|
||||
ZGEMM_L2_BEGIN:
|
||||
.LZGEMM_L2_BEGIN:
|
||||
|
||||
mr CO, C
|
||||
mr AO, A
|
||||
slwi T1, LDC , 1
|
||||
add C, C, T1
|
||||
srawi. I, M, 3
|
||||
ble ZGEMM_L2x8_END
|
||||
ble .LZGEMM_L2x8_END
|
||||
|
||||
ZGEMM_L2x8_BEGIN:
|
||||
.LZGEMM_L2x8_BEGIN:
|
||||
|
||||
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L2x8_SUB0
|
||||
ble .LZGEMM_L2x8_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L2x8_SUB4
|
||||
ble .LZGEMM_L2x8_SUB4
|
||||
|
||||
ZGEMM_L2x8_LOOP_START:
|
||||
.LZGEMM_L2x8_LOOP_START:
|
||||
|
||||
dcbt AO, PRE
|
||||
LOAD2x8_1
|
||||
|
@ -42,11 +42,11 @@ ZGEMM_L2x8_LOOP_START:
|
|||
KERNEL2x8_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L2x8_LOOP_END
|
||||
ble .LZGEMM_L2x8_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L2x8_LOOP:
|
||||
.LZGEMM_L2x8_LOOP:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL2x8_1
|
||||
|
@ -67,9 +67,9 @@ ZGEMM_L2x8_LOOP:
|
|||
KERNEL2x8_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x8_LOOP
|
||||
bgt .LZGEMM_L2x8_LOOP
|
||||
|
||||
ZGEMM_L2x8_LOOP_END:
|
||||
.LZGEMM_L2x8_LOOP_END:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL2x8_1
|
||||
|
@ -88,9 +88,9 @@ ZGEMM_L2x8_LOOP_END:
|
|||
KERNEL2x8_1
|
||||
KERNEL2x8_E2
|
||||
|
||||
b ZGEMM_L2x8_SUB1
|
||||
b .LZGEMM_L2x8_SUB1
|
||||
|
||||
ZGEMM_L2x8_SUB4:
|
||||
.LZGEMM_L2x8_SUB4:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL2x8_SUBI1
|
||||
|
@ -106,53 +106,53 @@ ZGEMM_L2x8_SUB4:
|
|||
KERNEL2x8_SUB1
|
||||
KERNEL2x8_SUB1
|
||||
|
||||
b ZGEMM_L2x8_SUB1
|
||||
b .LZGEMM_L2x8_SUB1
|
||||
|
||||
ZGEMM_L2x8_SUB0:
|
||||
.LZGEMM_L2x8_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL2x8_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L2x8_SAVE
|
||||
b ZGEMM_L2x8_SUB2
|
||||
ble .LZGEMM_L2x8_SAVE
|
||||
b .LZGEMM_L2x8_SUB2
|
||||
|
||||
ZGEMM_L2x8_SUB1:
|
||||
.LZGEMM_L2x8_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L2x8_SAVE
|
||||
ble .LZGEMM_L2x8_SAVE
|
||||
|
||||
ZGEMM_L2x8_SUB2:
|
||||
.LZGEMM_L2x8_SUB2:
|
||||
|
||||
KERNEL2x8_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x8_SUB2
|
||||
bgt .LZGEMM_L2x8_SUB2
|
||||
|
||||
ZGEMM_L2x8_SAVE:
|
||||
.LZGEMM_L2x8_SAVE:
|
||||
|
||||
SAVE2x8
|
||||
|
||||
addic. I, I, -1
|
||||
bgt ZGEMM_L2x8_BEGIN
|
||||
bgt .LZGEMM_L2x8_BEGIN
|
||||
|
||||
ZGEMM_L2x8_END:
|
||||
.LZGEMM_L2x8_END:
|
||||
|
||||
ZGEMM_L2x4_BEGIN:
|
||||
.LZGEMM_L2x4_BEGIN:
|
||||
|
||||
andi. T2, M, 7
|
||||
ble ZGEMM_L2x1_END
|
||||
ble .LZGEMM_L2x1_END
|
||||
|
||||
andi. T1, M, 4
|
||||
ble ZGEMM_L2x4_END
|
||||
ble .LZGEMM_L2x4_END
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L2x4_SUB0
|
||||
ble .LZGEMM_L2x4_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L2x4_SUB4
|
||||
ble .LZGEMM_L2x4_SUB4
|
||||
|
||||
ZGEMM_L2x4_LOOP_START:
|
||||
.LZGEMM_L2x4_LOOP_START:
|
||||
|
||||
LOAD2x4_1
|
||||
KERNEL2x4_I1
|
||||
|
@ -166,11 +166,11 @@ ZGEMM_L2x4_LOOP_START:
|
|||
KERNEL2x4_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L2x4_LOOP_END
|
||||
ble .LZGEMM_L2x4_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L2x4_LOOP:
|
||||
.LZGEMM_L2x4_LOOP:
|
||||
|
||||
KERNEL2x4_1
|
||||
KERNEL2x4_2
|
||||
|
@ -183,9 +183,9 @@ ZGEMM_L2x4_LOOP:
|
|||
KERNEL2x4_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x4_LOOP
|
||||
bgt .LZGEMM_L2x4_LOOP
|
||||
|
||||
ZGEMM_L2x4_LOOP_END:
|
||||
.LZGEMM_L2x4_LOOP_END:
|
||||
|
||||
KERNEL2x4_1
|
||||
KERNEL2x4_2
|
||||
|
@ -197,9 +197,9 @@ ZGEMM_L2x4_LOOP_END:
|
|||
KERNEL2x4_1
|
||||
KERNEL2x4_E2
|
||||
|
||||
b ZGEMM_L2x4_SUB1
|
||||
b .LZGEMM_L2x4_SUB1
|
||||
|
||||
ZGEMM_L2x4_SUB4:
|
||||
.LZGEMM_L2x4_SUB4:
|
||||
|
||||
KERNEL2x4_SUBI1
|
||||
KERNEL2x4_SUB1
|
||||
|
@ -211,48 +211,48 @@ ZGEMM_L2x4_SUB4:
|
|||
KERNEL2x4_SUB1
|
||||
KERNEL2x4_SUB1
|
||||
|
||||
b ZGEMM_L2x4_SUB1
|
||||
b .LZGEMM_L2x4_SUB1
|
||||
|
||||
ZGEMM_L2x4_SUB0:
|
||||
.LZGEMM_L2x4_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL2x4_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L2x4_SAVE
|
||||
b ZGEMM_L2x4_SUB2
|
||||
ble .LZGEMM_L2x4_SAVE
|
||||
b .LZGEMM_L2x4_SUB2
|
||||
|
||||
ZGEMM_L2x4_SUB1:
|
||||
.LZGEMM_L2x4_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L2x4_SAVE
|
||||
ble .LZGEMM_L2x4_SAVE
|
||||
|
||||
ZGEMM_L2x4_SUB2:
|
||||
.LZGEMM_L2x4_SUB2:
|
||||
|
||||
KERNEL2x4_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x4_SUB2
|
||||
bgt .LZGEMM_L2x4_SUB2
|
||||
|
||||
ZGEMM_L2x4_SAVE:
|
||||
.LZGEMM_L2x4_SAVE:
|
||||
|
||||
SAVE2x4
|
||||
|
||||
ZGEMM_L2x4_END:
|
||||
.LZGEMM_L2x4_END:
|
||||
|
||||
ZGEMM_L2x2_BEGIN:
|
||||
.LZGEMM_L2x2_BEGIN:
|
||||
|
||||
|
||||
andi. T1, M, 2
|
||||
ble ZGEMM_L2x2_END
|
||||
ble .LZGEMM_L2x2_END
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L2x2_SUB0
|
||||
ble .LZGEMM_L2x2_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L2x2_SUB4
|
||||
ble .LZGEMM_L2x2_SUB4
|
||||
|
||||
ZGEMM_L2x2_LOOP_START:
|
||||
.LZGEMM_L2x2_LOOP_START:
|
||||
|
||||
LOAD2x2_1
|
||||
KERNEL2x2_I1
|
||||
|
@ -266,11 +266,11 @@ ZGEMM_L2x2_LOOP_START:
|
|||
KERNEL2x2_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L2x2_LOOP_END
|
||||
ble .LZGEMM_L2x2_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L2x2_LOOP:
|
||||
.LZGEMM_L2x2_LOOP:
|
||||
|
||||
KERNEL2x2_1
|
||||
KERNEL2x2_2
|
||||
|
@ -283,9 +283,9 @@ ZGEMM_L2x2_LOOP:
|
|||
KERNEL2x2_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x2_LOOP
|
||||
bgt .LZGEMM_L2x2_LOOP
|
||||
|
||||
ZGEMM_L2x2_LOOP_END:
|
||||
.LZGEMM_L2x2_LOOP_END:
|
||||
|
||||
KERNEL2x2_1
|
||||
KERNEL2x2_2
|
||||
|
@ -297,9 +297,9 @@ ZGEMM_L2x2_LOOP_END:
|
|||
KERNEL2x2_1
|
||||
KERNEL2x2_E2
|
||||
|
||||
b ZGEMM_L2x2_SUB1
|
||||
b .LZGEMM_L2x2_SUB1
|
||||
|
||||
ZGEMM_L2x2_SUB4:
|
||||
.LZGEMM_L2x2_SUB4:
|
||||
|
||||
KERNEL2x2_SUBI1
|
||||
KERNEL2x2_SUB1
|
||||
|
@ -311,48 +311,48 @@ ZGEMM_L2x2_SUB4:
|
|||
KERNEL2x2_SUB1
|
||||
KERNEL2x2_SUB1
|
||||
|
||||
b ZGEMM_L2x2_SUB1
|
||||
b .LZGEMM_L2x2_SUB1
|
||||
|
||||
ZGEMM_L2x2_SUB0:
|
||||
.LZGEMM_L2x2_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL2x2_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L2x2_SAVE
|
||||
b ZGEMM_L2x2_SUB2
|
||||
ble .LZGEMM_L2x2_SAVE
|
||||
b .LZGEMM_L2x2_SUB2
|
||||
|
||||
ZGEMM_L2x2_SUB1:
|
||||
.LZGEMM_L2x2_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L2x2_SAVE
|
||||
ble .LZGEMM_L2x2_SAVE
|
||||
|
||||
ZGEMM_L2x2_SUB2:
|
||||
.LZGEMM_L2x2_SUB2:
|
||||
|
||||
KERNEL2x2_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x2_SUB2
|
||||
bgt .LZGEMM_L2x2_SUB2
|
||||
|
||||
ZGEMM_L2x2_SAVE:
|
||||
.LZGEMM_L2x2_SAVE:
|
||||
|
||||
SAVE2x2
|
||||
|
||||
ZGEMM_L2x2_END:
|
||||
.LZGEMM_L2x2_END:
|
||||
|
||||
ZGEMM_L2x1_BEGIN:
|
||||
.LZGEMM_L2x1_BEGIN:
|
||||
|
||||
|
||||
andi. T1, M, 1
|
||||
ble ZGEMM_L2x1_END
|
||||
ble .LZGEMM_L2x1_END
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L2x1_SUB0
|
||||
ble .LZGEMM_L2x1_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L2x1_SUB4
|
||||
ble .LZGEMM_L2x1_SUB4
|
||||
|
||||
ZGEMM_L2x1_LOOP_START:
|
||||
.LZGEMM_L2x1_LOOP_START:
|
||||
|
||||
LOAD2x1_1
|
||||
KERNEL2x1_I1
|
||||
|
@ -366,11 +366,11 @@ ZGEMM_L2x1_LOOP_START:
|
|||
KERNEL2x1_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L2x1_LOOP_END
|
||||
ble .LZGEMM_L2x1_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L2x1_LOOP:
|
||||
.LZGEMM_L2x1_LOOP:
|
||||
|
||||
KERNEL2x1_1
|
||||
KERNEL2x1_2
|
||||
|
@ -383,9 +383,9 @@ ZGEMM_L2x1_LOOP:
|
|||
KERNEL2x1_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x1_LOOP
|
||||
bgt .LZGEMM_L2x1_LOOP
|
||||
|
||||
ZGEMM_L2x1_LOOP_END:
|
||||
.LZGEMM_L2x1_LOOP_END:
|
||||
|
||||
KERNEL2x1_1
|
||||
KERNEL2x1_2
|
||||
|
@ -397,9 +397,9 @@ ZGEMM_L2x1_LOOP_END:
|
|||
KERNEL2x1_1
|
||||
KERNEL2x1_E2
|
||||
|
||||
b ZGEMM_L2x1_SUB1
|
||||
b .LZGEMM_L2x1_SUB1
|
||||
|
||||
ZGEMM_L2x1_SUB4:
|
||||
.LZGEMM_L2x1_SUB4:
|
||||
|
||||
KERNEL2x1_SUBI1
|
||||
KERNEL2x1_SUB1
|
||||
|
@ -411,72 +411,72 @@ ZGEMM_L2x1_SUB4:
|
|||
KERNEL2x1_SUB1
|
||||
KERNEL2x1_SUB1
|
||||
|
||||
b ZGEMM_L2x1_SUB1
|
||||
b .LZGEMM_L2x1_SUB1
|
||||
|
||||
ZGEMM_L2x1_SUB0:
|
||||
.LZGEMM_L2x1_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL2x1_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L2x1_SAVE
|
||||
b ZGEMM_L2x1_SUB2
|
||||
ble .LZGEMM_L2x1_SAVE
|
||||
b .LZGEMM_L2x1_SUB2
|
||||
|
||||
ZGEMM_L2x1_SUB1:
|
||||
.LZGEMM_L2x1_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L2x1_SAVE
|
||||
ble .LZGEMM_L2x1_SAVE
|
||||
|
||||
ZGEMM_L2x1_SUB2:
|
||||
.LZGEMM_L2x1_SUB2:
|
||||
|
||||
KERNEL2x1_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L2x1_SUB2
|
||||
bgt .LZGEMM_L2x1_SUB2
|
||||
|
||||
ZGEMM_L2x1_SAVE:
|
||||
.LZGEMM_L2x1_SAVE:
|
||||
|
||||
SAVE2x1
|
||||
|
||||
ZGEMM_L2x1_END:
|
||||
.LZGEMM_L2x1_END:
|
||||
|
||||
slwi T1, K, 5
|
||||
add B, B, T1
|
||||
|
||||
addic. J, J, -1
|
||||
bgt ZGEMM_L2_BEGIN
|
||||
bgt .LZGEMM_L2_BEGIN
|
||||
|
||||
andi. T2, N, 1
|
||||
ble L999
|
||||
ble .L999
|
||||
|
||||
ZGEMM_L2_END:
|
||||
.LZGEMM_L2_END:
|
||||
|
||||
b ZGEMM_L1_BEGIN
|
||||
b .LZGEMM_L1_BEGIN
|
||||
|
||||
L999_H1:
|
||||
.L999_H1:
|
||||
|
||||
b L999
|
||||
b .L999
|
||||
|
||||
ZGEMM_L1_BEGIN:
|
||||
.LZGEMM_L1_BEGIN:
|
||||
|
||||
andi. T1, N, 1
|
||||
ble ZGEMM_L1_END
|
||||
ble .LZGEMM_L1_END
|
||||
mr CO, C
|
||||
mr AO, A
|
||||
srawi. I, M, 3
|
||||
ble ZGEMM_L1x8_END
|
||||
ble .LZGEMM_L1x8_END
|
||||
|
||||
ZGEMM_L1x8_BEGIN:
|
||||
.LZGEMM_L1x8_BEGIN:
|
||||
|
||||
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L1x8_SUB0
|
||||
ble .LZGEMM_L1x8_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L1x8_SUB4
|
||||
ble .LZGEMM_L1x8_SUB4
|
||||
|
||||
ZGEMM_L1x8_LOOP_START:
|
||||
.LZGEMM_L1x8_LOOP_START:
|
||||
|
||||
dcbt AO, PRE
|
||||
LOAD1x8_1
|
||||
|
@ -499,11 +499,11 @@ ZGEMM_L1x8_LOOP_START:
|
|||
KERNEL1x8_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L1x8_LOOP_END
|
||||
ble .LZGEMM_L1x8_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L1x8_LOOP:
|
||||
.LZGEMM_L1x8_LOOP:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL1x8_1
|
||||
|
@ -524,9 +524,9 @@ ZGEMM_L1x8_LOOP:
|
|||
KERNEL1x8_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x8_LOOP
|
||||
bgt .LZGEMM_L1x8_LOOP
|
||||
|
||||
ZGEMM_L1x8_LOOP_END:
|
||||
.LZGEMM_L1x8_LOOP_END:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL1x8_1
|
||||
|
@ -545,9 +545,9 @@ ZGEMM_L1x8_LOOP_END:
|
|||
KERNEL1x8_1
|
||||
KERNEL1x8_E2
|
||||
|
||||
b ZGEMM_L1x8_SUB1
|
||||
b .LZGEMM_L1x8_SUB1
|
||||
|
||||
ZGEMM_L1x8_SUB4:
|
||||
.LZGEMM_L1x8_SUB4:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL1x8_SUBI1
|
||||
|
@ -563,53 +563,53 @@ ZGEMM_L1x8_SUB4:
|
|||
KERNEL1x8_SUB1
|
||||
KERNEL1x8_SUB1
|
||||
|
||||
b ZGEMM_L1x8_SUB1
|
||||
b .LZGEMM_L1x8_SUB1
|
||||
|
||||
ZGEMM_L1x8_SUB0:
|
||||
.LZGEMM_L1x8_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL1x8_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L1x8_SAVE
|
||||
b ZGEMM_L1x8_SUB2
|
||||
ble .LZGEMM_L1x8_SAVE
|
||||
b .LZGEMM_L1x8_SUB2
|
||||
|
||||
ZGEMM_L1x8_SUB1:
|
||||
.LZGEMM_L1x8_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L1x8_SAVE
|
||||
ble .LZGEMM_L1x8_SAVE
|
||||
|
||||
ZGEMM_L1x8_SUB2:
|
||||
.LZGEMM_L1x8_SUB2:
|
||||
|
||||
KERNEL1x8_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x8_SUB2
|
||||
bgt .LZGEMM_L1x8_SUB2
|
||||
|
||||
ZGEMM_L1x8_SAVE:
|
||||
.LZGEMM_L1x8_SAVE:
|
||||
|
||||
SAVE1x8
|
||||
|
||||
addic. I, I, -1
|
||||
bgt ZGEMM_L1x8_BEGIN
|
||||
bgt .LZGEMM_L1x8_BEGIN
|
||||
|
||||
ZGEMM_L1x8_END:
|
||||
.LZGEMM_L1x8_END:
|
||||
|
||||
ZGEMM_L1x4_BEGIN:
|
||||
.LZGEMM_L1x4_BEGIN:
|
||||
|
||||
andi. T2, M, 7
|
||||
ble ZGEMM_L1x1_END
|
||||
ble .LZGEMM_L1x1_END
|
||||
|
||||
andi. T1, M, 4
|
||||
ble ZGEMM_L1x4_END
|
||||
ble .LZGEMM_L1x4_END
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L1x4_SUB0
|
||||
ble .LZGEMM_L1x4_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L1x4_SUB4
|
||||
ble .LZGEMM_L1x4_SUB4
|
||||
|
||||
ZGEMM_L1x4_LOOP_START:
|
||||
.LZGEMM_L1x4_LOOP_START:
|
||||
|
||||
LOAD1x4_1
|
||||
KERNEL1x4_I1
|
||||
|
@ -623,11 +623,11 @@ ZGEMM_L1x4_LOOP_START:
|
|||
KERNEL1x4_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L1x4_LOOP_END
|
||||
ble .LZGEMM_L1x4_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L1x4_LOOP:
|
||||
.LZGEMM_L1x4_LOOP:
|
||||
|
||||
KERNEL1x4_1
|
||||
KERNEL1x4_2
|
||||
|
@ -640,9 +640,9 @@ ZGEMM_L1x4_LOOP:
|
|||
KERNEL1x4_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x4_LOOP
|
||||
bgt .LZGEMM_L1x4_LOOP
|
||||
|
||||
ZGEMM_L1x4_LOOP_END:
|
||||
.LZGEMM_L1x4_LOOP_END:
|
||||
|
||||
KERNEL1x4_1
|
||||
KERNEL1x4_2
|
||||
|
@ -654,9 +654,9 @@ ZGEMM_L1x4_LOOP_END:
|
|||
KERNEL1x4_1
|
||||
KERNEL1x4_E2
|
||||
|
||||
b ZGEMM_L1x4_SUB1
|
||||
b .LZGEMM_L1x4_SUB1
|
||||
|
||||
ZGEMM_L1x4_SUB4:
|
||||
.LZGEMM_L1x4_SUB4:
|
||||
|
||||
KERNEL1x4_SUBI1
|
||||
KERNEL1x4_SUB1
|
||||
|
@ -668,48 +668,48 @@ ZGEMM_L1x4_SUB4:
|
|||
KERNEL1x4_SUB1
|
||||
KERNEL1x4_SUB1
|
||||
|
||||
b ZGEMM_L1x4_SUB1
|
||||
b .LZGEMM_L1x4_SUB1
|
||||
|
||||
ZGEMM_L1x4_SUB0:
|
||||
.LZGEMM_L1x4_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL1x4_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L1x4_SAVE
|
||||
b ZGEMM_L1x4_SUB2
|
||||
ble .LZGEMM_L1x4_SAVE
|
||||
b .LZGEMM_L1x4_SUB2
|
||||
|
||||
ZGEMM_L1x4_SUB1:
|
||||
.LZGEMM_L1x4_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L1x4_SAVE
|
||||
ble .LZGEMM_L1x4_SAVE
|
||||
|
||||
ZGEMM_L1x4_SUB2:
|
||||
.LZGEMM_L1x4_SUB2:
|
||||
|
||||
KERNEL1x4_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x4_SUB2
|
||||
bgt .LZGEMM_L1x4_SUB2
|
||||
|
||||
ZGEMM_L1x4_SAVE:
|
||||
.LZGEMM_L1x4_SAVE:
|
||||
|
||||
SAVE1x4
|
||||
|
||||
ZGEMM_L1x4_END:
|
||||
.LZGEMM_L1x4_END:
|
||||
|
||||
ZGEMM_L1x2_BEGIN:
|
||||
.LZGEMM_L1x2_BEGIN:
|
||||
|
||||
|
||||
andi. T1, M, 2
|
||||
ble ZGEMM_L1x2_END
|
||||
ble .LZGEMM_L1x2_END
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L1x2_SUB0
|
||||
ble .LZGEMM_L1x2_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L1x2_SUB4
|
||||
ble .LZGEMM_L1x2_SUB4
|
||||
|
||||
ZGEMM_L1x2_LOOP_START:
|
||||
.LZGEMM_L1x2_LOOP_START:
|
||||
|
||||
LOAD1x2_1
|
||||
KERNEL1x2_I1
|
||||
|
@ -723,11 +723,11 @@ ZGEMM_L1x2_LOOP_START:
|
|||
KERNEL1x2_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L1x2_LOOP_END
|
||||
ble .LZGEMM_L1x2_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L1x2_LOOP:
|
||||
.LZGEMM_L1x2_LOOP:
|
||||
|
||||
KERNEL1x2_1
|
||||
KERNEL1x2_2
|
||||
|
@ -740,9 +740,9 @@ ZGEMM_L1x2_LOOP:
|
|||
KERNEL1x2_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x2_LOOP
|
||||
bgt .LZGEMM_L1x2_LOOP
|
||||
|
||||
ZGEMM_L1x2_LOOP_END:
|
||||
.LZGEMM_L1x2_LOOP_END:
|
||||
|
||||
KERNEL1x2_1
|
||||
KERNEL1x2_2
|
||||
|
@ -754,9 +754,9 @@ ZGEMM_L1x2_LOOP_END:
|
|||
KERNEL1x2_1
|
||||
KERNEL1x2_E2
|
||||
|
||||
b ZGEMM_L1x2_SUB1
|
||||
b .LZGEMM_L1x2_SUB1
|
||||
|
||||
ZGEMM_L1x2_SUB4:
|
||||
.LZGEMM_L1x2_SUB4:
|
||||
|
||||
KERNEL1x2_SUBI1
|
||||
KERNEL1x2_SUB1
|
||||
|
@ -768,48 +768,48 @@ ZGEMM_L1x2_SUB4:
|
|||
KERNEL1x2_SUB1
|
||||
KERNEL1x2_SUB1
|
||||
|
||||
b ZGEMM_L1x2_SUB1
|
||||
b .LZGEMM_L1x2_SUB1
|
||||
|
||||
ZGEMM_L1x2_SUB0:
|
||||
.LZGEMM_L1x2_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL1x2_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L1x2_SAVE
|
||||
b ZGEMM_L1x2_SUB2
|
||||
ble .LZGEMM_L1x2_SAVE
|
||||
b .LZGEMM_L1x2_SUB2
|
||||
|
||||
ZGEMM_L1x2_SUB1:
|
||||
.LZGEMM_L1x2_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L1x2_SAVE
|
||||
ble .LZGEMM_L1x2_SAVE
|
||||
|
||||
ZGEMM_L1x2_SUB2:
|
||||
.LZGEMM_L1x2_SUB2:
|
||||
|
||||
KERNEL1x2_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x2_SUB2
|
||||
bgt .LZGEMM_L1x2_SUB2
|
||||
|
||||
ZGEMM_L1x2_SAVE:
|
||||
.LZGEMM_L1x2_SAVE:
|
||||
|
||||
SAVE1x2
|
||||
|
||||
ZGEMM_L1x2_END:
|
||||
.LZGEMM_L1x2_END:
|
||||
|
||||
ZGEMM_L1x1_BEGIN:
|
||||
.LZGEMM_L1x1_BEGIN:
|
||||
|
||||
|
||||
andi. T1, M, 1
|
||||
ble ZGEMM_L1x1_END
|
||||
ble .LZGEMM_L1x1_END
|
||||
mr BO, B
|
||||
srawi. L, K, 3
|
||||
ble ZGEMM_L1x1_SUB0
|
||||
ble .LZGEMM_L1x1_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZGEMM_L1x1_SUB4
|
||||
ble .LZGEMM_L1x1_SUB4
|
||||
|
||||
ZGEMM_L1x1_LOOP_START:
|
||||
.LZGEMM_L1x1_LOOP_START:
|
||||
|
||||
LOAD1x1_1
|
||||
KERNEL1x1_I1
|
||||
|
@ -823,11 +823,11 @@ ZGEMM_L1x1_LOOP_START:
|
|||
KERNEL1x1_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZGEMM_L1x1_LOOP_END
|
||||
ble .LZGEMM_L1x1_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZGEMM_L1x1_LOOP:
|
||||
.LZGEMM_L1x1_LOOP:
|
||||
|
||||
KERNEL1x1_1
|
||||
KERNEL1x1_2
|
||||
|
@ -840,9 +840,9 @@ ZGEMM_L1x1_LOOP:
|
|||
KERNEL1x1_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x1_LOOP
|
||||
bgt .LZGEMM_L1x1_LOOP
|
||||
|
||||
ZGEMM_L1x1_LOOP_END:
|
||||
.LZGEMM_L1x1_LOOP_END:
|
||||
|
||||
KERNEL1x1_1
|
||||
KERNEL1x1_2
|
||||
|
@ -854,9 +854,9 @@ ZGEMM_L1x1_LOOP_END:
|
|||
KERNEL1x1_1
|
||||
KERNEL1x1_E2
|
||||
|
||||
b ZGEMM_L1x1_SUB1
|
||||
b .LZGEMM_L1x1_SUB1
|
||||
|
||||
ZGEMM_L1x1_SUB4:
|
||||
.LZGEMM_L1x1_SUB4:
|
||||
|
||||
KERNEL1x1_SUBI1
|
||||
KERNEL1x1_SUB1
|
||||
|
@ -868,34 +868,34 @@ ZGEMM_L1x1_SUB4:
|
|||
KERNEL1x1_SUB1
|
||||
KERNEL1x1_SUB1
|
||||
|
||||
b ZGEMM_L1x1_SUB1
|
||||
b .LZGEMM_L1x1_SUB1
|
||||
|
||||
ZGEMM_L1x1_SUB0:
|
||||
.LZGEMM_L1x1_SUB0:
|
||||
|
||||
andi. L, K, 7
|
||||
|
||||
KERNEL1x1_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZGEMM_L1x1_SAVE
|
||||
b ZGEMM_L1x1_SUB2
|
||||
ble .LZGEMM_L1x1_SAVE
|
||||
b .LZGEMM_L1x1_SUB2
|
||||
|
||||
ZGEMM_L1x1_SUB1:
|
||||
.LZGEMM_L1x1_SUB1:
|
||||
|
||||
andi. L, K, 7
|
||||
ble ZGEMM_L1x1_SAVE
|
||||
ble .LZGEMM_L1x1_SAVE
|
||||
|
||||
ZGEMM_L1x1_SUB2:
|
||||
.LZGEMM_L1x1_SUB2:
|
||||
|
||||
KERNEL1x1_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZGEMM_L1x1_SUB2
|
||||
bgt .LZGEMM_L1x1_SUB2
|
||||
|
||||
ZGEMM_L1x1_SAVE:
|
||||
.LZGEMM_L1x1_SAVE:
|
||||
|
||||
SAVE1x1
|
||||
|
||||
ZGEMM_L1x1_END:
|
||||
.LZGEMM_L1x1_END:
|
||||
|
||||
ZGEMM_L1_END:
|
||||
.LZGEMM_L1_END:
|
||||
|
|
|
@ -1,3 +1,39 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
||||
|
||||
#define XSFADD_R1 xsadddp
|
||||
|
|
|
@ -1,3 +1,38 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
|
@ -239,11 +274,11 @@
|
|||
#include "zgemm_macros_8x2_power8.S"
|
||||
|
||||
cmpwi cr0, M, 0
|
||||
ble L999
|
||||
ble .L999
|
||||
cmpwi cr0, N, 0
|
||||
ble L999
|
||||
ble .L999
|
||||
cmpwi cr0, K, 0
|
||||
ble L999
|
||||
ble .L999
|
||||
|
||||
slwi LDC, LDC, ZBASE_SHIFT
|
||||
li PRE, 256
|
||||
|
@ -266,7 +301,7 @@
|
|||
|
||||
#include "ztrmm_logic_8x2_power8.S"
|
||||
|
||||
L999:
|
||||
.L999:
|
||||
addi r3, 0, 0
|
||||
|
||||
lfd f14, 0(SP)
|
||||
|
|
|
@ -1,7 +1,43 @@
|
|||
srawi. J, N, 1
|
||||
ble ZTRMM_L2_END
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
ZTRMM_L2_BEGIN:
|
||||
/**************************************************************************************
|
||||
* 2016/03/05 Werner Saar (wernsaar@googlemail.com)
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* LAPACK-TEST : OK
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
srawi. J, N, 1
|
||||
ble .LZTRMM_L2_END
|
||||
|
||||
.LZTRMM_L2_BEGIN:
|
||||
|
||||
mr CO, C
|
||||
mr AO, A
|
||||
|
@ -13,9 +49,9 @@ ZTRMM_L2_BEGIN:
|
|||
#endif
|
||||
|
||||
srawi. I, M, 3
|
||||
ble ZTRMM_L2x8_END
|
||||
ble .LZTRMM_L2x8_END
|
||||
|
||||
ZTRMM_L2x8_BEGIN:
|
||||
.LZTRMM_L2x8_BEGIN:
|
||||
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
|
@ -42,11 +78,11 @@ ZTRMM_L2x8_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L2x8_SUB0
|
||||
ble .LZTRMM_L2x8_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L2x8_SUB4
|
||||
ble .LZTRMM_L2x8_SUB4
|
||||
|
||||
ZTRMM_L2x8_LOOP_START:
|
||||
.LZTRMM_L2x8_LOOP_START:
|
||||
|
||||
dcbt AO, PRE
|
||||
LOAD2x8_1
|
||||
|
@ -69,11 +105,11 @@ ZTRMM_L2x8_LOOP_START:
|
|||
KERNEL2x8_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L2x8_LOOP_END
|
||||
ble .LZTRMM_L2x8_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L2x8_LOOP:
|
||||
.LZTRMM_L2x8_LOOP:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL2x8_1
|
||||
|
@ -94,9 +130,9 @@ ZTRMM_L2x8_LOOP:
|
|||
KERNEL2x8_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x8_LOOP
|
||||
bgt .LZTRMM_L2x8_LOOP
|
||||
|
||||
ZTRMM_L2x8_LOOP_END:
|
||||
.LZTRMM_L2x8_LOOP_END:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL2x8_1
|
||||
|
@ -115,9 +151,9 @@ ZTRMM_L2x8_LOOP_END:
|
|||
KERNEL2x8_1
|
||||
KERNEL2x8_E2
|
||||
|
||||
b ZTRMM_L2x8_SUB1
|
||||
b .LZTRMM_L2x8_SUB1
|
||||
|
||||
ZTRMM_L2x8_SUB4:
|
||||
.LZTRMM_L2x8_SUB4:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL2x8_SUBI1
|
||||
|
@ -133,31 +169,31 @@ ZTRMM_L2x8_SUB4:
|
|||
KERNEL2x8_SUB1
|
||||
KERNEL2x8_SUB1
|
||||
|
||||
b ZTRMM_L2x8_SUB1
|
||||
b .LZTRMM_L2x8_SUB1
|
||||
|
||||
ZTRMM_L2x8_SUB0:
|
||||
.LZTRMM_L2x8_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL2x8_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L2x8_SAVE
|
||||
b ZTRMM_L2x8_SUB2
|
||||
ble .LZTRMM_L2x8_SAVE
|
||||
b .LZTRMM_L2x8_SUB2
|
||||
|
||||
ZTRMM_L2x8_SUB1:
|
||||
.LZTRMM_L2x8_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L2x8_SAVE
|
||||
ble .LZTRMM_L2x8_SAVE
|
||||
|
||||
ZTRMM_L2x8_SUB2:
|
||||
.LZTRMM_L2x8_SUB2:
|
||||
|
||||
KERNEL2x8_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x8_SUB2
|
||||
bgt .LZTRMM_L2x8_SUB2
|
||||
|
||||
ZTRMM_L2x8_SAVE:
|
||||
.LZTRMM_L2x8_SAVE:
|
||||
|
||||
SAVE2x8
|
||||
|
||||
|
@ -175,16 +211,16 @@ ZTRMM_L2x8_SAVE:
|
|||
|
||||
|
||||
addic. I, I, -1
|
||||
bgt ZTRMM_L2x8_BEGIN
|
||||
bgt .LZTRMM_L2x8_BEGIN
|
||||
|
||||
ZTRMM_L2x8_END:
|
||||
.LZTRMM_L2x8_END:
|
||||
|
||||
ZTRMM_L2x4_BEGIN:
|
||||
.LZTRMM_L2x4_BEGIN:
|
||||
andi. T2, M, 7
|
||||
ble ZTRMM_L2x1_END
|
||||
ble .LZTRMM_L2x1_END
|
||||
|
||||
andi. T1, M, 4
|
||||
ble ZTRMM_L2x4_END
|
||||
ble .LZTRMM_L2x4_END
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
mr BO, B // B -> BO
|
||||
|
@ -210,11 +246,11 @@ ZTRMM_L2x4_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L2x4_SUB0
|
||||
ble .LZTRMM_L2x4_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L2x4_SUB4
|
||||
ble .LZTRMM_L2x4_SUB4
|
||||
|
||||
ZTRMM_L2x4_LOOP_START:
|
||||
.LZTRMM_L2x4_LOOP_START:
|
||||
|
||||
LOAD2x4_1
|
||||
KERNEL2x4_I1
|
||||
|
@ -228,11 +264,11 @@ ZTRMM_L2x4_LOOP_START:
|
|||
KERNEL2x4_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L2x4_LOOP_END
|
||||
ble .LZTRMM_L2x4_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L2x4_LOOP:
|
||||
.LZTRMM_L2x4_LOOP:
|
||||
|
||||
KERNEL2x4_1
|
||||
KERNEL2x4_2
|
||||
|
@ -245,9 +281,9 @@ ZTRMM_L2x4_LOOP:
|
|||
KERNEL2x4_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x4_LOOP
|
||||
bgt .LZTRMM_L2x4_LOOP
|
||||
|
||||
ZTRMM_L2x4_LOOP_END:
|
||||
.LZTRMM_L2x4_LOOP_END:
|
||||
|
||||
KERNEL2x4_1
|
||||
KERNEL2x4_2
|
||||
|
@ -259,9 +295,9 @@ ZTRMM_L2x4_LOOP_END:
|
|||
KERNEL2x4_1
|
||||
KERNEL2x4_E2
|
||||
|
||||
b ZTRMM_L2x4_SUB1
|
||||
b .LZTRMM_L2x4_SUB1
|
||||
|
||||
ZTRMM_L2x4_SUB4:
|
||||
.LZTRMM_L2x4_SUB4:
|
||||
|
||||
KERNEL2x4_SUBI1
|
||||
KERNEL2x4_SUB1
|
||||
|
@ -273,31 +309,31 @@ ZTRMM_L2x4_SUB4:
|
|||
KERNEL2x4_SUB1
|
||||
KERNEL2x4_SUB1
|
||||
|
||||
b ZTRMM_L2x4_SUB1
|
||||
b .LZTRMM_L2x4_SUB1
|
||||
|
||||
ZTRMM_L2x4_SUB0:
|
||||
.LZTRMM_L2x4_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL2x4_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L2x4_SAVE
|
||||
b ZTRMM_L2x4_SUB2
|
||||
ble .LZTRMM_L2x4_SAVE
|
||||
b .LZTRMM_L2x4_SUB2
|
||||
|
||||
ZTRMM_L2x4_SUB1:
|
||||
.LZTRMM_L2x4_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L2x4_SAVE
|
||||
ble .LZTRMM_L2x4_SAVE
|
||||
|
||||
ZTRMM_L2x4_SUB2:
|
||||
.LZTRMM_L2x4_SUB2:
|
||||
|
||||
KERNEL2x4_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x4_SUB2
|
||||
bgt .LZTRMM_L2x4_SUB2
|
||||
|
||||
ZTRMM_L2x4_SAVE:
|
||||
.LZTRMM_L2x4_SAVE:
|
||||
|
||||
SAVE2x4
|
||||
|
||||
|
@ -314,12 +350,12 @@ ZTRMM_L2x4_SAVE:
|
|||
#endif
|
||||
|
||||
|
||||
ZTRMM_L2x4_END:
|
||||
.LZTRMM_L2x4_END:
|
||||
|
||||
ZTRMM_L2x2_BEGIN:
|
||||
.LZTRMM_L2x2_BEGIN:
|
||||
|
||||
andi. T1, M, 2
|
||||
ble ZTRMM_L2x2_END
|
||||
ble .LZTRMM_L2x2_END
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
mr BO, B // B -> BO
|
||||
|
@ -345,11 +381,11 @@ ZTRMM_L2x2_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L2x2_SUB0
|
||||
ble .LZTRMM_L2x2_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L2x2_SUB4
|
||||
ble .LZTRMM_L2x2_SUB4
|
||||
|
||||
ZTRMM_L2x2_LOOP_START:
|
||||
.LZTRMM_L2x2_LOOP_START:
|
||||
|
||||
LOAD2x2_1
|
||||
KERNEL2x2_I1
|
||||
|
@ -363,11 +399,11 @@ ZTRMM_L2x2_LOOP_START:
|
|||
KERNEL2x2_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L2x2_LOOP_END
|
||||
ble .LZTRMM_L2x2_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L2x2_LOOP:
|
||||
.LZTRMM_L2x2_LOOP:
|
||||
|
||||
KERNEL2x2_1
|
||||
KERNEL2x2_2
|
||||
|
@ -380,9 +416,9 @@ ZTRMM_L2x2_LOOP:
|
|||
KERNEL2x2_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x2_LOOP
|
||||
bgt .LZTRMM_L2x2_LOOP
|
||||
|
||||
ZTRMM_L2x2_LOOP_END:
|
||||
.LZTRMM_L2x2_LOOP_END:
|
||||
|
||||
KERNEL2x2_1
|
||||
KERNEL2x2_2
|
||||
|
@ -394,9 +430,9 @@ ZTRMM_L2x2_LOOP_END:
|
|||
KERNEL2x2_1
|
||||
KERNEL2x2_E2
|
||||
|
||||
b ZTRMM_L2x2_SUB1
|
||||
b .LZTRMM_L2x2_SUB1
|
||||
|
||||
ZTRMM_L2x2_SUB4:
|
||||
.LZTRMM_L2x2_SUB4:
|
||||
|
||||
KERNEL2x2_SUBI1
|
||||
KERNEL2x2_SUB1
|
||||
|
@ -408,31 +444,31 @@ ZTRMM_L2x2_SUB4:
|
|||
KERNEL2x2_SUB1
|
||||
KERNEL2x2_SUB1
|
||||
|
||||
b ZTRMM_L2x2_SUB1
|
||||
b .LZTRMM_L2x2_SUB1
|
||||
|
||||
ZTRMM_L2x2_SUB0:
|
||||
.LZTRMM_L2x2_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL2x2_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L2x2_SAVE
|
||||
b ZTRMM_L2x2_SUB2
|
||||
ble .LZTRMM_L2x2_SAVE
|
||||
b .LZTRMM_L2x2_SUB2
|
||||
|
||||
ZTRMM_L2x2_SUB1:
|
||||
.LZTRMM_L2x2_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L2x2_SAVE
|
||||
ble .LZTRMM_L2x2_SAVE
|
||||
|
||||
ZTRMM_L2x2_SUB2:
|
||||
.LZTRMM_L2x2_SUB2:
|
||||
|
||||
KERNEL2x2_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x2_SUB2
|
||||
bgt .LZTRMM_L2x2_SUB2
|
||||
|
||||
ZTRMM_L2x2_SAVE:
|
||||
.LZTRMM_L2x2_SAVE:
|
||||
|
||||
SAVE2x2
|
||||
|
||||
|
@ -449,12 +485,12 @@ ZTRMM_L2x2_SAVE:
|
|||
#endif
|
||||
|
||||
|
||||
ZTRMM_L2x2_END:
|
||||
.LZTRMM_L2x2_END:
|
||||
|
||||
ZTRMM_L2x1_BEGIN:
|
||||
.LZTRMM_L2x1_BEGIN:
|
||||
|
||||
andi. T1, M, 1
|
||||
ble ZTRMM_L2x1_END
|
||||
ble .LZTRMM_L2x1_END
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
mr BO, B // B -> BO
|
||||
|
@ -480,11 +516,11 @@ ZTRMM_L2x1_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L2x1_SUB0
|
||||
ble .LZTRMM_L2x1_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L2x1_SUB4
|
||||
ble .LZTRMM_L2x1_SUB4
|
||||
|
||||
ZTRMM_L2x1_LOOP_START:
|
||||
.LZTRMM_L2x1_LOOP_START:
|
||||
|
||||
LOAD2x1_1
|
||||
KERNEL2x1_I1
|
||||
|
@ -498,11 +534,11 @@ ZTRMM_L2x1_LOOP_START:
|
|||
KERNEL2x1_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L2x1_LOOP_END
|
||||
ble .LZTRMM_L2x1_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L2x1_LOOP:
|
||||
.LZTRMM_L2x1_LOOP:
|
||||
|
||||
KERNEL2x1_1
|
||||
KERNEL2x1_2
|
||||
|
@ -515,9 +551,9 @@ ZTRMM_L2x1_LOOP:
|
|||
KERNEL2x1_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x1_LOOP
|
||||
bgt .LZTRMM_L2x1_LOOP
|
||||
|
||||
ZTRMM_L2x1_LOOP_END:
|
||||
.LZTRMM_L2x1_LOOP_END:
|
||||
|
||||
KERNEL2x1_1
|
||||
KERNEL2x1_2
|
||||
|
@ -529,9 +565,9 @@ ZTRMM_L2x1_LOOP_END:
|
|||
KERNEL2x1_1
|
||||
KERNEL2x1_E2
|
||||
|
||||
b ZTRMM_L2x1_SUB1
|
||||
b .LZTRMM_L2x1_SUB1
|
||||
|
||||
ZTRMM_L2x1_SUB4:
|
||||
.LZTRMM_L2x1_SUB4:
|
||||
|
||||
KERNEL2x1_SUBI1
|
||||
KERNEL2x1_SUB1
|
||||
|
@ -543,31 +579,31 @@ ZTRMM_L2x1_SUB4:
|
|||
KERNEL2x1_SUB1
|
||||
KERNEL2x1_SUB1
|
||||
|
||||
b ZTRMM_L2x1_SUB1
|
||||
b .LZTRMM_L2x1_SUB1
|
||||
|
||||
ZTRMM_L2x1_SUB0:
|
||||
.LZTRMM_L2x1_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL2x1_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L2x1_SAVE
|
||||
b ZTRMM_L2x1_SUB2
|
||||
ble .LZTRMM_L2x1_SAVE
|
||||
b .LZTRMM_L2x1_SUB2
|
||||
|
||||
ZTRMM_L2x1_SUB1:
|
||||
.LZTRMM_L2x1_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L2x1_SAVE
|
||||
ble .LZTRMM_L2x1_SAVE
|
||||
|
||||
ZTRMM_L2x1_SUB2:
|
||||
.LZTRMM_L2x1_SUB2:
|
||||
|
||||
KERNEL2x1_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L2x1_SUB2
|
||||
bgt .LZTRMM_L2x1_SUB2
|
||||
|
||||
ZTRMM_L2x1_SAVE:
|
||||
.LZTRMM_L2x1_SAVE:
|
||||
|
||||
SAVE2x1
|
||||
|
||||
|
@ -584,7 +620,7 @@ ZTRMM_L2x1_SAVE:
|
|||
#endif
|
||||
|
||||
|
||||
ZTRMM_L2x1_END:
|
||||
.LZTRMM_L2x1_END:
|
||||
|
||||
slwi T1, K, 5
|
||||
add B, B, T1
|
||||
|
@ -595,23 +631,23 @@ ZTRMM_L2x1_END:
|
|||
|
||||
|
||||
addic. J, J, -1
|
||||
bgt ZTRMM_L2_BEGIN
|
||||
bgt .LZTRMM_L2_BEGIN
|
||||
|
||||
andi. T2, N, 1
|
||||
ble L999
|
||||
ble .L999
|
||||
|
||||
ZTRMM_L2_END:
|
||||
.LZTRMM_L2_END:
|
||||
|
||||
b ZTRMM_L1_BEGIN
|
||||
b .LZTRMM_L1_BEGIN
|
||||
|
||||
L999_H1:
|
||||
.L999_H1:
|
||||
|
||||
b L999
|
||||
b .L999
|
||||
|
||||
ZTRMM_L1_BEGIN:
|
||||
.LZTRMM_L1_BEGIN:
|
||||
|
||||
andi. T1, N, 1
|
||||
ble ZTRMM_L1_END
|
||||
ble .LZTRMM_L1_END
|
||||
mr CO, C
|
||||
mr AO, A
|
||||
|
||||
|
@ -620,9 +656,9 @@ ZTRMM_L1_BEGIN:
|
|||
#endif
|
||||
|
||||
srawi. I, M, 3
|
||||
ble ZTRMM_L1x8_END
|
||||
ble .LZTRMM_L1x8_END
|
||||
|
||||
ZTRMM_L1x8_BEGIN:
|
||||
.LZTRMM_L1x8_BEGIN:
|
||||
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
|
@ -649,11 +685,11 @@ ZTRMM_L1x8_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L1x8_SUB0
|
||||
ble .LZTRMM_L1x8_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L1x8_SUB4
|
||||
ble .LZTRMM_L1x8_SUB4
|
||||
|
||||
ZTRMM_L1x8_LOOP_START:
|
||||
.LZTRMM_L1x8_LOOP_START:
|
||||
|
||||
dcbt AO, PRE
|
||||
LOAD1x8_1
|
||||
|
@ -676,11 +712,11 @@ ZTRMM_L1x8_LOOP_START:
|
|||
KERNEL1x8_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L1x8_LOOP_END
|
||||
ble .LZTRMM_L1x8_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L1x8_LOOP:
|
||||
.LZTRMM_L1x8_LOOP:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL1x8_1
|
||||
|
@ -701,9 +737,9 @@ ZTRMM_L1x8_LOOP:
|
|||
KERNEL1x8_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x8_LOOP
|
||||
bgt .LZTRMM_L1x8_LOOP
|
||||
|
||||
ZTRMM_L1x8_LOOP_END:
|
||||
.LZTRMM_L1x8_LOOP_END:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL1x8_1
|
||||
|
@ -722,9 +758,9 @@ ZTRMM_L1x8_LOOP_END:
|
|||
KERNEL1x8_1
|
||||
KERNEL1x8_E2
|
||||
|
||||
b ZTRMM_L1x8_SUB1
|
||||
b .LZTRMM_L1x8_SUB1
|
||||
|
||||
ZTRMM_L1x8_SUB4:
|
||||
.LZTRMM_L1x8_SUB4:
|
||||
|
||||
dcbt AO, PRE
|
||||
KERNEL1x8_SUBI1
|
||||
|
@ -740,31 +776,31 @@ ZTRMM_L1x8_SUB4:
|
|||
KERNEL1x8_SUB1
|
||||
KERNEL1x8_SUB1
|
||||
|
||||
b ZTRMM_L1x8_SUB1
|
||||
b .LZTRMM_L1x8_SUB1
|
||||
|
||||
ZTRMM_L1x8_SUB0:
|
||||
.LZTRMM_L1x8_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL1x8_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L1x8_SAVE
|
||||
b ZTRMM_L1x8_SUB2
|
||||
ble .LZTRMM_L1x8_SAVE
|
||||
b .LZTRMM_L1x8_SUB2
|
||||
|
||||
ZTRMM_L1x8_SUB1:
|
||||
.LZTRMM_L1x8_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L1x8_SAVE
|
||||
ble .LZTRMM_L1x8_SAVE
|
||||
|
||||
ZTRMM_L1x8_SUB2:
|
||||
.LZTRMM_L1x8_SUB2:
|
||||
|
||||
KERNEL1x8_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x8_SUB2
|
||||
bgt .LZTRMM_L1x8_SUB2
|
||||
|
||||
ZTRMM_L1x8_SAVE:
|
||||
.LZTRMM_L1x8_SAVE:
|
||||
|
||||
SAVE1x8
|
||||
|
||||
|
@ -782,16 +818,16 @@ ZTRMM_L1x8_SAVE:
|
|||
|
||||
|
||||
addic. I, I, -1
|
||||
bgt ZTRMM_L1x8_BEGIN
|
||||
bgt .LZTRMM_L1x8_BEGIN
|
||||
|
||||
ZTRMM_L1x8_END:
|
||||
.LZTRMM_L1x8_END:
|
||||
|
||||
ZTRMM_L1x4_BEGIN:
|
||||
.LZTRMM_L1x4_BEGIN:
|
||||
andi. T2, M, 7
|
||||
ble ZTRMM_L1x1_END
|
||||
ble .LZTRMM_L1x1_END
|
||||
|
||||
andi. T1, M, 4
|
||||
ble ZTRMM_L1x4_END
|
||||
ble .LZTRMM_L1x4_END
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
mr BO, B // B -> BO
|
||||
|
@ -817,11 +853,11 @@ ZTRMM_L1x4_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L1x4_SUB0
|
||||
ble .LZTRMM_L1x4_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L1x4_SUB4
|
||||
ble .LZTRMM_L1x4_SUB4
|
||||
|
||||
ZTRMM_L1x4_LOOP_START:
|
||||
.LZTRMM_L1x4_LOOP_START:
|
||||
|
||||
LOAD1x4_1
|
||||
KERNEL1x4_I1
|
||||
|
@ -835,11 +871,11 @@ ZTRMM_L1x4_LOOP_START:
|
|||
KERNEL1x4_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L1x4_LOOP_END
|
||||
ble .LZTRMM_L1x4_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L1x4_LOOP:
|
||||
.LZTRMM_L1x4_LOOP:
|
||||
|
||||
KERNEL1x4_1
|
||||
KERNEL1x4_2
|
||||
|
@ -852,9 +888,9 @@ ZTRMM_L1x4_LOOP:
|
|||
KERNEL1x4_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x4_LOOP
|
||||
bgt .LZTRMM_L1x4_LOOP
|
||||
|
||||
ZTRMM_L1x4_LOOP_END:
|
||||
.LZTRMM_L1x4_LOOP_END:
|
||||
|
||||
KERNEL1x4_1
|
||||
KERNEL1x4_2
|
||||
|
@ -866,9 +902,9 @@ ZTRMM_L1x4_LOOP_END:
|
|||
KERNEL1x4_1
|
||||
KERNEL1x4_E2
|
||||
|
||||
b ZTRMM_L1x4_SUB1
|
||||
b .LZTRMM_L1x4_SUB1
|
||||
|
||||
ZTRMM_L1x4_SUB4:
|
||||
.LZTRMM_L1x4_SUB4:
|
||||
|
||||
KERNEL1x4_SUBI1
|
||||
KERNEL1x4_SUB1
|
||||
|
@ -880,31 +916,31 @@ ZTRMM_L1x4_SUB4:
|
|||
KERNEL1x4_SUB1
|
||||
KERNEL1x4_SUB1
|
||||
|
||||
b ZTRMM_L1x4_SUB1
|
||||
b .LZTRMM_L1x4_SUB1
|
||||
|
||||
ZTRMM_L1x4_SUB0:
|
||||
.LZTRMM_L1x4_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL1x4_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L1x4_SAVE
|
||||
b ZTRMM_L1x4_SUB2
|
||||
ble .LZTRMM_L1x4_SAVE
|
||||
b .LZTRMM_L1x4_SUB2
|
||||
|
||||
ZTRMM_L1x4_SUB1:
|
||||
.LZTRMM_L1x4_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L1x4_SAVE
|
||||
ble .LZTRMM_L1x4_SAVE
|
||||
|
||||
ZTRMM_L1x4_SUB2:
|
||||
.LZTRMM_L1x4_SUB2:
|
||||
|
||||
KERNEL1x4_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x4_SUB2
|
||||
bgt .LZTRMM_L1x4_SUB2
|
||||
|
||||
ZTRMM_L1x4_SAVE:
|
||||
.LZTRMM_L1x4_SAVE:
|
||||
|
||||
SAVE1x4
|
||||
|
||||
|
@ -921,12 +957,12 @@ ZTRMM_L1x4_SAVE:
|
|||
#endif
|
||||
|
||||
|
||||
ZTRMM_L1x4_END:
|
||||
.LZTRMM_L1x4_END:
|
||||
|
||||
ZTRMM_L1x2_BEGIN:
|
||||
.LZTRMM_L1x2_BEGIN:
|
||||
|
||||
andi. T1, M, 2
|
||||
ble ZTRMM_L1x2_END
|
||||
ble .LZTRMM_L1x2_END
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
mr BO, B // B -> BO
|
||||
|
@ -952,11 +988,11 @@ ZTRMM_L1x2_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L1x2_SUB0
|
||||
ble .LZTRMM_L1x2_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L1x2_SUB4
|
||||
ble .LZTRMM_L1x2_SUB4
|
||||
|
||||
ZTRMM_L1x2_LOOP_START:
|
||||
.LZTRMM_L1x2_LOOP_START:
|
||||
|
||||
LOAD1x2_1
|
||||
KERNEL1x2_I1
|
||||
|
@ -970,11 +1006,11 @@ ZTRMM_L1x2_LOOP_START:
|
|||
KERNEL1x2_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L1x2_LOOP_END
|
||||
ble .LZTRMM_L1x2_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L1x2_LOOP:
|
||||
.LZTRMM_L1x2_LOOP:
|
||||
|
||||
KERNEL1x2_1
|
||||
KERNEL1x2_2
|
||||
|
@ -987,9 +1023,9 @@ ZTRMM_L1x2_LOOP:
|
|||
KERNEL1x2_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x2_LOOP
|
||||
bgt .LZTRMM_L1x2_LOOP
|
||||
|
||||
ZTRMM_L1x2_LOOP_END:
|
||||
.LZTRMM_L1x2_LOOP_END:
|
||||
|
||||
KERNEL1x2_1
|
||||
KERNEL1x2_2
|
||||
|
@ -1001,9 +1037,9 @@ ZTRMM_L1x2_LOOP_END:
|
|||
KERNEL1x2_1
|
||||
KERNEL1x2_E2
|
||||
|
||||
b ZTRMM_L1x2_SUB1
|
||||
b .LZTRMM_L1x2_SUB1
|
||||
|
||||
ZTRMM_L1x2_SUB4:
|
||||
.LZTRMM_L1x2_SUB4:
|
||||
|
||||
KERNEL1x2_SUBI1
|
||||
KERNEL1x2_SUB1
|
||||
|
@ -1015,31 +1051,31 @@ ZTRMM_L1x2_SUB4:
|
|||
KERNEL1x2_SUB1
|
||||
KERNEL1x2_SUB1
|
||||
|
||||
b ZTRMM_L1x2_SUB1
|
||||
b .LZTRMM_L1x2_SUB1
|
||||
|
||||
ZTRMM_L1x2_SUB0:
|
||||
.LZTRMM_L1x2_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL1x2_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L1x2_SAVE
|
||||
b ZTRMM_L1x2_SUB2
|
||||
ble .LZTRMM_L1x2_SAVE
|
||||
b .LZTRMM_L1x2_SUB2
|
||||
|
||||
ZTRMM_L1x2_SUB1:
|
||||
.LZTRMM_L1x2_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L1x2_SAVE
|
||||
ble .LZTRMM_L1x2_SAVE
|
||||
|
||||
ZTRMM_L1x2_SUB2:
|
||||
.LZTRMM_L1x2_SUB2:
|
||||
|
||||
KERNEL1x2_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x2_SUB2
|
||||
bgt .LZTRMM_L1x2_SUB2
|
||||
|
||||
ZTRMM_L1x2_SAVE:
|
||||
.LZTRMM_L1x2_SAVE:
|
||||
|
||||
SAVE1x2
|
||||
|
||||
|
@ -1056,12 +1092,12 @@ ZTRMM_L1x2_SAVE:
|
|||
#endif
|
||||
|
||||
|
||||
ZTRMM_L1x2_END:
|
||||
.LZTRMM_L1x2_END:
|
||||
|
||||
ZTRMM_L1x1_BEGIN:
|
||||
.LZTRMM_L1x1_BEGIN:
|
||||
|
||||
andi. T1, M, 1
|
||||
ble ZTRMM_L1x1_END
|
||||
ble .LZTRMM_L1x1_END
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
mr BO, B // B -> BO
|
||||
|
@ -1087,11 +1123,11 @@ ZTRMM_L1x1_BEGIN:
|
|||
mr KKK, T1
|
||||
mr K1, T1
|
||||
srawi. L, K1, 3 // KTEMP / 8 -> L
|
||||
ble ZTRMM_L1x1_SUB0
|
||||
ble .LZTRMM_L1x1_SUB0
|
||||
cmpwi cr0, L, 1
|
||||
ble ZTRMM_L1x1_SUB4
|
||||
ble .LZTRMM_L1x1_SUB4
|
||||
|
||||
ZTRMM_L1x1_LOOP_START:
|
||||
.LZTRMM_L1x1_LOOP_START:
|
||||
|
||||
LOAD1x1_1
|
||||
KERNEL1x1_I1
|
||||
|
@ -1105,11 +1141,11 @@ ZTRMM_L1x1_LOOP_START:
|
|||
KERNEL1x1_2
|
||||
|
||||
addic. L, L, -2
|
||||
ble ZTRMM_L1x1_LOOP_END
|
||||
ble .LZTRMM_L1x1_LOOP_END
|
||||
|
||||
.align 5
|
||||
|
||||
ZTRMM_L1x1_LOOP:
|
||||
.LZTRMM_L1x1_LOOP:
|
||||
|
||||
KERNEL1x1_1
|
||||
KERNEL1x1_2
|
||||
|
@ -1122,9 +1158,9 @@ ZTRMM_L1x1_LOOP:
|
|||
KERNEL1x1_2
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x1_LOOP
|
||||
bgt .LZTRMM_L1x1_LOOP
|
||||
|
||||
ZTRMM_L1x1_LOOP_END:
|
||||
.LZTRMM_L1x1_LOOP_END:
|
||||
|
||||
KERNEL1x1_1
|
||||
KERNEL1x1_2
|
||||
|
@ -1136,9 +1172,9 @@ ZTRMM_L1x1_LOOP_END:
|
|||
KERNEL1x1_1
|
||||
KERNEL1x1_E2
|
||||
|
||||
b ZTRMM_L1x1_SUB1
|
||||
b .LZTRMM_L1x1_SUB1
|
||||
|
||||
ZTRMM_L1x1_SUB4:
|
||||
.LZTRMM_L1x1_SUB4:
|
||||
|
||||
KERNEL1x1_SUBI1
|
||||
KERNEL1x1_SUB1
|
||||
|
@ -1150,31 +1186,31 @@ ZTRMM_L1x1_SUB4:
|
|||
KERNEL1x1_SUB1
|
||||
KERNEL1x1_SUB1
|
||||
|
||||
b ZTRMM_L1x1_SUB1
|
||||
b .LZTRMM_L1x1_SUB1
|
||||
|
||||
ZTRMM_L1x1_SUB0:
|
||||
.LZTRMM_L1x1_SUB0:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
|
||||
KERNEL1x1_SUBI1
|
||||
|
||||
addic. L, L, -1
|
||||
ble ZTRMM_L1x1_SAVE
|
||||
b ZTRMM_L1x1_SUB2
|
||||
ble .LZTRMM_L1x1_SAVE
|
||||
b .LZTRMM_L1x1_SUB2
|
||||
|
||||
ZTRMM_L1x1_SUB1:
|
||||
.LZTRMM_L1x1_SUB1:
|
||||
|
||||
andi. L, K1, 7 // K1 & 7 -> L
|
||||
ble ZTRMM_L1x1_SAVE
|
||||
ble .LZTRMM_L1x1_SAVE
|
||||
|
||||
ZTRMM_L1x1_SUB2:
|
||||
.LZTRMM_L1x1_SUB2:
|
||||
|
||||
KERNEL1x1_SUB1
|
||||
|
||||
addic. L, L, -1
|
||||
bgt ZTRMM_L1x1_SUB2
|
||||
bgt .LZTRMM_L1x1_SUB2
|
||||
|
||||
ZTRMM_L1x1_SAVE:
|
||||
.LZTRMM_L1x1_SAVE:
|
||||
|
||||
SAVE1x1
|
||||
|
||||
|
@ -1191,11 +1227,11 @@ ZTRMM_L1x1_SAVE:
|
|||
#endif
|
||||
|
||||
|
||||
ZTRMM_L1x1_END:
|
||||
.LZTRMM_L1x1_END:
|
||||
|
||||
#if !defined(LEFT)
|
||||
addi KK, KK, 1 // KK += Number of values in B
|
||||
#endif
|
||||
|
||||
|
||||
ZTRMM_L1_END:
|
||||
.LZTRMM_L1_END:
|
||||
|
|
Loading…
Reference in New Issue