Merge pull request #4881 from martin-frbg/issue4805-2

Use fld.d/fst.d in PROLOGUE/EPILOGUE in LOONGSON3R5 GEMM
This commit is contained in:
Martin Kroeker 2024-08-16 08:47:12 +02:00 committed by GitHub
commit 7129a64d87
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 62 additions and 62 deletions

View File

@ -196,17 +196,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SDARG $r25, $sp, 16
SDARG $r26, $sp, 24
SDARG $r27, $sp, 32
ST $f23, $sp, 40
ST $f24, $sp, 48
ST $f25, $sp, 56
ST $f26, $sp, 64
ST $f27, $sp, 72
ST $f28, $sp, 80
ST $f29, $sp, 88
ST $f30, $sp, 96
ST $f31, $sp, 104
ST ALPHA_R,$sp, 112
ST ALPHA_I,$sp, 120
fst.d $f23, $sp, 40
fst.d $f24, $sp, 48
fst.d $f25, $sp, 56
fst.d $f26, $sp, 64
fst.d $f27, $sp, 72
fst.d $f28, $sp, 80
fst.d $f29, $sp, 88
fst.d $f30, $sp, 96
fst.d $f31, $sp, 104
fst.d ALPHA_R,$sp, 112
fst.d ALPHA_I,$sp, 120
xvldrepl.w VALPHAR, $sp, 112
xvldrepl.w VALPHAI, $sp, 120
@ -3741,17 +3741,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDARG $r25, $sp, 16
LDARG $r26, $sp, 24
LDARG $r27, $sp, 32
LD $f23, $sp, 40
LD $f24, $sp, 48
LD $f25, $sp, 56
LD $f26, $sp, 64
LD $f27, $sp, 72
LD $f28, $sp, 80
LD $f29, $sp, 88
LD $f30, $sp, 96
LD $f31, $sp, 104
fld.d $f23, $sp, 40
fld.d $f24, $sp, 48
fld.d $f25, $sp, 56
fld.d $f26, $sp, 64
fld.d $f27, $sp, 72
fld.d $f28, $sp, 80
fld.d $f29, $sp, 88
fld.d $f30, $sp, 96
fld.d $f31, $sp, 104
addi.d $sp, $sp, 128
jirl $r0, $r1, 0x0
EPILOGUE
EPILOGUE

View File

@ -1098,16 +1098,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SDARG $r25, $sp, 16
SDARG $r26, $sp, 24
SDARG $r27, $sp, 32
ST $f23, $sp, 40
ST $f24, $sp, 48
ST $f25, $sp, 56
ST $f26, $sp, 64
ST $f27, $sp, 72
ST $f28, $sp, 80
ST $f29, $sp, 88
ST $f30, $sp, 96
ST $f31, $sp, 104
ST ALPHA, $sp, 112
fst.d $f23, $sp, 40
fst.d $f24, $sp, 48
fst.d $f25, $sp, 56
fst.d $f26, $sp, 64
fst.d $f27, $sp, 72
fst.d $f28, $sp, 80
fst.d $f29, $sp, 88
fst.d $f30, $sp, 96
fst.d $f31, $sp, 104
fst.d ALPHA, $sp, 112
#if defined (TRMMKERNEL) && !defined(LEFT)
sub.d OFF, ZERO, OFFSET
@ -3504,15 +3504,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDARG $r25, $sp, 16
LDARG $r26, $sp, 24
LDARG $r27, $sp, 32
LD $f23, $sp, 40
LD $f24, $sp, 48
LD $f25, $sp, 56
LD $f26, $sp, 64
LD $f27, $sp, 72
LD $f28, $sp, 80
LD $f29, $sp, 88
LD $f30, $sp, 96
LD $f31, $sp, 104
fld.d $f23, $sp, 40
fld.d $f24, $sp, 48
fld.d $f25, $sp, 56
fld.d $f26, $sp, 64
fld.d $f27, $sp, 72
fld.d $f28, $sp, 80
fld.d $f29, $sp, 88
fld.d $f30, $sp, 96
fld.d $f31, $sp, 104
addi.d $sp, $sp, 120
jirl $r0, $r1, 0x0

View File

@ -196,17 +196,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SDARG $r25, $sp, 16
SDARG $r26, $sp, 24
SDARG $r27, $sp, 32
ST $f23, $sp, 40
ST $f24, $sp, 48
ST $f25, $sp, 56
ST $f26, $sp, 64
ST $f27, $sp, 72
ST $f28, $sp, 80
ST $f29, $sp, 88
ST $f30, $sp, 96
ST $f31, $sp, 104
ST ALPHA_R,$sp, 112
ST ALPHA_I,$sp, 120
fst.d $f23, $sp, 40
fst.d $f24, $sp, 48
fst.d $f25, $sp, 56
fst.d $f26, $sp, 64
fst.d $f27, $sp, 72
fst.d $f28, $sp, 80
fst.d $f29, $sp, 88
fst.d $f30, $sp, 96
fst.d $f31, $sp, 104
fst.d ALPHA_R,$sp, 112
fst.d ALPHA_I,$sp, 120
xvldrepl.d VALPHAR, $sp, 112
xvldrepl.d VALPHAI, $sp, 120
@ -3529,17 +3529,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDARG $r25, $sp, 16
LDARG $r26, $sp, 24
LDARG $r27, $sp, 32
LD $f23, $sp, 40
LD $f24, $sp, 48
LD $f25, $sp, 56
LD $f26, $sp, 64
LD $f27, $sp, 72
LD $f28, $sp, 80
LD $f29, $sp, 88
LD $f30, $sp, 96
LD $f31, $sp, 104
fld.d $f23, $sp, 40
fld.d $f24, $sp, 48
fld.d $f25, $sp, 56
fld.d $f26, $sp, 64
fld.d $f27, $sp, 72
fld.d $f28, $sp, 80
fld.d $f29, $sp, 88
fld.d $f30, $sp, 96
fld.d $f31, $sp, 104
addi.d $sp, $sp, 128
jirl $r0, $r1, 0x0
EPILOGUE
EPILOGUE

View File

@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include "openblas_utest.h"
#pragma GCC optimize("no-gcse")
/*
void BLASFUNC(cpotrf)(char*, BLASINT*, complex float*, BLASINT*, BLASINT*);
void BLASFUNC(zpotrs)(char*, BLASINT*, BLASINT*, complex double*,