Merge pull request #3704 from XiWeiGu/loongarch64_dynamic_arch
LoongArch64: Add DYNAMIC_ARCH support
This commit is contained in:
commit
d0ba257de0
|
@ -680,6 +680,10 @@ ifeq ($(ARCH), mips64)
|
|||
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), loongarch64)
|
||||
DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), zarch)
|
||||
DYNAMIC_CORE = ZARCH_GENERIC
|
||||
|
||||
|
|
|
@ -27,11 +27,15 @@ else
|
|||
ifeq ($(ARCH),mips64)
|
||||
COMMONOBJS += dynamic_mips64.$(SUFFIX)
|
||||
else
|
||||
ifeq ($(ARCH),loongarch64)
|
||||
COMMONOBJS += dynamic_loongarch64.$(SUFFIX)
|
||||
else
|
||||
COMMONOBJS += dynamic.$(SUFFIX)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
COMMONOBJS += parameter.$(SUFFIX)
|
||||
endif
|
||||
|
@ -99,11 +103,15 @@ else
|
|||
ifeq ($(ARCH),mips64)
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_mips64.$(SUFFIX)
|
||||
else
|
||||
ifeq ($(ARCH),loongarch64)
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_loongarch64.$(SUFFIX)
|
||||
else
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
||||
endif
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2022, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
extern gotoblas_t gotoblas_LOONGSON3R5;
|
||||
extern gotoblas_t gotoblas_LOONGSON2K1000;
|
||||
extern gotoblas_t gotoblas_LOONGSONGENERIC;
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
|
||||
#define NUM_CORETYPES 3
|
||||
|
||||
static char *corename[] = {
|
||||
"loongson3r5",
|
||||
"loongson2k1000",
|
||||
"loongsongeneric",
|
||||
"unknown"
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_LOONGSON3R5) return corename[0];
|
||||
if (gotoblas == &gotoblas_LOONGSON2K1000) return corename[1];
|
||||
if (gotoblas == &gotoblas_LOONGSONGENERIC) return corename[2];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
static gotoblas_t *force_coretype(char *coretype) {
|
||||
int i;
|
||||
int found = -1;
|
||||
char message[128];
|
||||
|
||||
for ( i=0 ; i < NUM_CORETYPES; i++)
|
||||
{
|
||||
if (!strncasecmp(coretype, corename[i], 20))
|
||||
{
|
||||
found = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (found)
|
||||
{
|
||||
case 0: return (&gotoblas_LOONGSON3R5);
|
||||
case 1: return (&gotoblas_LOONGSON2K1000);
|
||||
case 2: return (&gotoblas_LOONGSONGENERIC);
|
||||
}
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define LASX_MASK 1<<7
|
||||
#define LSX_MASK 1<<6
|
||||
#define LOONGARCH_CFG2 0x02
|
||||
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
int ret = 0;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(ret)
|
||||
: "r"(LOONGARCH_CFG2)
|
||||
);
|
||||
|
||||
if (ret & LASX_MASK)
|
||||
return &gotoblas_LOONGSON3R5;
|
||||
else if (ret & LSX_MASK)
|
||||
return &gotoblas_LOONGSON2K1000;
|
||||
else
|
||||
return &gotoblas_LOONGSONGENERIC;
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_init(void) {
|
||||
char coremsg[128];
|
||||
char coren[22];
|
||||
char *p;
|
||||
|
||||
if (gotoblas) return;
|
||||
|
||||
p = getenv("OPENBLAS_CORETYPE");
|
||||
if ( p )
|
||||
{
|
||||
gotoblas = force_coretype(p);
|
||||
}
|
||||
else
|
||||
{
|
||||
gotoblas = get_coretype();
|
||||
}
|
||||
|
||||
if (gotoblas && gotoblas->init) {
|
||||
strncpy(coren, gotoblas_corename(), 20);
|
||||
sprintf(coremsg, "Core: %s\n", coren);
|
||||
openblas_warning(2, coremsg);
|
||||
gotoblas -> init();
|
||||
} else {
|
||||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_quit(void) {
|
||||
gotoblas = NULL;
|
||||
}
|
|
@ -108,10 +108,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
|||
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef DGEMMKERNEL
|
||||
|
@ -120,10 +120,10 @@ DGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
|||
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef CGEMMKERNEL
|
||||
|
@ -132,10 +132,10 @@ CGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
|||
CGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef ZGEMMKERNEL
|
||||
|
@ -144,10 +144,10 @@ ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
|||
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef SGEMM_BETA
|
||||
|
|
|
@ -3,10 +3,10 @@ DGEMMINCOPY = dgemm_ncopy_16.S
|
|||
DGEMMITCOPY = dgemm_tcopy_16.S
|
||||
DGEMMONCOPY = dgemm_ncopy_4.S
|
||||
DGEMMOTCOPY = dgemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
|
|
|
@ -11,26 +11,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
|||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
|
|
|
@ -1046,6 +1046,34 @@ static void init_parameter(void) {
|
|||
#endif
|
||||
}
|
||||
#else // (ARCH_MIPS64)
|
||||
#if (ARCH_LOONGARCH64)
|
||||
static void init_parameter(void) {
|
||||
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
||||
}
|
||||
#else // (ARCH_LOONGARCH64)
|
||||
#if (ARCH_POWER)
|
||||
static void init_parameter(void) {
|
||||
|
||||
|
@ -1899,5 +1927,6 @@ static void init_parameter(void) {
|
|||
}
|
||||
#endif //POWER
|
||||
#endif //ZARCH
|
||||
#endif //(ARCH_LOONGARCH64)
|
||||
#endif //(ARCH_MIPS64)
|
||||
#endif //(ARCH_ARM64)
|
||||
|
|
33
param.h
33
param.h
|
@ -2857,26 +2857,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_P sgemm_p
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_P 32
|
||||
#define QGEMM_DEFAULT_P qgemm_p
|
||||
#define CGEMM_DEFAULT_P cgemm_p
|
||||
#define ZGEMM_DEFAULT_P zgemm_p
|
||||
#define XGEMM_DEFAULT_P xgemm_p
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define ZGEMM_DEFAULT_P 128
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define SGEMM_DEFAULT_R 12288
|
||||
#define DGEMM_DEFAULT_R 858
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#define SGEMM_DEFAULT_Q 128
|
||||
#define DGEMM_DEFAULT_Q 152
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 128
|
||||
#define ZGEMM_DEFAULT_Q 128
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#define SYMV_P 16
|
||||
#endif
|
||||
|
@ -3795,6 +3789,21 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
|
|||
#define DGEMM_DEFAULT_R 8192
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
#elif defined(ARCH_LOONGARCH64)
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 128
|
||||
#define CGEMM_DEFAULT_P 96
|
||||
#define ZGEMM_DEFAULT_P 64
|
||||
|
||||
#define SGEMM_DEFAULT_Q 240
|
||||
#define DGEMM_DEFAULT_Q 120
|
||||
#define CGEMM_DEFAULT_Q 120
|
||||
#define ZGEMM_DEFAULT_Q 120
|
||||
|
||||
#define SGEMM_DEFAULT_R 12288
|
||||
#define DGEMM_DEFAULT_R 8192
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
#else
|
||||
#define SGEMM_DEFAULT_P sgemm_p
|
||||
#define DGEMM_DEFAULT_P dgemm_p
|
||||
|
|
Loading…
Reference in New Issue