diff --git a/Makefile.system b/Makefile.system index e50d84f5a..3be5efa0c 100644 --- a/Makefile.system +++ b/Makefile.system @@ -680,6 +680,10 @@ ifeq ($(ARCH), mips64) DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 endif +ifeq ($(ARCH), loongarch64) +DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC +endif + ifeq ($(ARCH), zarch) DYNAMIC_CORE = ZARCH_GENERIC diff --git a/driver/others/Makefile b/driver/others/Makefile index 4a421ef31..e4e9ee108 100644 --- a/driver/others/Makefile +++ b/driver/others/Makefile @@ -27,11 +27,15 @@ else ifeq ($(ARCH),mips64) COMMONOBJS += dynamic_mips64.$(SUFFIX) else +ifeq ($(ARCH),loongarch64) +COMMONOBJS += dynamic_loongarch64.$(SUFFIX) +else COMMONOBJS += dynamic.$(SUFFIX) endif endif endif endif +endif else COMMONOBJS += parameter.$(SUFFIX) endif @@ -99,11 +103,15 @@ else ifeq ($(ARCH),mips64) HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_mips64.$(SUFFIX) else +ifeq ($(ARCH),loongarch64) +HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_loongarch64.$(SUFFIX) +else HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) endif endif endif endif +endif else HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) endif diff --git a/driver/others/dynamic_loongarch64.c b/driver/others/dynamic_loongarch64.c new file mode 100644 index 000000000..52f8bcb2f --- /dev/null +++ b/driver/others/dynamic_loongarch64.c @@ -0,0 +1,128 @@ +/******************************************************************************* +Copyright (c) 2022, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/
+
+#include "common.h"
+
+extern gotoblas_t gotoblas_LOONGSON3R5;
+extern gotoblas_t gotoblas_LOONGSON2K1000;
+extern gotoblas_t gotoblas_LOONGSONGENERIC;
+
+extern void openblas_warning(int verbose, const char * msg);
+
+#define NUM_CORETYPES 3
+
+/* corename[i], i < NUM_CORETYPES, names the table chosen by case i in force_coretype(); the last entry is the fallback. */
+static char *corename[] = {
+  "loongson3r5",
+  "loongson2k1000",
+  "loongsongeneric",
+  "unknown"
+};
+
+/* Returns the name of the table that `gotoblas` points to, or "unknown" if it matches none of them. */
+char *gotoblas_corename(void) {
+  if (gotoblas == &gotoblas_LOONGSON3R5) return corename[0];
+  if (gotoblas == &gotoblas_LOONGSON2K1000) return corename[1];
+  if (gotoblas == &gotoblas_LOONGSONGENERIC) return corename[2];
+  return corename[NUM_CORETYPES];
+}
+
+/* Looks up `coretype` case-insensitively among the first NUM_CORETYPES names.
+   Returns the matching table, or NULL after a "Core not found" warning. */
+static gotoblas_t *force_coretype(char *coretype) {
+  int i;
+  int found = -1;
+  char message[128];
+
+  for (i = 0; i < NUM_CORETYPES; i++) {
+    if (!strncasecmp(coretype, corename[i], 20)) {
+      found = i;
+      break;
+    }
+  }
+
+  switch (found) {
+    case 0: return (&gotoblas_LOONGSON3R5);
+    case 1: return (&gotoblas_LOONGSON2K1000);
+    case 2: return (&gotoblas_LOONGSONGENERIC);
+  }
+  snprintf(message, sizeof(message), "Core not found: %s\n", coretype);
+  openblas_warning(1, message);
+  return NULL;
+}
+
+/* Parenthesized so each mask stays one operand wherever it is expanded. */
+#define LASX_MASK (1<<7)
+#define LSX_MASK (1<<6)
+#define LOONGARCH_CFG2 0x02
+
+/* Runs cpucfg with LOONGARCH_CFG2 and picks a table: LASX_MASK set, else LSX_MASK set, else generic. */
+static gotoblas_t *get_coretype(void) {
+  int ret = 0;
+  __asm__ volatile (
+    "cpucfg %0, %1 \n\t"
+    : "+&r"(ret)
+    : "r"(LOONGARCH_CFG2)
+  );
+
+  if (ret & LASX_MASK)
+    return &gotoblas_LOONGSON3R5;
+  else if (ret & LSX_MASK)
+    return &gotoblas_LOONGSON2K1000;
+  else
+    return &gotoblas_LOONGSONGENERIC;
+}
+
+/* Sets `gotoblas` if unset: from OPENBLAS_CORETYPE when that variable exists, otherwise from get_coretype().
+   Logs the core name and calls init(); exits with status 1 when no table was selected or it has no init hook. */
+void gotoblas_dynamic_init(void) {
+  char coremsg[128];
+  char *p;
+
+  if (gotoblas) return;
+
+  p = getenv("OPENBLAS_CORETYPE");
+  if (p) {
+    gotoblas = force_coretype(p);
+  } else {
+    gotoblas = get_coretype();
+  }
+
+  if (gotoblas && gotoblas->init) {
+    snprintf(coremsg, sizeof(coremsg), "Core: %s\n", gotoblas_corename()); /* bounded, always terminated */
+    openblas_warning(2, coremsg);
+    gotoblas ->  init();
+  } else {
+    openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
+    exit(1);
+  }
+}
+
+/* Resets `gotoblas` to NULL so a later gotoblas_dynamic_init() selects a table again. */
+void gotoblas_dynamic_quit(void) {
+  gotoblas = NULL;
+}
diff --git a/kernel/loongarch64/KERNEL b/kernel/loongarch64/KERNEL
index 1c11df9b6..e5d145a71 100644
--- a/kernel/loongarch64/KERNEL
+++ b/kernel/loongarch64/KERNEL
@@ -108,10 +108,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_2.c
 SGEMMITCOPY = ../generic/gemm_tcopy_2.c
 SGEMMONCOPY = ../generic/gemm_ncopy_8.c
 SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
-SGEMMINCOPYOBJ = sgemm_incopy.o
-SGEMMITCOPYOBJ = sgemm_itcopy.o
-SGEMMONCOPYOBJ = sgemm_oncopy.o
-SGEMMOTCOPYOBJ = sgemm_otcopy.o
+SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
 endif
 
 ifndef DGEMMKERNEL
@@ -120,10 +120,10 @@ DGEMMINCOPY = ../generic/gemm_ncopy_2.c
 DGEMMITCOPY = ../generic/gemm_tcopy_2.c
 DGEMMONCOPY = ../generic/gemm_ncopy_8.c
 DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
-DGEMMINCOPYOBJ = dgemm_incopy.o
-DGEMMITCOPYOBJ = dgemm_itcopy.o
-DGEMMONCOPYOBJ = dgemm_oncopy.o
-DGEMMOTCOPYOBJ = dgemm_otcopy.o
+DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
 endif
 
 ifndef CGEMMKERNEL
@@ -132,10 +132,10 @@ CGEMMINCOPY = ../generic/zgemm_ncopy_1.c
 CGEMMITCOPY = ../generic/zgemm_tcopy_1.c
 CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
 CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
-CGEMMINCOPYOBJ = cgemm_incopy.o
-CGEMMITCOPYOBJ = cgemm_itcopy.o
-CGEMMONCOPYOBJ = cgemm_oncopy.o
-CGEMMOTCOPYOBJ = cgemm_otcopy.o
+CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
 endif
 
 ifndef
ZGEMMKERNEL @@ -144,10 +144,10 @@ ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -ZGEMMINCOPYOBJ = zgemm_incopy.o -ZGEMMITCOPYOBJ = zgemm_itcopy.o -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef SGEMM_BETA diff --git a/kernel/loongarch64/KERNEL.LOONGSON3R5 b/kernel/loongarch64/KERNEL.LOONGSON3R5 index bb0441ab2..cda359040 100644 --- a/kernel/loongarch64/KERNEL.LOONGSON3R5 +++ b/kernel/loongarch64/KERNEL.LOONGSON3R5 @@ -3,10 +3,10 @@ DGEMMINCOPY = dgemm_ncopy_16.S DGEMMITCOPY = dgemm_tcopy_16.S DGEMMONCOPY = dgemm_ncopy_4.S DGEMMOTCOPY = dgemm_tcopy_4.S -DGEMMINCOPYOBJ = dgemm_incopy.o -DGEMMITCOPYOBJ = dgemm_itcopy.o -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c diff --git a/kernel/loongarch64/KERNEL.generic b/kernel/loongarch64/KERNEL.generic index 105b2f6fd..b772a6f82 100644 --- a/kernel/loongarch64/KERNEL.generic +++ b/kernel/loongarch64/KERNEL.generic @@ -11,26 +11,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c SGEMMKERNEL = ../generic/gemmkernel_2x2.c SGEMMONCOPY = ../generic/gemm_ncopy_2.c SGEMMOTCOPY = ../generic/gemm_tcopy_2.c -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMKERNEL = ../generic/gemmkernel_2x2.c DGEMMONCOPY = ../generic/gemm_ncopy_2.c DGEMMOTCOPY = 
../generic/gemm_tcopy_2.c
-DGEMMONCOPYOBJ = dgemm_oncopy.o
-DGEMMOTCOPYOBJ = dgemm_otcopy.o
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
 CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
 CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
-CGEMMONCOPYOBJ = cgemm_oncopy.o
-CGEMMOTCOPYOBJ = cgemm_otcopy.o
+CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
 ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
 ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
-ZGEMMONCOPYOBJ = zgemm_oncopy.o
-ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
 STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index 9f5d34d9b..8bcd31ef2 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -1046,6 +1046,34 @@ static void init_parameter(void) {
 #endif
 }
 #else // (ARCH_MIPS64)
+#if (ARCH_LOONGARCH64) /* init_parameter variant compiled when ARCH_LOONGARCH64 is non-zero */
+static void init_parameter(void) {
+  /* Copies the compile-time ?GEMM_DEFAULT_P/R/Q macros into the matching TABLE_NAME fields; the sbgemm_* fields are written only when BUILD_BFLOAT16 is defined. */
+#ifdef BUILD_BFLOAT16
+  TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
+#endif
+  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
+  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
+  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
+  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
+
+#ifdef BUILD_BFLOAT16
+  TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
+#endif
+  TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
+  TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
+  TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
+  TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
+
+#ifdef BUILD_BFLOAT16
+  TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
+#endif
+  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
+  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
+  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
+  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
+}
+#else // (ARCH_LOONGARCH64)
 #if (ARCH_POWER)
 static void init_parameter(void) {
 
@@ -1899,5 +1927,6 @@ static void
init_parameter(void) {
 }
 #endif //POWER
 #endif //ZARCH
+#endif //(ARCH_LOONGARCH64)
 #endif //(ARCH_MIPS64)
 #endif //(ARCH_ARM64)
diff --git a/param.h b/param.h
index eb52ef958..dc02147d8 100644
--- a/param.h
+++ b/param.h
@@ -2857,26 +2857,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ZGEMM_DEFAULT_UNROLL_M 1
 #define XGEMM_DEFAULT_UNROLL_M 1
 
-#define SGEMM_DEFAULT_P sgemm_p
+#define SGEMM_DEFAULT_P 512
 #define DGEMM_DEFAULT_P 32
-#define QGEMM_DEFAULT_P qgemm_p
-#define CGEMM_DEFAULT_P cgemm_p
-#define ZGEMM_DEFAULT_P zgemm_p
-#define XGEMM_DEFAULT_P xgemm_p
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
 
-#define SGEMM_DEFAULT_R sgemm_r
+#define SGEMM_DEFAULT_R 12288
 #define DGEMM_DEFAULT_R 858
-#define QGEMM_DEFAULT_R qgemm_r
-#define CGEMM_DEFAULT_R cgemm_r
-#define ZGEMM_DEFAULT_R zgemm_r
-#define XGEMM_DEFAULT_R xgemm_r
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
 
 #define SGEMM_DEFAULT_Q 128
 #define DGEMM_DEFAULT_Q 152
-#define QGEMM_DEFAULT_Q 128
 #define CGEMM_DEFAULT_Q 128
 #define ZGEMM_DEFAULT_Q 128
-#define XGEMM_DEFAULT_Q 128
 
 #define SYMV_P 16
 #endif
@@ -3795,6 +3789,21 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
 #define DGEMM_DEFAULT_R 8192
 #define CGEMM_DEFAULT_R 4096
 #define ZGEMM_DEFAULT_R 4096
+#elif defined(ARCH_LOONGARCH64) /* numeric S/D/C/ZGEMM_DEFAULT_P/Q/R values used when ARCH_LOONGARCH64 is defined and no earlier branch of this chain matched */
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
 #else
 #define SGEMM_DEFAULT_P sgemm_p
 #define DGEMM_DEFAULT_P dgemm_p