diff --git a/Makefile.system b/Makefile.system index ff4311473..5919be851 100644 --- a/Makefile.system +++ b/Makefile.system @@ -891,11 +891,9 @@ BINARY_DEFINED = 1 endif ifeq ($(ARCH), loongarch64) -ifeq ($(CORE), LOONGSON3R5) CCOMMON_OPT += -march=loongarch64 -mabi=lp64 FCOMMON_OPT += -march=loongarch64 -mabi=lp64 endif -endif endif diff --git a/TargetList.txt b/TargetList.txt index a297fd0e8..d17caf480 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -121,7 +121,9 @@ RISCV64_GENERIC C910V 11.LOONGARCH64: +LOONGSONGENERIC LOONGSON3R5 +LOONGSON2K1000 12. Elbrus E2000: E2K diff --git a/cpuid_loongarch64.c b/cpuid_loongarch64.c index 79b186bf1..ca07c7ffb 100644 --- a/cpuid_loongarch64.c +++ b/cpuid_loongarch64.c @@ -33,30 +33,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#define CPU_UNKNOWN 0 -#define CPU_LOONGSON3R5 1 +/* If LASX extension instructions supported, + * using core LOONGSON3R5 + * If only LSX extension instructions supported, + * using core LOONGSON2K1000 + * If neither LASX nor LSX extension instructions supported, + * using core LOONGSONGENERIC (As far as I know, there is no such + * CPU yet) + */ + +#define CPU_GENERIC 0 +#define CPU_LOONGSON3R5 1 +#define CPU_LOONGSON2K1000 2 #define LOONGARCH_CFG2 0x02 #define LOONGARCH_LASX 1<<7 +#define LOONGARCH_LSX 1<<6 static char *cpuname[] = { - "UNKNOWN", - "LOONGSON3R5" + "LOONGSONGENERIC", + "LOONGSON3R5", + "LOONGSON2K1000" +}; + +static char *cpuname_lower[] = { + "loongsongeneric", + "loongson3r5", + "loongson2k1000" }; int detect(void) { - uint32_t reg = 0; +#ifdef __linux + uint32_t reg = 0; - __asm__ volatile ( - "cpucfg %0, %1 \n\t" - : "+&r"(reg) - : "r"(LOONGARCH_CFG2) - ); + __asm__ volatile ( + "cpucfg %0, %1 \n\t" + : "+&r"(reg) + : "r"(LOONGARCH_CFG2) + ); - if (reg & LOONGARCH_LASX) - return CPU_LOONGSON3R5; - else - return CPU_UNKNOWN; + if (reg & LOONGARCH_LASX) + return CPU_LOONGSON3R5; + else if (reg & LOONGARCH_LSX) + return CPU_LOONGSON2K1000; + else + return CPU_GENERIC; +#endif + return CPU_GENERIC; } char *get_corename(void) { @@ -68,11 +91,8 @@ void get_architecture(void) { } void get_subarchitecture(void) { - if (detect() == CPU_LOONGSON3R5) { - printf("LOONGSON3R5"); - } else { - printf("UNKNOWN"); - } + int d = detect(); + printf("%s", cpuname[d]); } void get_subdirname(void) { @@ -80,31 +100,44 @@ void get_subdirname(void) { } void get_cpuconfig(void) { - if (detect() == CPU_LOONGSON3R5) { - printf("#define LOONGSON3R5\n"); - printf("#define L1_DATA_SIZE 65536\n"); - printf("#define L1_DATA_LINESIZE 64\n"); - printf("#define L2_SIZE 1048576\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - printf("#define L2_ASSOCIATIVE 16\n"); - } else { - printf("#define LOONGSON3R5\n"); - printf("#define L1_DATA_SIZE 65536\n"); - printf("#define L1_DATA_LINESIZE 64\n"); - printf("#define L2_SIZE 1048576\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_DEFAULT_ENTRIES 64\n"); - printf("#define DTB_SIZE 4096\n"); - printf("#define L2_ASSOCIATIVE 16\n"); + int d = detect(); + switch (d) { + case CPU_LOONGSON3R5: + printf("#define LOONGSON3R5\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L2_SIZE 1048576\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 16\n"); + break; + + case CPU_LOONGSON2K1000: + printf("#define LOONGSON2K1000\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L2_SIZE 262144\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 16\n"); + break; + + default: + printf("#define LOONGSONGENERIC\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L2_SIZE 262144\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 16\n"); + break; } } void get_libname(void){ - if (detect() == CPU_LOONGSON3R5) { - printf("loongson3r5\n"); - } else { - printf("loongarch64\n"); - } + int d = detect(); + printf("%s", cpuname_lower[d]); } diff --git a/getarch.c b/getarch.c index 6cf50ab8e..7761551ea 100644 --- a/getarch.c +++ b/getarch.c @@ -132,9 +132,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_PPC440FP2 */ /* #define FORCE_CELL */ /* #define FORCE_SICORTEX */ -/* #define FORCE_LOONGSON3R3 */ -/* #define FORCE_LOONGSON3R4 */ -/* #define FORCE_LOONGSON3R5 */ +/* #define FORCE_LOONGSON3R3 */ +/* #define FORCE_LOONGSON3R4 */ +/* #define FORCE_LOONGSON3R5 */ +/* #define FORCE_LOONGSON2K1000 */ +/* #define FORCE_LOONGSONGENERIC */ /* #define FORCE_I6400 */ /* #define FORCE_P6600 */ /* #define FORCE_P5600 */ @@ -969,6 +971,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_LOONGSON2K1000 +#define FORCE +#define ARCHITECTURE "LOONGARCH" +#define SUBARCHITECTURE "LOONGSON2K1000" +#define SUBDIRNAME "loongarch64" +#define ARCHCONFIG "-DLOONGSON2K1000 " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " +#define LIBNAME "loongson2k1000" +#define CORENAME "LOONGSON2K1000" +#else +#endif + +#ifdef FORCE_LOONGSONGENERIC +#define FORCE +#define ARCHITECTURE "LOONGARCH" +#define SUBARCHITECTURE "LOONGSONGENERIC" +#define SUBDIRNAME "loongarch64" +#define ARCHCONFIG "-DLOONGSONGENERIC " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " +#define LIBNAME "loongsongeneric" +#define CORENAME "LOONGSONGENERIC" +#else +#endif + #ifdef FORCE_I6400 #define FORCE #define ARCHITECTURE "MIPS" diff --git a/param.h b/param.h index 62941dbb4..eb52ef958 100644 --- a/param.h +++ b/param.h @@ -2881,6 +2881,76 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif +#ifdef LOONGSON2K1000 +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 8 + +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 8 + +#define CGEMM_DEFAULT_UNROLL_M 1 +#define CGEMM_DEFAULT_UNROLL_N 4 + +#define ZGEMM_DEFAULT_UNROLL_M 1 +#define ZGEMM_DEFAULT_UNROLL_N 4 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + +#define SYMV_P 16 +#endif + +#ifdef LOONGSONGENERIC +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 8 + +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 8 + +#define CGEMM_DEFAULT_UNROLL_M 1 +#define CGEMM_DEFAULT_UNROLL_N 4 + +#define ZGEMM_DEFAULT_UNROLL_M 1 +#define ZGEMM_DEFAULT_UNROLL_N 4 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + +#define SYMV_P 16 +#endif + #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) #define SNUMOPT 2 #define DNUMOPT 2