LoongArch64: Rename core

Name the cores after their microarchitecture instead of using opaque strings;
the legacy core names are still accepted.
1. Rename LOONGSONGENERIC to LA64_GENERIC
2. Rename LOONGSON3R5 to LA464
3. Rename LOONGSON2K1000 to LA264
This commit is contained in:
gxw 2024-09-18 17:20:43 +08:00
parent fca86e359c
commit 48698b2b1d
14 changed files with 515 additions and 133 deletions

View File

@ -23,6 +23,15 @@ jobs:
- target: LOONGSON2K1000 - target: LOONGSON2K1000
triple: loongarch64-unknown-linux-gnu triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000 opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
- target: LA64_GENERIC
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
- target: LA464
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
- target: LA264
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
- target: DYNAMIC_ARCH - target: DYNAMIC_ARCH
triple: loongarch64-unknown-linux-gnu triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC

View File

@ -20,6 +20,12 @@ jobs:
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5 opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
- target: LOONGSON2K1000 - target: LOONGSON2K1000
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000 opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
- target: LA64_GENERIC
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
- target: LA464
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
- target: LA264
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
- target: DYNAMIC_ARCH - target: DYNAMIC_ARCH
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC

View File

@ -727,7 +727,7 @@ endif
endif endif
ifeq ($(ARCH), loongarch64) ifeq ($(ARCH), loongarch64)
DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC DYNAMIC_CORE = LA64_GENERIC LA264 LA464
endif endif
ifeq ($(ARCH), riscv64) ifeq ($(ARCH), riscv64)

View File

@ -126,9 +126,17 @@ x280
RISCV64_ZVL256B RISCV64_ZVL256B
11.LOONGARCH64: 11.LOONGARCH64:
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names;
// the standardized names LA64_GENERIC/LA264/LA464 are recommended instead.
// You can still specify TARGET as
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 at compile time or at runtime,
// and they will be mapped internally to LA64_GENERIC/LA264/LA464.
LOONGSONGENERIC LOONGSONGENERIC
LOONGSON3R5
LOONGSON2K1000 LOONGSON2K1000
LOONGSON3R5
LA64_GENERIC
LA264
LA464
12. Elbrus E2000: 12. Elbrus E2000:
E2K E2K

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011-2020, The OpenBLAS Project Copyright (c) 2011-2024, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -32,53 +32,299 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/ **********************************************************************************/
#include <stdint.h> #include <stdint.h>
#include <sys/auxv.h>
#include <stdio.h> #include <stdio.h>
#include <math.h>
#include <string.h>
#include <sys/auxv.h>
/* If LASX extension instructions supported, #define CPU_LA64_GENERIC 0
* using core LOONGSON3R5 #define CPU_LA264 1
* If only LSX extension instructions supported, #define CPU_LA364 2
* using core LOONGSON2K1000 #define CPU_LA464 3
* If neither LASX nor LSX extension instructions supported, #define CPU_LA664 4
* using core LOONGSONGENERIC (As far as I know, there is no such
* CPU yet)
*/
#define CPU_GENERIC 0 #define CORE_LA64_GENERIC 0
#define CPU_LOONGSON3R5 1 #define CORE_LA264 1
#define CPU_LOONGSON2K1000 2 #define CORE_LA464 2
#define LA_HWCAP_LSX (1U << 4) #define LA_HWCAP_LSX (1U << 4)
#define LA_HWCAP_LASX (1U << 5) #define LA_HWCAP_LASX (1U << 5)
#define LOONGARCH_CFG0 0x00
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_CFG10 0x10
#define LOONGARCH_CFG11 0x11
#define LOONGARCH_CFG12 0x12
#define LOONGARCH_CFG13 0x13
#define LOONGARCH_CFG14 0x14
/*
 * Single-bit masks for the LASX (bit 7) and LSX (bit 6) flags.
 * The expansions are parenthesized so the macros stay correct when used
 * next to operators that bind tighter than <<, e.g. `LASX_MASK + 1` or
 * `~LSX_MASK`. Unsigned, like the LA_HWCAP_* masks in this file.
 * NOTE(review): presumably meant to be tested against cpucfg word
 * LOONGARCH_CFG2 -- no use is visible here, confirm against callers.
 */
#define LASX_MASK (1U << 7)
#define LSX_MASK  (1U << 6)
#define PRID_SERIES_MASK 0xf000
#define PRID_SERIES_LA264 0xa000
#define PRID_SERIES_LA364 0xb000
#define PRID_SERIES_LA464 0xc000
#define PRID_SERIES_LA664 0xd000
#define CACHE_INFO_L1_IU 0
#define CACHE_INFO_L1_D 1
#define CACHE_INFO_L2_IU 2
#define CACHE_INFO_L2_D 3
#define CACHE_INFO_L3_IU 4
#define CACHE_INFO_L3_D 5
#define L1_IU_PRESENT_MASK 0x0001
#define L1_IU_UNITY_MASK 0x0002
#define L1_D_PRESENT_MASK 0x0004
#define L2_IU_PRESENT_MASK 0x0008
#define L2_IU_UNITY_MASK 0x0010
#define L2_D_PRESENT_MASK 0x0080
#define L3_IU_PRESENT_MASK 0x0400
#define L3_IU_UNITY_MASK 0x0800
#define L3_D_PRESENT_MASK 0x4000
#define CACHE_WAY_MINUS_1_MASK 0x0000ffff
#define CACHE_INDEX_LOG2_MASK 0x00ff0000
#define CACHE_LINESIZE_LOG2_MASK 0x7f000000
typedef struct {
int size;
int associative;
int linesize;
int unify;
int present;
} cache_info_t;
/* Using microarchitecture representation */
static char *cpuname[] = { static char *cpuname[] = {
"LOONGSONGENERIC", "LA64_GENERIC",
"LOONGSON3R5", "LA264", /* Loongson 64bit, 2-issue, Like 2K1000LA */
"LOONGSON2K1000" "LA364", /* Loongson 64bit, 3-issue, Like 2K2000 */
"LA464", /* Loongson 64bit, 4-issue, Like 3A5000, 3C5000L, 3C5000 and 3D5000 */
"LA664" /* Loongson 64bit, 6-issue, Like 3A6000, 3C6000 and 3D6000 */
}; };
static char *cpuname_lower[] = { static char *cpuname_lower[] = {
"loongsongeneric", "la64_generic",
"loongson3r5", "la264",
"loongson2k1000" "la364",
"la464",
"la664"
}; };
int detect(void) { static char *corename[] = {
#ifdef __linux "LA64_GENERIC", /* Implies using scalar instructions for optimization */
"LA264", /* Implies using LSX instructions for optimization */
"LA464", /* Implies using LASX instructions for optimization */
};
static char *corename_lower[] = {
"la64_generic",
"la264",
"la464",
};
/*
 * Obtain cache and processor identification
 * through the cpucfg instruction.
 */

/* Read a single cpucfg configuration word. */
static uint32_t cacheinfo_read_cpucfg(uint32_t word) {
  uint32_t value = 0;
  __asm__ volatile (
    "cpucfg %0, %1 \n\t"
    : "+&r"(value)
    : "r"(word)
  );
  return value;
}

/*
 * Describe the cache selected by `type` (one of the CACHE_INFO_* values)
 * in *cacheinfo.
 *
 * cpucfg word LOONGARCH_CFG10 is always read; it holds the present/unify
 * bits of every level. For the L1 I/U, L1 D, L2 I/U and L3 I/U caches the
 * matching geometry word (LOONGARCH_CFG11..LOONGARCH_CFG14) is decoded
 * into ways, line size and total size. For the L2 D and L3 D caches only
 * `present` is reported. An unknown `type`, or a cache that is not
 * present, yields an all-zero structure.
 */
static void get_cacheinfo(int type, cache_info_t *cacheinfo) {
  uint32_t present_mask = 0;   /* present bit of the requested cache */
  uint32_t unify_mask = 0;     /* unify bit, 0 when the cache has none */
  uint32_t geometry_word = 0;  /* cpucfg word holding the geometry */
  int has_geometry = 0;        /* whether a geometry word is decoded */
  cache_info_t result;

  memset(&result, 0, sizeof(result));

  switch (type) {
    case CACHE_INFO_L1_IU:
      present_mask = L1_IU_PRESENT_MASK;
      unify_mask = L1_IU_UNITY_MASK;
      geometry_word = LOONGARCH_CFG11;
      has_geometry = 1;
      break;
    case CACHE_INFO_L1_D:
      present_mask = L1_D_PRESENT_MASK;
      geometry_word = LOONGARCH_CFG12;
      has_geometry = 1;
      break;
    case CACHE_INFO_L2_IU:
      present_mask = L2_IU_PRESENT_MASK;
      unify_mask = L2_IU_UNITY_MASK;
      geometry_word = LOONGARCH_CFG13;
      has_geometry = 1;
      break;
    case CACHE_INFO_L2_D:
      present_mask = L2_D_PRESENT_MASK;  /* No data fetch */
      break;
    case CACHE_INFO_L3_IU:
      present_mask = L3_IU_PRESENT_MASK;
      unify_mask = L3_IU_UNITY_MASK;
      geometry_word = LOONGARCH_CFG14;
      has_geometry = 1;
      break;
    case CACHE_INFO_L3_D:
      present_mask = L3_D_PRESENT_MASK;  /* No data fetch */
      break;
    default:
      break;
  }

  uint32_t levels = cacheinfo_read_cpucfg(LOONGARCH_CFG10);

  if (levels & present_mask) {
    result.present = levels & present_mask;
    result.unify = levels & unify_mask;

    if (has_geometry) {
      uint32_t geometry = cacheinfo_read_cpucfg(geometry_word);

      result.associative = (geometry & CACHE_WAY_MINUS_1_MASK) + 1;
      result.linesize = 1 << ((geometry & CACHE_LINESIZE_LOG2_MASK) >> 24);
      result.size = result.associative * result.linesize *
                    (1 << ((geometry & CACHE_INDEX_LOG2_MASK) >> 16));
    }
  }

  *cacheinfo = result;
}
/*
 * Return cpucfg configuration word LOONGARCH_CFG0; callers mask it with
 * PRID_SERIES_MASK to identify the processor series.
 */
static uint32_t get_prid() {
  uint32_t prid = 0;

  __asm__ volatile ("cpucfg %0, %1 \n\t" : "+&r"(prid) : "r"(LOONGARCH_CFG0));

  return prid;
}
/*
 * Count the "processor" entries of /proc/cpuinfo and store the total in
 * *count. When the file cannot be opened, *count is left untouched.
 *
 * The file is read in chunks of at most 199 characters; every chunk that
 * begins with "processor" is counted.
 */
static void get_cpucount(uint32_t *count) {
  static const char key[] = "processor";
  char line[200];
  uint32_t found = 0;
  FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

  if (cpuinfo == NULL)
    return;

  while (fgets(line, sizeof(line), cpuinfo) != NULL) {
    if (strncmp(key, line, sizeof(key) - 1) == 0)
      found += 1;
  }

  fclose(cpuinfo);
  *count = found;
}
/* Detect whether the OS supports the LASX instruction set */
static int os_support_lasx() {
int hwcap = (int)getauxval(AT_HWCAP); int hwcap = (int)getauxval(AT_HWCAP);
if (hwcap & LA_HWCAP_LASX) if (hwcap & LA_HWCAP_LASX)
return CPU_LOONGSON3R5; return 1;
else if (hwcap & LA_HWCAP_LSX)
return CPU_LOONGSON2K1000;
else else
return CPU_GENERIC; return 0;
#endif }
return CPU_GENERIC;
/*
 * Report whether the LA_HWCAP_LSX bit is set in the AT_HWCAP auxiliary
 * vector entry, i.e. whether the OS supports the LSX instruction set.
 * Returns 1 when it is set, 0 otherwise.
 */
static int os_support_lsx() {
  const int hwcap = (int)getauxval(AT_HWCAP);

  return (hwcap & LA_HWCAP_LSX) != 0;
}
/*
 * Pick the kernel set (CORE_*) to build for, from the processor series
 * and the SIMD support reported by the OS:
 *   - LA464/LA664: CORE_LA464 with LASX, else CORE_LA264 with LSX,
 *     else CORE_LA64_GENERIC;
 *   - LA264/LA364: CORE_LA264 with LSX, else CORE_LA64_GENERIC;
 *   - any other series: CORE_LA64_GENERIC.
 */
int get_coretype(void) {
  const uint32_t series = get_prid() & PRID_SERIES_MASK;
  const int lasx_capable = (series == PRID_SERIES_LA464) ||
                           (series == PRID_SERIES_LA664);
  const int lsx_only = (series == PRID_SERIES_LA264) ||
                       (series == PRID_SERIES_LA364);

  /* Unknown series: do not even query the OS. */
  if (!lasx_capable && !lsx_only)
    return CORE_LA64_GENERIC;

  if (lasx_capable && os_support_lasx())
    return CORE_LA464;

  return os_support_lsx() ? CORE_LA264 : CORE_LA64_GENERIC;
}
/*
 * Map the processor series field of the PRID to the matching CPU_*
 * microarchitecture index; any series not listed maps to
 * CPU_LA64_GENERIC.
 */
int get_cputype(void) {
  const uint32_t series = get_prid() & PRID_SERIES_MASK;

  if (series == PRID_SERIES_LA264)
    return CPU_LA264;
  if (series == PRID_SERIES_LA364)
    return CPU_LA364;
  if (series == PRID_SERIES_LA464)
    return CPU_LA464;
  if (series == PRID_SERIES_LA664)
    return CPU_LA664;

  return CPU_LA64_GENERIC;
}
char *get_corename(void) { char *get_corename(void) {
return cpuname[detect()]; return corename[get_coretype()];
}
void get_libname(void){
printf("%s", corename_lower[get_coretype()]);
} }
void get_architecture(void) { void get_architecture(void) {
@ -86,8 +332,7 @@ void get_architecture(void) {
} }
void get_subarchitecture(void) { void get_subarchitecture(void) {
int d = detect(); printf("%s", cpuname[get_cputype()]);
printf("%s", cpuname[d]);
} }
void get_subdirname(void) { void get_subdirname(void) {
@ -95,50 +340,69 @@ void get_subdirname(void) {
} }
/*
 * Print the configuration macros for the detected CPU to stdout:
 *   - the core name (kernel set) and CPU_NAME (microarchitecture);
 *   - L1/L2/L3 cache size, associativity and line size as reported by
 *     cpucfg, using the L<n>_* names for a unified cache and L<n>_CODE_*
 *     (plus L1_DATA_* for L1) for a split one;
 *   - HAVE_LSX / HAVE_LASX when the OS reports the extension;
 *   - NUM_CORES when at least one processor was counted;
 *   - DTB_DEFAULT_ENTRIES.
 */
void get_cpuconfig(void) {
  cache_info_t info;
  uint32_t num_cores = 0;

  printf("#define %s\n", corename[get_coretype()]); // Core name

  printf("#define CPU_NAME %s\n", cpuname[get_cputype()]); // Cpu microarchitecture name

  get_cacheinfo(CACHE_INFO_L1_IU, &info);
  if (info.present) {
    if (info.unify) { // Unified cache, without distinguishing between instructions and data
      printf("#define L1_SIZE %d\n", info.size);
      printf("#define L1_ASSOCIATIVE %d\n", info.associative);
      printf("#define L1_LINESIZE %d\n", info.linesize);
    } else {
      printf("#define L1_CODE_SIZE %d\n", info.size);
      printf("#define L1_CODE_ASSOCIATIVE %d\n", info.associative);
      printf("#define L1_CODE_LINESIZE %d\n", info.linesize);
    }
  }

  // A split (or absent) L1 instruction cache: look for a separate L1 data cache.
  if (!info.unify) {
    get_cacheinfo(CACHE_INFO_L1_D, &info);
    if (info.present) {
      printf("#define L1_DATA_SIZE %d\n", info.size);
      printf("#define L1_DATA_ASSOCIATIVE %d\n", info.associative);
      printf("#define L1_DATA_LINESIZE %d\n", info.linesize);
    }
  }

  get_cacheinfo(CACHE_INFO_L2_IU, &info);
  if (info.present > 0) {
    if (info.unify) {
      printf("#define L2_SIZE %d\n", info.size);
      printf("#define L2_ASSOCIATIVE %d\n", info.associative);
      printf("#define L2_LINESIZE %d\n", info.linesize);
    } else {
      printf("#define L2_CODE_SIZE %d\n", info.size);
      printf("#define L2_CODE_ASSOCIATIVE %d\n", info.associative);
      printf("#define L2_CODE_LINESIZE %d\n", info.linesize);
    }
  }

  get_cacheinfo(CACHE_INFO_L3_IU, &info);
  if (info.present > 0) {
    if (info.unify) {
      printf("#define L3_SIZE %d\n", info.size);
      printf("#define L3_ASSOCIATIVE %d\n", info.associative);
      printf("#define L3_LINESIZE %d\n", info.linesize);
    } else {
      printf("#define L3_CODE_SIZE %d\n", info.size);
      printf("#define L3_CODE_ASSOCIATIVE %d\n", info.associative);
      printf("#define L3_CODE_LINESIZE %d\n", info.linesize);
    }
  }

  // The helpers must be CALLED: a bare function name is a non-null pointer
  // and would make both macros unconditional.
  if (os_support_lsx()) printf("#define HAVE_LSX\n");
  if (os_support_lasx()) printf("#define HAVE_LASX\n");

  get_cpucount(&num_cores);
  if (num_cores)
    printf("#define NUM_CORES %u\n", num_cores); // %u: num_cores is unsigned

  //TODO: It's unclear what this entry represents, but it is indeed necessary.
  //It has been set based on reference to other platforms.
  printf("#define DTB_DEFAULT_ENTRIES 64\n");
}

View File

@ -1082,7 +1082,7 @@ if (buffer == NULL) {
} }
//For target LOONGSON3R5, applying an offset to the buffer is essential //For LOONGARCH64, applying an offset to the buffer is essential
//for minimizing cache conflicts and optimizing performance. //for minimizing cache conflicts and optimizing performance.
#if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY) #if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY)
if (sa == NULL) sa = (void *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A); if (sa == NULL) sa = (void *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A);

View File

@ -28,25 +28,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/auxv.h> #include <sys/auxv.h>
#include "common.h" #include "common.h"
extern gotoblas_t gotoblas_LOONGSON3R5; #define NUM_CORETYPES 6
extern gotoblas_t gotoblas_LOONGSON2K1000; #define LOONGARCH_CFG0 0x00
extern gotoblas_t gotoblas_LOONGSONGENERIC; #define LA_HWCAP_LSX (1U << 4)
#define LA_HWCAP_LASX (1U << 5)
#define PRID_SERIES_MASK 0xf000
#define PRID_SERIES_LA264 0xa000
#define PRID_SERIES_LA364 0xb000
#define PRID_SERIES_LA464 0xc000
#define PRID_SERIES_LA664 0xd000
extern gotoblas_t gotoblas_LA64_GENERIC;
extern gotoblas_t gotoblas_LA264;
extern gotoblas_t gotoblas_LA464;
extern void openblas_warning(int verbose, const char * msg); extern void openblas_warning(int verbose, const char * msg);
#define NUM_CORETYPES 3
static char *corename[] = { static char *corename[] = {
"loongson3r5", "la64_generic",
"loongson2k1000", "la264",
"la464",
"loongsongeneric", "loongsongeneric",
"loongson2k1000",
"loongson3r5",
"unknown" "unknown"
}; };
char *gotoblas_corename(void) { char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_LOONGSON3R5) return corename[0]; if (gotoblas == &gotoblas_LA64_GENERIC) return corename[0];
if (gotoblas == &gotoblas_LOONGSON2K1000) return corename[1]; if (gotoblas == &gotoblas_LA264) return corename[1];
if (gotoblas == &gotoblas_LOONGSONGENERIC) return corename[2]; if (gotoblas == &gotoblas_LA464) return corename[2];
return corename[NUM_CORETYPES]; return corename[NUM_CORETYPES];
} }
@ -66,27 +77,78 @@ static gotoblas_t *force_coretype(char *coretype) {
switch (found) switch (found)
{ {
case 0: return (&gotoblas_LOONGSON3R5); case 0: return (&gotoblas_LA64_GENERIC);
case 1: return (&gotoblas_LOONGSON2K1000); case 1: return (&gotoblas_LA264);
case 2: return (&gotoblas_LOONGSONGENERIC); case 2: return (&gotoblas_LA464);
case 3: return (&gotoblas_LA64_GENERIC);
case 4: return (&gotoblas_LA264);
case 5: return (&gotoblas_LA464);
} }
snprintf(message, 128, "Core not found: %s\n", coretype); snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message); openblas_warning(1, message);
return NULL; return NULL;
} }
#define LA_HWCAP_LSX (1U << 4)
#define LA_HWCAP_LASX (1U << 5)
static gotoblas_t *get_coretype(void) { /* Detect whether the OS supports the LASX instruction set */
int hwcap = (int)getauxval(AT_HWCAP); static int os_support_lasx() {
int hwcap = (int)getauxval(AT_HWCAP);
if (hwcap & LA_HWCAP_LASX) if (hwcap & LA_HWCAP_LASX)
return &gotoblas_LOONGSON3R5; return 1;
else if (hwcap & LA_HWCAP_LSX)
return &gotoblas_LOONGSON2K1000;
else else
return &gotoblas_LOONGSONGENERIC; return 0;
}
/*
 * Detect whether the OS supports the LSX instruction set by testing the
 * LA_HWCAP_LSX bit of the AT_HWCAP auxiliary vector entry.
 * Returns 1 when the bit is set, 0 otherwise.
 */
static int os_support_lsx() {
  const int hwcap = (int)getauxval(AT_HWCAP);

  return (hwcap & LA_HWCAP_LSX) ? 1 : 0;
}
/*
 * Read cpucfg configuration word LOONGARCH_CFG0 and return it; the
 * caller extracts the processor series with PRID_SERIES_MASK.
 */
static uint32_t get_prid() {
  uint32_t prid = 0;

  __asm__ volatile ("cpucfg %0, %1 \n\t" : "+&r"(prid) : "r"(LOONGARCH_CFG0));

  return prid;
}
/*
 * Select the kernel table at runtime from the processor series and the
 * SIMD instructions supported by the system:
 *   - LA464/LA664: gotoblas_LA464 with LASX, else gotoblas_LA264 with
 *     LSX, else gotoblas_LA64_GENERIC;
 *   - LA264/LA364: gotoblas_LA264 with LSX, else gotoblas_LA64_GENERIC;
 *   - any other series: gotoblas_LA64_GENERIC.
 */
static gotoblas_t *get_coretype(void) {
  const uint32_t series = get_prid() & PRID_SERIES_MASK;
  const int lasx_capable = (series == PRID_SERIES_LA464) ||
                           (series == PRID_SERIES_LA664);
  const int lsx_only = (series == PRID_SERIES_LA264) ||
                       (series == PRID_SERIES_LA364);

  /* Unknown series: fall back without querying the OS. */
  if (!lasx_capable && !lsx_only)
    return &gotoblas_LA64_GENERIC;

  if (lasx_capable && os_support_lasx())
    return &gotoblas_LA464;

  if (os_support_lsx())
    return &gotoblas_LA264;

  return &gotoblas_LA64_GENERIC;
}
void gotoblas_dynamic_init(void) { void gotoblas_dynamic_init(void) {

View File

@ -752,7 +752,7 @@ int get_L3_size() {
} }
void blas_set_parameter(void){ void blas_set_parameter(void){
#if defined(LOONGSON3R5) #if defined(LA464)
int L3_size = get_L3_size(); int L3_size = get_L3_size();
#ifdef SMP #ifdef SMP
if(blas_num_threads == 1){ if(blas_num_threads == 1){

View File

@ -135,11 +135,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_CELL */ /* #define FORCE_CELL */
/* #define FORCE_MIPS64_GENERIC */ /* #define FORCE_MIPS64_GENERIC */
/* #define FORCE_SICORTEX */ /* #define FORCE_SICORTEX */
/* #define FORCE_LOONGSON3R3 */ /* #define FORCE_LOONGSON3R3 */
/* #define FORCE_LOONGSON3R4 */ /* #define FORCE_LOONGSON3R4 */
/* #define FORCE_LOONGSON3R5 */ /* #define FORCE_LOONGSON3R5 */
/* #define FORCE_LOONGSON2K1000 */ /* #define FORCE_LOONGSON2K1000 */
/* #define FORCE_LOONGSONGENERIC */ /* #define FORCE_LOONGSONGENERIC */
/* #define FORCE_LA64_GENERIC */
/* #define FORCE_LA264 */
/* #define FORCE_LA464 */
/* #define FORCE_I6400 */ /* #define FORCE_I6400 */
/* #define FORCE_P6600 */ /* #define FORCE_P6600 */
/* #define FORCE_P5600 */ /* #define FORCE_P5600 */
@ -153,7 +156,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_EV5 */ /* #define FORCE_EV5 */
/* #define FORCE_EV6 */ /* #define FORCE_EV6 */
/* #define FORCE_CSKY */ /* #define FORCE_CSKY */
/* #define FORCE_CK860FV */ /* #define FORCE_CK860FV */
/* #define FORCE_GENERIC */ /* #define FORCE_GENERIC */
#ifdef FORCE_P2 #ifdef FORCE_P2
@ -979,46 +982,76 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else #else
#endif #endif
#ifdef FORCE_LOONGSON3R5 #if defined(FORCE_LA464) || defined(FORCE_LOONGSON3R5)
#define FORCE #define FORCE
#define ARCHITECTURE "LOONGARCH" #define ARCHITECTURE "LOONGARCH"
#define SUBARCHITECTURE "LOONGSON3R5" #ifdef NO_LASX
#ifdef NO_LSX
#define SUBARCHITECTURE "LA64_GENERIC"
#define SUBDIRNAME "loongarch64" #define SUBDIRNAME "loongarch64"
#define ARCHCONFIG "-DLOONGSON3R5 " \ #define ARCHCONFIG "-DLA64_GENERIC " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
#define LIBNAME "loongson3r5"
#define CORENAME "LOONGSON3R5"
#else
#endif
#ifdef FORCE_LOONGSON2K1000
#define FORCE
#define ARCHITECTURE "LOONGARCH"
#define SUBARCHITECTURE "LOONGSON2K1000"
#define SUBDIRNAME "loongarch64"
#define ARCHCONFIG "-DLOONGSON2K1000 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA" "-DDTB_DEFAULT_ENTRIES=64 "
#define LIBNAME "loongson2k1000" #define LIBNAME "la64_generic"
#define CORENAME "LOONGSON2K1000" #define CORENAME "LA64_GENERIC"
#else #else
#endif #define SUBARCHITECTURE "LA264"
#ifdef FORCE_LOONGSONGENERIC
#define FORCE
#define ARCHITECTURE "LOONGARCH"
#define SUBARCHITECTURE "LOONGSONGENERIC"
#define SUBDIRNAME "loongarch64" #define SUBDIRNAME "loongarch64"
#define ARCHCONFIG "-DLOONGSONGENERIC " \ #define ARCHCONFIG "-DLA264 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA" "-DDTB_DEFAULT_ENTRIES=64 "
#define LIBNAME "loongsongeneric" #define LIBNAME "la264"
#define CORENAME "LOONGSONGENERIC" #define CORENAME "LA264"
#endif
#else #else
#define SUBARCHITECTURE "LA464"
#define SUBDIRNAME "loongarch64"
#define ARCHCONFIG "-DLA464 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 "
#define LIBNAME "la464"
#define CORENAME "LA464"
#endif
#endif
#if defined(FORCE_LA264) || defined(FORCE_LOONGSON2K1000)
#define FORCE
#define ARCHITECTURE "LOONGARCH"
#ifdef NO_LSX
#define SUBARCHITECTURE "LA64_GENERIC"
#define SUBDIRNAME "loongarch64"
#define ARCHCONFIG "-DLA64_GENERIC " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 "
#define LIBNAME "la64_generic"
#define CORENAME "LA64_GENERIC"
#else
#define SUBARCHITECTURE "LA264"
#define SUBDIRNAME "loongarch64"
#define ARCHCONFIG "-DLA264 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 "
#define LIBNAME "la264"
#define CORENAME "LA264"
#endif
#endif
#if defined(FORCE_LA64_GENERIC) || defined(FORCE_LOONGSONGENERIC)
#define FORCE
#define ARCHITECTURE "LOONGARCH"
#define SUBARCHITECTURE "LA64_GENERIC"
#define SUBDIRNAME "loongarch64"
#define ARCHCONFIG "-DLA64_GENERIC " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 "
#define LIBNAME "la64_generic"
#define CORENAME "LA64_GENERIC"
#endif #endif
#ifdef FORCE_I6400 #ifdef FORCE_I6400

View File

@ -572,7 +572,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
buffer = (XFLOAT *)blas_memory_alloc(0); buffer = (XFLOAT *)blas_memory_alloc(0);
//For target LOONGSON3R5, applying an offset to the buffer is essential //For LOONGARCH64, applying an offset to the buffer is essential
//for minimizing cache conflicts and optimizing performance. //for minimizing cache conflicts and optimizing performance.
#if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY) #if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY)
sa = (XFLOAT *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A); sa = (XFLOAT *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A);

View File

@ -1086,7 +1086,7 @@ static void init_parameter(void) {
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
#endif #endif
#if defined(LOONGSON3R5) #if defined(LA464)
int L3_size = get_L3_size(); int L3_size = get_L3_size();
#ifdef SMP #ifdef SMP
if(blas_num_threads == 1){ if(blas_num_threads == 1){

View File

@ -2838,7 +2838,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16 #define SYMV_P 16
#endif #endif
#if defined (LOONGSON3R5) #if defined (LA464)
#define SNUMOPT 2 #define SNUMOPT 2
#define DNUMOPT 2 #define DNUMOPT 2
@ -2891,7 +2891,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16 #define SYMV_P 16
#endif #endif
#ifdef LOONGSON2K1000 #ifdef LA264
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
@ -2926,7 +2926,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16 #define SYMV_P 16
#endif #endif
#ifdef LOONGSONGENERIC #ifdef LA64_GENERIC
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL