Scale P and Q with L2 cache size for SVE
The defaults in param.h now correspond to an L2 cache size of 128KB; at initialization, the SGEMM/DGEMM P and Q values are scaled up according to the actual L2 cache size.
This commit is contained in:
parent
e9c32ed165
commit
75fe9c21e5
|
@ -679,7 +679,6 @@ DYNAMIC_CORE += CORTEXA53
|
|||
DYNAMIC_CORE += CORTEXA57
|
||||
DYNAMIC_CORE += NEOVERSEN1
|
||||
ifneq ($(NO_SVE), 1)
|
||||
DYNAMIC_CORE += NEOVERSEV1
|
||||
DYNAMIC_CORE += NEOVERSEN2
|
||||
DYNAMIC_CORE += ARMV8SVE
|
||||
endif
|
||||
|
|
|
@ -133,9 +133,9 @@ extern gotoblas_t gotoblas_TSV110;
|
|||
extern gotoblas_t gotoblas_EMAG8180;
|
||||
extern gotoblas_t gotoblas_NEOVERSEN1;
|
||||
#ifndef NO_SVE
|
||||
extern gotoblas_t gotoblas_NEOVERSEV1;
|
||||
extern gotoblas_t gotoblas_NEOVERSEN2;
|
||||
extern gotoblas_t gotoblas_ARMV8SVE;
|
||||
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8SVE
|
||||
#else
|
||||
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
|
||||
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
|
||||
|
@ -167,6 +167,7 @@ extern void openblas_warning(int verbose, const char * msg);
|
|||
|
||||
static char *corename[] = {
|
||||
"armv8",
|
||||
"armv8sve",
|
||||
"cortexa53",
|
||||
"cortexa57",
|
||||
"cortexa72",
|
||||
|
@ -181,27 +182,26 @@ static char *corename[] = {
|
|||
"neoversen2",
|
||||
"thunderx3t110",
|
||||
"cortexa55",
|
||||
"armv8sve",
|
||||
"unknown"
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
|
||||
if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
|
||||
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
|
||||
if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
|
||||
if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
|
||||
if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
|
||||
if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
|
||||
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
|
||||
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
|
||||
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
|
||||
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
|
||||
if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
|
||||
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[12];
|
||||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[13];
|
||||
if (gotoblas == &gotoblas_CORTEXA55) return corename[14];
|
||||
if (gotoblas == &gotoblas_ARMV8SVE) return corename[15];
|
||||
if (gotoblas == &gotoblas_ARMV8SVE) return corename[ 1];
|
||||
if (gotoblas == &gotoblas_CORTEXA53) return corename[ 2];
|
||||
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 3];
|
||||
if (gotoblas == &gotoblas_CORTEXA72) return corename[ 4];
|
||||
if (gotoblas == &gotoblas_CORTEXA73) return corename[ 5];
|
||||
if (gotoblas == &gotoblas_FALKOR) return corename[ 6];
|
||||
if (gotoblas == &gotoblas_THUNDERX) return corename[ 7];
|
||||
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 8];
|
||||
if (gotoblas == &gotoblas_TSV110) return corename[ 9];
|
||||
if (gotoblas == &gotoblas_EMAG8180) return corename[10];
|
||||
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[11];
|
||||
if (gotoblas == &gotoblas_NEOVERSEV1) return corename[12];
|
||||
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
|
||||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
|
||||
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
|
|
|
@ -1245,7 +1245,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DARMV8SVE " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DSCALE_L2 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||
#define LIBNAME "armv8sve"
|
||||
|
|
|
@ -890,15 +890,41 @@ gotoblas_t TABLE_NAME = {
|
|||
};
|
||||
|
||||
#if (ARCH_ARM64)
|
||||
|
||||
#define L2_CACHE_FILE "/sys/devices/system/cpu/cpu0/cache/index2/size"
|
||||
/*
 * Returns the factor by which the default GEMM blocking parameters should
 * be scaled, expressed as the L2 cache size in units of 128KB.
 *
 * On Linux builds with SCALE_L2 defined, the size is read from the sysfs
 * file named by L2_CACHE_FILE.  Two formats are understood:
 *   "<n>K\n"  - size in kilobytes  (n >> 7  == n / 128)
 *   "<n>\n"   - size in bytes      (n >> 17 == n / (128 * 1024))
 * Any other unit suffix, a missing or unreadable file, or a cache smaller
 * than 128KB yields 1, i.e. the defaults are used unscaled.
 *
 * On all other builds the function always returns 1.
 */
static inline uint64_t get_l2_multiplier(void) {
#if defined(__linux) && defined(SCALE_L2)
  uint64_t multiplier = 1;
  FILE *sysfs_file = fopen(L2_CACHE_FILE, "r");

  if (sysfs_file != NULL) {
    char buffer[100];
    /* unsigned long long matches the %llu conversion exactly; the value is
       widened/narrowed to uint64_t only after parsing. */
    unsigned long long cache_size = 0;
    char cache_unit = '\n';

    /* Require both the number and the character that follows it, so that
       the unit (or the terminating newline) is always known. */
    if (fgets(buffer, sizeof(buffer), sysfs_file) != NULL &&
        sscanf(buffer, "%llu%c", &cache_size, &cache_unit) > 1) {
      switch (cache_unit) {
        case 'K':   /* size given in kilobytes */
          multiplier = (uint64_t)(cache_size >> 7);
          break;
        case '\n':  /* size given in bytes */
          multiplier = (uint64_t)(cache_size >> 17);
          break;
        default:    /* unknown unit: do not scale */
          break;
      }
      /* Never scale the defaults down below their 128KB baseline. */
      if (multiplier < 1) {
        multiplier = 1;
      }
    }

    /* Release the stream on every path once it has been opened. */
    fclose(sysfs_file);
  }

  return multiplier;
#else
  return 1;
#endif
}
|
||||
|
||||
static void init_parameter(void) {
|
||||
const uint64_t l2_multiplier = get_l2_multiplier();
|
||||
|
||||
#if (BUILD_BFLOAT16)
|
||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P * l2_multiplier;
|
||||
#endif
|
||||
#if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
|
||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P * l2_multiplier;
|
||||
#endif
|
||||
#if BUILD_COMPLEX==1
|
||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||
|
@ -911,10 +937,10 @@ static void init_parameter(void) {
|
|||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
#if BUILD_SINGLE == 1 || (BUILD_COMPLEX==1)
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q * l2_multiplier;
|
||||
#endif
|
||||
#if BUILD_DOUBLE== 1 || (BUILD_COMPLEX16==1)
|
||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q * l2_multiplier;
|
||||
#endif
|
||||
#if BUILD_COMPLEX== 1
|
||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||
|
|
8
param.h
8
param.h
|
@ -3517,13 +3517,13 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
|
|||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_MN 16
|
||||
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 160
|
||||
#define SGEMM_DEFAULT_P 30
|
||||
#define DGEMM_DEFAULT_P 30
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define ZGEMM_DEFAULT_P 128
|
||||
|
||||
#define SGEMM_DEFAULT_Q 352
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#define SGEMM_DEFAULT_Q 80
|
||||
#define DGEMM_DEFAULT_Q 40
|
||||
#define CGEMM_DEFAULT_Q 224
|
||||
#define ZGEMM_DEFAULT_Q 112
|
||||
|
||||
|
|
Loading…
Reference in New Issue