/*
 * cpuid_arm64.c
 *
 * The same change adds a get_cpucount() call at the end of get_cpuconfig(),
 * right after the block that prints the DTB_SIZE define.
 */

#if defined(__linux__) || defined(__linux) || defined(linux)
/*
 * Count the lines of a /proc/cpuinfo-style text file that begin with the
 * word "processor".
 *
 * path: file to scan.
 *
 * Returns the number of matching lines, or -1 when the file cannot be opened.
 */
static int count_processor_entries(const char *path)
{
    char buffer[2048];
    int n = 0;
    FILE *infile = fopen(path, "r");

    if (infile == NULL) {
        return -1;
    }

    while (fgets(buffer, sizeof(buffer), infile)) {
        if (!strncmp("processor", buffer, 9)) {
            n++;
        }
    }

    fclose(infile);
    return n;
}
#endif

/*
 * Print "#define NUM_CORES <n>" on stdout, where <n> is the number of
 * "processor" entries found in /proc/cpuinfo.
 *
 * Only Linux is handled; on any other system nothing is printed and
 * NUM_CORES stays undefined.  The test uses __linux__ because the bare
 * "linux" macro is not predefined when the compiler runs in a strict ISO
 * mode (e.g. -std=c11).
 */
void get_cpucount(void)
{
#if defined(__linux__) || defined(__linux) || defined(linux)
    int n = count_processor_entries("/proc/cpuinfo");

    /* An unreadable /proc/cpuinfo is reported as 0 cores instead of
       handing a NULL stream to fgets()/fclose(). */
    if (n < 0) {
        n = 0;
    }

    printf("#define NUM_CORES %d\n", n);
#endif
}

/*
 * param.h
 */
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4

/* FIXME: this should be using the cache size, but there is currently no easy
   way to query that on ARM. So if getarch counted more than 8 cores we simply
   assume the host is a big desktop or server with abundant cache rather than
   a phone or embedded device. An undefined NUM_CORES selects the small
   values as well. */
#if defined(NUM_CORES) && NUM_CORES > 8
  #define SGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 128

  #define SGEMM_DEFAULT_Q 1024
  #define DGEMM_DEFAULT_Q 512
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 512
#else
  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 160
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 128

  #define SGEMM_DEFAULT_Q 352
  #define DGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 224
  #define ZGEMM_DEFAULT_Q 112
#endif

#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096