Count cpu cores on ARMV8 and use that to pick the GEMM_PQ parameters (#2267)
There is currently no simple way to query cache sizes on ARMV8, so this takes the number of cores as a trivial indication if the target is a server-class device with a big cache, or just a single-board toy or smartphone.
This commit is contained in:
parent
673e5a0495
commit
6b83079368
|
@ -206,6 +206,33 @@ void get_subdirname(void)
|
|||
printf("arm64");
|
||||
}
|
||||
|
||||
void get_cpucount(void)
|
||||
{
|
||||
int n=0;
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("processor", buffer, 9))
|
||||
n++;
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
printf("#define NUM_CORES %d\n",n);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
|
@ -309,6 +336,7 @@ void get_cpuconfig(void)
|
|||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
}
|
||||
get_cpucount();
|
||||
}
|
||||
|
||||
|
||||
|
@ -351,5 +379,3 @@ void get_features(void)
|
|||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
|
31
param.h
31
param.h
|
@ -2636,15 +2636,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_P 256
|
||||
#define CGEMM_DEFAULT_P 256
|
||||
#define ZGEMM_DEFAULT_P 128
|
||||
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
||||
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
||||
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
||||
#if NUM_CORES > 8
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_P 256
|
||||
#define CGEMM_DEFAULT_P 256
|
||||
#define ZGEMM_DEFAULT_P 128
|
||||
|
||||
#define SGEMM_DEFAULT_Q 1024
|
||||
#define DGEMM_DEFAULT_Q 512
|
||||
#define CGEMM_DEFAULT_Q 512
|
||||
#define ZGEMM_DEFAULT_Q 512
|
||||
#define SGEMM_DEFAULT_Q 1024
|
||||
#define DGEMM_DEFAULT_Q 512
|
||||
#define CGEMM_DEFAULT_Q 512
|
||||
#define ZGEMM_DEFAULT_Q 512
|
||||
#else
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 160
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define ZGEMM_DEFAULT_P 128
|
||||
|
||||
#define SGEMM_DEFAULT_Q 352
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 224
|
||||
#define ZGEMM_DEFAULT_Q 112
|
||||
#endif
|
||||
|
||||
#define SGEMM_DEFAULT_R 4096
|
||||
#define DGEMM_DEFAULT_R 4096
|
||||
|
|
Loading…
Reference in New Issue