Compare commits

...

5 Commits

Author SHA1 Message Date
Martin Kroeker
9c87911270 Revert "Honor cgroup/cpuset limits when enumerating cpus" 2017-07-24 16:16:16 +02:00
Zhang Xianyi
3c4c472584 Merge pull request #1236 from martin-frbg/l1cache
Use cpuid 4 with subleafs to query L1 cache size on Intel processors
2017-07-24 12:07:00 +08:00
Zhang Xianyi
a797666fbe Bump develop version for 0.3.0. 2017-07-24 12:06:29 +08:00
Martin Kroeker
00774b1105 Add dummy implementation of cpuid_count for the CPUIDEMU case 2017-07-12 21:56:23 +02:00
Martin Kroeker
6497aae57c Use cpuid 4 with subleafs to query L1 cache size on Intel processors 2017-07-12 20:43:09 +02:00
5 changed files with 113 additions and 99 deletions

View File

@@ -5,8 +5,8 @@
cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 20)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 0.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
enable_language(ASM)

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.2.20
VERSION = 0.3.0.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

View File

@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
*edx = cpuInfo[3];
}
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
{
int cpuInfo[4] = {-1};
__cpuidex(cpuInfo, op, count);
*eax = cpuInfo[0];
*ebx = cpuInfo[1];
*ecx = cpuInfo[2];
*edx = cpuInfo[3];
}
#else
#ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
#else
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}
static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
__asm__ __volatile__
("mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#else
__asm__ __volatile__
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#endif
}
#endif
#else
@@ -133,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
*edx = idlist[current].d;
}
void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
return cpuid (op, eax, ebx, ecx, edx);
}
#endif
#endif // _MSC_VER
@@ -312,9 +340,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level > 1) {
int numcalls =0 ;
cpuid(2, &eax, &ebx, &ecx, &edx);
numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
info[ 0] = BITMASK(eax, 8, 0xff);
info[ 1] = BITMASK(eax, 16, 0xff);
info[ 2] = BITMASK(eax, 24, 0xff);
@@ -335,7 +363,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
info[14] = BITMASK(edx, 24, 0xff);
for (i = 0; i < 15; i++){
switch (info[i]){
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
@@ -637,12 +664,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.linesize = 64;
break;
case 0x63 :
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
break;
case 0x66 :
LD1.size = 8;
LD1.associative = 4;
@@ -675,12 +703,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.associative = 8;
break;
case 0x76 :
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
break;
case 0x77 :
LC1.size = 16;
LC1.associative = 4;
@@ -891,6 +920,67 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
}
if (get_vendor() == VENDOR_INTEL) {
if(LD1.size<=0 || LC1.size<=0){
//If we didn't detect L1 correctly before,
int count;
for (count=0;count <4;count++) {
cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
switch (eax &0x1f) {
case 0:
continue;
case 1:
case 3:
{
switch ((eax >>5) &0x07)
{
case 1:
{
// fprintf(stderr,"L1 data cache...\n");
int sets = ecx+1;
int lines = (ebx & 0x0fff) +1;
ebx>>=12;
int part = (ebx&0x03ff)+1;
ebx >>=10;
int assoc = (ebx&0x03ff)+1;
LD1.size = (assoc*part*lines*sets)/1024;
LD1.associative = assoc;
LD1.linesize= lines;
break;
}
default:
break;
}
break;
}
case 2:
{
switch ((eax >>5) &0x07)
{
case 1:
{
// fprintf(stderr,"L1 instruction cache...\n");
int sets = ecx+1;
int lines = (ebx & 0x0fff) +1;
ebx>>=12;
int part = (ebx&0x03ff)+1;
ebx >>=10;
int assoc = (ebx&0x03ff)+1;
LC1.size = (assoc*part*lines*sets)/1024;
LC1.associative = assoc;
LC1.linesize= lines;
break;
}
default:
break;
}
break;
}
default:
break;
}
}
}
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) {
if(L2.size<=0){

View File

@@ -778,11 +778,11 @@ static int initialized = 0;
void gotoblas_affinity_init(void) {
int cpu, num_avail;
#ifndef USE_OPENMP
#ifndef USE_OPENMP
cpu_set_t cpu_mask;
#endif
int i;
if (initialized) return;
initialized = 1;
@@ -826,54 +826,15 @@ void gotoblas_affinity_init(void) {
common -> shmid = pshmid;
if (common -> magic != SH_MAGIC) {
cpu_set_t *cpusetp;
int nums;
int ret;
#ifdef DEBUG
fprintf(stderr, "Shared Memory Initialization.\n");
#endif
//returns the number of processors which are currently online
nums = sysconf(_SC_NPROCESSORS_CONF);
#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3)
common->num_procs = nums;
#elif __GLIBC_PREREQ(2, 7)
cpusetp = CPU_ALLOC(nums);
if (cpusetp == NULL) {
common->num_procs = nums;
} else {
size_t size;
size = CPU_ALLOC_SIZE(nums);
ret = sched_getaffinity(0,size,cpusetp);
if (ret!=0)
common->num_procs = nums;
else
common->num_procs = CPU_COUNT_S(size,cpusetp);
}
CPU_FREE(cpusetp);
#else
ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
if (ret!=0) {
common->num_procs = nums;
} else {
#if !__GLIBC_PREREQ(2, 6)
int i;
int n = 0;
for (i=0;i<nums;i++)
if (CPU_ISSET(i,cpusetp)) n++;
common->num_procs = n;
}
#else
common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
#endif
#endif
common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);;
if(common -> num_procs > MAX_CPUS) {
fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
exit(1);
}
@@ -886,7 +847,7 @@ void gotoblas_affinity_init(void) {
if (common -> num_nodes > 1) numa_mapping();
common -> final_num_procs = 0;
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;

View File

@@ -175,44 +175,7 @@ int get_num_procs(void);
#else
int get_num_procs(void) {
static int nums = 0;
cpu_set_t *cpusetp;
size_t size;
int ret;
int i,n;
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
#if !defined(OS_LINUX)
return nums;
#endif
#if !defined(__GLIBC_PREREQ)
return nums;
#endif
#if !__GLIBC_PREREQ(2, 3)
return nums;
#endif
#if !__GLIBC_PREREQ(2, 7)
ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
if (ret!=0) return nums;
n=0;
#if !__GLIBC_PREREQ(2, 6)
for (i=0;i<nums;i++)
if (CPU_ISSET(i,cpusetp)) n++;
nums=n;
#else
nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
#endif
return nums;
#endif
cpusetp = CPU_ALLOC(nums);
if (cpusetp == NULL) return nums;
size = CPU_ALLOC_SIZE(nums);
ret = sched_getaffinity(0,size,cpusetp);
if (ret!=0) return nums;
nums = CPU_COUNT_S(size,cpusetp);
CPU_FREE(cpusetp);
return nums;
}
#endif