Compare commits
5 Commits
master
...
revert-123
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c87911270 | ||
|
|
3c4c472584 | ||
|
|
a797666fbe | ||
|
|
00774b1105 | ||
|
|
6497aae57c |
@@ -5,8 +5,8 @@
|
||||
cmake_minimum_required(VERSION 2.8.5)
|
||||
project(OpenBLAS)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 2)
|
||||
set(OpenBLAS_PATCH_VERSION 20)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 0.dev)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
enable_language(ASM)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.20
|
||||
VERSION = 0.3.0.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
||||
120
cpuid_x86.c
120
cpuid_x86.c
@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
|
||||
*edx = cpuInfo[3];
|
||||
}
|
||||
|
||||
/*
 * Run CPUID for leaf `op` with sub-leaf `count` by calling __cpuidex, then
 * hand the four returned words back through eax/ebx/ecx/edx (in that order).
 */
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
{
  /* First slot starts at -1, the remaining three at 0, before the call. */
  int regs[4] = {-1, 0, 0, 0};

  __cpuidex(regs, op, count);

  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
|
||||
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
|
||||
#else
|
||||
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Execute the CPUID instruction with EAX = op and ECX = count and store the
 * resulting EAX/EBX/ECX/EDX values through the four output pointers.
 */
static C_INLINE void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx){
  int reg_a, reg_b, reg_c, reg_d;

#if defined(__i386__) && defined(__PIC__)
  /*
   * EBX is copied to EDI before CPUID and swapped back afterwards, so the
   * EBX result is delivered in EDI (the "=D" output) and EBX itself ends up
   * with its original value.
   * NOTE(review): presumably needed because EBX is reserved in i386 PIC
   * code - confirm.
   */
  __asm__ __volatile__
    ("mov %%ebx, %%edi;"
     "cpuid;"
     "xchgl %%ebx, %%edi;"
     : "=a" (reg_a), "=D" (reg_b), "=c" (reg_c), "=d" (reg_d)
     : "0" (op), "2" (count)
     : "cc");
#else
  __asm__ __volatile__
    ("cpuid"
     : "=a" (reg_a), "=b" (reg_b), "=c" (reg_c), "=d" (reg_d)
     : "0" (op), "2" (count)
     : "cc");
#endif

  *eax = reg_a;
  *ebx = reg_b;
  *ecx = reg_c;
  *edx = reg_d;
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
@@ -133,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
|
||||
*edx = idlist[current].d;
|
||||
}
|
||||
|
||||
/*
 * Sub-leaf variant of cpuid() for the build where CPUIDEMU is defined.
 * The `count` (sub-leaf) argument is ignored: the request is forwarded to
 * cpuid(op, ...) unchanged, and that call fills eax/ebx/ecx/edx.
 */
void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
  (void)count; /* sub-leaf is not used by this variant */
  /* Plain call: ISO C forbids `return expr;` in a function returning void. */
  cpuid (op, eax, ebx, ecx, edx);
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER
|
||||
@@ -312,9 +340,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
|
||||
|
||||
if (cpuid_level > 1) {
|
||||
|
||||
int numcalls =0 ;
|
||||
cpuid(2, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
|
||||
info[ 0] = BITMASK(eax, 8, 0xff);
|
||||
info[ 1] = BITMASK(eax, 16, 0xff);
|
||||
info[ 2] = BITMASK(eax, 24, 0xff);
|
||||
@@ -335,7 +363,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
info[14] = BITMASK(edx, 24, 0xff);
|
||||
|
||||
for (i = 0; i < 15; i++){
|
||||
|
||||
switch (info[i]){
|
||||
|
||||
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
|
||||
@@ -637,12 +664,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
LD1.linesize = 64;
|
||||
break;
|
||||
case 0x63 :
|
||||
DTB.size = 2048;
|
||||
DTB.associative = 4;
|
||||
DTB.linesize = 32;
|
||||
LDTB.size = 4096;
|
||||
LDTB.associative= 4;
|
||||
LDTB.linesize = 32;
|
||||
DTB.size = 2048;
|
||||
DTB.associative = 4;
|
||||
DTB.linesize = 32;
|
||||
LDTB.size = 4096;
|
||||
LDTB.associative= 4;
|
||||
LDTB.linesize = 32;
|
||||
break;
|
||||
case 0x66 :
|
||||
LD1.size = 8;
|
||||
LD1.associative = 4;
|
||||
@@ -675,12 +703,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
LC1.associative = 8;
|
||||
break;
|
||||
case 0x76 :
|
||||
ITB.size = 2048;
|
||||
ITB.associative = 0;
|
||||
ITB.linesize = 8;
|
||||
LITB.size = 4096;
|
||||
LITB.associative= 0;
|
||||
LITB.linesize = 8;
|
||||
ITB.size = 2048;
|
||||
ITB.associative = 0;
|
||||
ITB.linesize = 8;
|
||||
LITB.size = 4096;
|
||||
LITB.associative= 0;
|
||||
LITB.linesize = 8;
|
||||
break;
|
||||
case 0x77 :
|
||||
LC1.size = 16;
|
||||
LC1.associative = 4;
|
||||
@@ -891,6 +920,67 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
}
|
||||
|
||||
if (get_vendor() == VENDOR_INTEL) {
|
||||
if(LD1.size<=0 || LC1.size<=0){
|
||||
//If we didn't detect L1 correctly before,
|
||||
int count;
|
||||
for (count=0;count <4;count++) {
|
||||
cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
|
||||
switch (eax &0x1f) {
|
||||
case 0:
|
||||
continue;
|
||||
case 1:
|
||||
case 3:
|
||||
{
|
||||
switch ((eax >>5) &0x07)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
// fprintf(stderr,"L1 data cache...\n");
|
||||
int sets = ecx+1;
|
||||
int lines = (ebx & 0x0fff) +1;
|
||||
ebx>>=12;
|
||||
int part = (ebx&0x03ff)+1;
|
||||
ebx >>=10;
|
||||
int assoc = (ebx&0x03ff)+1;
|
||||
LD1.size = (assoc*part*lines*sets)/1024;
|
||||
LD1.associative = assoc;
|
||||
LD1.linesize= lines;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
switch ((eax >>5) &0x07)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
// fprintf(stderr,"L1 instruction cache...\n");
|
||||
int sets = ecx+1;
|
||||
int lines = (ebx & 0x0fff) +1;
|
||||
ebx>>=12;
|
||||
int part = (ebx&0x03ff)+1;
|
||||
ebx >>=10;
|
||||
int assoc = (ebx&0x03ff)+1;
|
||||
LC1.size = (assoc*part*lines*sets)/1024;
|
||||
LC1.associative = assoc;
|
||||
LC1.linesize= lines;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
|
||||
if (cpuid_level >= 0x80000006) {
|
||||
if(L2.size<=0){
|
||||
|
||||
@@ -778,11 +778,11 @@ static int initialized = 0;
|
||||
void gotoblas_affinity_init(void) {
|
||||
|
||||
int cpu, num_avail;
|
||||
#ifndef USE_OPENMP
|
||||
#ifndef USE_OPENMP
|
||||
cpu_set_t cpu_mask;
|
||||
#endif
|
||||
int i;
|
||||
|
||||
|
||||
if (initialized) return;
|
||||
|
||||
initialized = 1;
|
||||
@@ -826,54 +826,15 @@ void gotoblas_affinity_init(void) {
|
||||
common -> shmid = pshmid;
|
||||
|
||||
if (common -> magic != SH_MAGIC) {
|
||||
cpu_set_t *cpusetp;
|
||||
int nums;
|
||||
int ret;
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "Shared Memory Initialization.\n");
|
||||
#endif
|
||||
|
||||
//returns the number of processors configured on the system (not only those currently online)
|
||||
|
||||
nums = sysconf(_SC_NPROCESSORS_CONF);
|
||||
|
||||
#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3)
|
||||
common->num_procs = nums;
|
||||
#elif __GLIBC_PREREQ(2, 7)
|
||||
cpusetp = CPU_ALLOC(nums);
|
||||
if (cpusetp == NULL) {
|
||||
common->num_procs = nums;
|
||||
} else {
|
||||
size_t size;
|
||||
size = CPU_ALLOC_SIZE(nums);
|
||||
ret = sched_getaffinity(0,size,cpusetp);
|
||||
if (ret!=0)
|
||||
common->num_procs = nums;
|
||||
else
|
||||
common->num_procs = CPU_COUNT_S(size,cpusetp);
|
||||
}
|
||||
CPU_FREE(cpusetp);
|
||||
#else
|
||||
ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
|
||||
if (ret!=0) {
|
||||
common->num_procs = nums;
|
||||
} else {
|
||||
#if !__GLIBC_PREREQ(2, 6)
|
||||
int i;
|
||||
int n = 0;
|
||||
for (i=0;i<nums;i++)
|
||||
if (CPU_ISSET(i,cpusetp)) n++;
|
||||
common->num_procs = n;
|
||||
}
|
||||
#else
|
||||
common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);;
|
||||
|
||||
if(common -> num_procs > MAX_CPUS) {
|
||||
fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -886,7 +847,7 @@ void gotoblas_affinity_init(void) {
|
||||
if (common -> num_nodes > 1) numa_mapping();
|
||||
|
||||
common -> final_num_procs = 0;
|
||||
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
|
||||
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
|
||||
|
||||
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
|
||||
|
||||
|
||||
@@ -175,44 +175,7 @@ int get_num_procs(void);
|
||||
#else
|
||||
/*
 * Return the number of processors this process may use.
 *
 * The configured processor count from sysconf(_SC_NPROCESSORS_CONF) is
 * fetched on the first call and cached in a function-local static.  When
 * built for Linux against glibc >= 2.3, the value is then replaced by the
 * number of CPUs set in the calling process's affinity mask.  If the mask
 * cannot be allocated or queried, the cached value is returned unchanged.
 *
 * NOTE(review): as before, the affinity count overwrites the cached value,
 * so later calls start from the narrowed number rather than the configured
 * one - confirm this is intended.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);

#if !defined(OS_LINUX) || !defined(__GLIBC_PREREQ)
  return nums;
#else
  /* Nested so that __GLIBC_PREREQ is only expanded where it is defined. */
#if !__GLIBC_PREREQ(2, 3)
  return nums;
#elif !__GLIBC_PREREQ(2, 7)
  {
    /* No CPU_ALLOC before glibc 2.7: use a fixed-size set on the stack. */
    cpu_set_t cpuset;
    int n = 0;

    if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) return nums;

#if !__GLIBC_PREREQ(2, 6)
    {
      /* CPU_COUNT is unavailable before glibc 2.6: count the bits by hand. */
      int i;
      for (i = 0; i < nums && i < CPU_SETSIZE; i++)
        if (CPU_ISSET(i, &cpuset)) n++;
    }
#else
    n = CPU_COUNT(&cpuset);
#endif

    nums = n;
    return nums;
  }
#else
  {
    cpu_set_t *cpusetp;
    size_t size;

    cpusetp = CPU_ALLOC(nums);
    if (cpusetp == NULL) return nums;

    size = CPU_ALLOC_SIZE(nums);
    if (sched_getaffinity(0, size, cpusetp) == 0)
      nums = CPU_COUNT_S(size, cpusetp);

    /* Release the set on both the success and the failure path. */
    CPU_FREE(cpusetp);
    return nums;
  }
#endif
#endif
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user