Merge branch 'develop' into osx-lapacktest

This commit is contained in:
Martin Kroeker 2021-07-15 09:49:10 +02:00 committed by GitHub
commit 3fe04845e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 50 additions and 25 deletions

View File

@@ -54,6 +54,7 @@
#define VENDOR_TRANSMETA 9 #define VENDOR_TRANSMETA 9
#define VENDOR_NSC 10 #define VENDOR_NSC 10
#define VENDOR_HYGON 11 #define VENDOR_HYGON 11
#define VENDOR_ZHAOXIN 12
#define VENDOR_UNKNOWN 99 #define VENDOR_UNKNOWN 99
#define BITMASK(a, b, c) ((((a) >> (b)) & (c))) #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))

View File

@@ -283,7 +283,7 @@ int get_vendor(void){
if (!strcmp(vendor, "CyrixInstead")) return VENDOR_CYRIX; if (!strcmp(vendor, "CyrixInstead")) return VENDOR_CYRIX;
if (!strcmp(vendor, "NexGenDriven")) return VENDOR_NEXGEN; if (!strcmp(vendor, "NexGenDriven")) return VENDOR_NEXGEN;
if (!strcmp(vendor, "CentaurHauls")) return VENDOR_CENTAUR; if (!strcmp(vendor, "CentaurHauls")) return VENDOR_CENTAUR;
if (!strcmp(vendor, " Shanghai ")) return VENDOR_CENTAUR; if (!strcmp(vendor, " Shanghai ")) return VENDOR_ZHAOXIN;
if (!strcmp(vendor, "RiseRiseRise")) return VENDOR_RISE; if (!strcmp(vendor, "RiseRiseRise")) return VENDOR_RISE;
if (!strcmp(vendor, " SiS SiS SiS")) return VENDOR_SIS; if (!strcmp(vendor, " SiS SiS SiS")) return VENDOR_SIS;
if (!strcmp(vendor, "GenuineTMx86")) return VENDOR_TRANSMETA; if (!strcmp(vendor, "GenuineTMx86")) return VENDOR_TRANSMETA;
@@ -1067,7 +1067,8 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
if ((get_vendor() == VENDOR_AMD) || if ((get_vendor() == VENDOR_AMD) ||
(get_vendor() == VENDOR_HYGON) || (get_vendor() == VENDOR_HYGON) ||
(get_vendor() == VENDOR_CENTAUR)) { (get_vendor() == VENDOR_CENTAUR) ||
(get_vendor() == VENDOR_ZHAOXIN)) {
cpuid(0x80000005, &eax, &ebx, &ecx, &edx); cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
LDTB.size = 4096; LDTB.size = 4096;
@@ -1190,7 +1191,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
int get_cpuname(void){ int get_cpuname(void){
int family, exfamily, model, vendor, exmodel; int family, exfamily, model, vendor, exmodel, stepping;
if (!have_cpuid()) return CPUTYPE_80386; if (!have_cpuid()) return CPUTYPE_80386;
@@ -1198,6 +1199,7 @@ int get_cpuname(void){
exfamily = get_cputype(GET_EXFAMILY); exfamily = get_cputype(GET_EXFAMILY);
model = get_cputype(GET_MODEL); model = get_cputype(GET_MODEL);
exmodel = get_cputype(GET_EXMODEL); exmodel = get_cputype(GET_EXMODEL);
stepping = get_cputype(GET_STEPPING);
vendor = get_vendor(); vendor = get_vendor();
@@ -1628,16 +1630,21 @@ int get_cpuname(void){
switch (family) { switch (family) {
case 0x5: case 0x5:
return CPUTYPE_CENTAURC6; return CPUTYPE_CENTAURC6;
break;
case 0x6: case 0x6:
if (model == 0xf && stepping < 0xe)
return CPUTYPE_NANO; return CPUTYPE_NANO;
break;
case 0x7:
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
break; default:
} if (family >= 0x7)
return CPUTYPE_NEHALEM;
else
return CPUTYPE_VIAC3; return CPUTYPE_VIAC3;
} }
}
if (vendor == VENDOR_ZHAOXIN){
return CPUTYPE_NEHALEM;
}
if (vendor == VENDOR_RISE){ if (vendor == VENDOR_RISE){
switch (family) { switch (family) {
@@ -1869,7 +1876,7 @@ char *get_lower_cpunamechar(void){
int get_coretype(void){ int get_coretype(void){
int family, exfamily, model, exmodel, vendor; int family, exfamily, model, exmodel, vendor, stepping;
if (!have_cpuid()) return CORE_80486; if (!have_cpuid()) return CORE_80486;
@@ -1877,6 +1884,7 @@ int get_coretype(void){
exfamily = get_cputype(GET_EXFAMILY); exfamily = get_cputype(GET_EXFAMILY);
model = get_cputype(GET_MODEL); model = get_cputype(GET_MODEL);
exmodel = get_cputype(GET_EXMODEL); exmodel = get_cputype(GET_EXMODEL);
stepping = get_cputype(GET_STEPPING);
vendor = get_vendor(); vendor = get_vendor();
@@ -2286,14 +2294,20 @@ int get_coretype(void){
if (vendor == VENDOR_CENTAUR) { if (vendor == VENDOR_CENTAUR) {
switch (family) { switch (family) {
case 0x6: case 0x6:
if (model == 0xf && stepping < 0xe)
return CORE_NANO; return CORE_NANO;
break;
case 0x7:
return CORE_NEHALEM; return CORE_NEHALEM;
break; default:
} if (family >= 0x7)
return CORE_NEHALEM;
else
return CORE_VIAC3; return CORE_VIAC3;
} }
}
if (vendor == VENDOR_ZHAOXIN) {
return CORE_NEHALEM;
}
return CORE_UNKNOWN; return CORE_UNKNOWN;
} }

View File

@@ -292,6 +292,7 @@ extern gotoblas_t gotoblas_COOPERLAKE;
#define VENDOR_AMD 2 #define VENDOR_AMD 2
#define VENDOR_CENTAUR 3 #define VENDOR_CENTAUR 3
#define VENDOR_HYGON 4 #define VENDOR_HYGON 4
#define VENDOR_ZHAOXIN 5
#define VENDOR_UNKNOWN 99 #define VENDOR_UNKNOWN 99
#define BITMASK(a, b, c) ((((a) >> (b)) & (c))) #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
@@ -404,7 +405,7 @@ static int get_vendor(void){
if (!strcmp(vendor.vchar, "GenuineIntel")) return VENDOR_INTEL; if (!strcmp(vendor.vchar, "GenuineIntel")) return VENDOR_INTEL;
if (!strcmp(vendor.vchar, "AuthenticAMD")) return VENDOR_AMD; if (!strcmp(vendor.vchar, "AuthenticAMD")) return VENDOR_AMD;
if (!strcmp(vendor.vchar, "CentaurHauls")) return VENDOR_CENTAUR; if (!strcmp(vendor.vchar, "CentaurHauls")) return VENDOR_CENTAUR;
if (!strcmp(vendor.vchar, " Shanghai ")) return VENDOR_CENTAUR; if (!strcmp(vendor.vchar, " Shanghai ")) return VENDOR_ZHAOXIN;
if (!strcmp(vendor.vchar, "HygonGenuine")) return VENDOR_HYGON; if (!strcmp(vendor.vchar, "HygonGenuine")) return VENDOR_HYGON;
if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL; if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
@@ -415,7 +416,7 @@ static int get_vendor(void){
static gotoblas_t *get_coretype(void){ static gotoblas_t *get_coretype(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
int family, exfamily, model, vendor, exmodel; int family, exfamily, model, vendor, exmodel, stepping;
cpuid(1, &eax, &ebx, &ecx, &edx); cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -423,6 +424,7 @@ static gotoblas_t *get_coretype(void){
exfamily = BITMASK(eax, 20, 0xff); exfamily = BITMASK(eax, 20, 0xff);
model = BITMASK(eax, 4, 0x0f); model = BITMASK(eax, 4, 0x0f);
exmodel = BITMASK(eax, 16, 0x0f); exmodel = BITMASK(eax, 16, 0x0f);
stepping = BITMASK(eax, 0, 0x0f);
vendor = get_vendor(); vendor = get_vendor();
@@ -824,13 +826,19 @@ static gotoblas_t *get_coretype(void){
if (vendor == VENDOR_CENTAUR) { if (vendor == VENDOR_CENTAUR) {
switch (family) { switch (family) {
case 0x6: case 0x6:
if (model == 0xf && stepping < 0xe)
return &gotoblas_NANO; return &gotoblas_NANO;
break; return &gotoblas_NEHALEM;
case 0x7: default:
if (family >= 0x7)
return &gotoblas_NEHALEM; return &gotoblas_NEHALEM;
} }
} }
if (vendor == VENDOR_ZHAOXIN) {
return &gotoblas_NEHALEM;
}
return NULL; return NULL;
} }

View File

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_t_microk_haswell-4.c" #include "sgemv_t_microk_haswell-4.c"
#elif defined (SKYLAKEX) || defined (COOPERLAKE) #elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "sgemv_t_microk_haswell-4.c" #include "sgemv_t_microk_haswell-4.c"
#include "sgemv_t_microk_skylakex.c" /*#include "sgemv_t_microk_skylakex.c"*/
#endif #endif
#if defined(STEAMROLLER) || defined(EXCAVATOR) #if defined(STEAMROLLER) || defined(EXCAVATOR)

View File

@@ -99,6 +99,8 @@ typedef int blasint;
/* Inclusion of Linux-specific header is needed for definition of cpu_set_t. */ /* Inclusion of Linux-specific header is needed for definition of cpu_set_t. */
#ifdef OPENBLAS_OS_LINUX #ifdef OPENBLAS_OS_LINUX
#define _GNU_SOURCE #ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h> #include <sched.h>
#endif #endif

View File

@@ -2502,7 +2502,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 2048 #define GEMM_DEFAULT_OFFSET_B 2048
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8 #define SGEMM_DEFAULT_UNROLL_N 8
@@ -2534,7 +2534,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 2048 #define GEMM_DEFAULT_OFFSET_B 2048
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4