Merge branch 'develop'
This commit is contained in:
commit
551f478477
|
@ -1,4 +1,12 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.1
|
||||
30-Jun-2012
|
||||
common:
|
||||
x86/x86-64:
|
||||
* Fixed the SEGFAULT bug related to hyper-threading
|
||||
* Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes
|
||||
|
||||
====================================================================
|
||||
Version 0.2.0
|
||||
26-Jun-2012
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.0
|
||||
VERSION = 0.2.1
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
|
|
@ -44,6 +44,7 @@ Please read GotoBLAS_01Readme.txt
|
|||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: Used GotoBLAS2 Barcelona codes.
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and parts of Level-1 and Level-2 BLAS.
|
||||
|
|
2
cpuid.h
2
cpuid.h
|
@ -105,6 +105,7 @@
|
|||
#define CORE_NANO 19
|
||||
#define CORE_SANDYBRIDGE 20
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
|
@ -193,4 +194,5 @@ typedef struct {
|
|||
#define CPUTYPE_NANO 43
|
||||
#define CPUTYPE_SANDYBRIDGE 44
|
||||
#define CPUTYPE_BOBCAT 45
|
||||
#define CPUTYPE_BULLDOZER 46
|
||||
#endif
|
||||
|
|
|
@ -1027,6 +1027,7 @@ int get_cpuname(void){
|
|||
return CPUTYPE_OPTERON;
|
||||
case 1:
|
||||
case 10:
|
||||
case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
return CPUTYPE_BARCELONA;
|
||||
case 5:
|
||||
return CPUTYPE_BOBCAT;
|
||||
|
@ -1151,6 +1152,7 @@ static char *cpuname[] = {
|
|||
"NANO",
|
||||
"SANDYBRIDGE",
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
|
@ -1199,6 +1201,7 @@ static char *lowercpuname[] = {
|
|||
"nano",
|
||||
"sandybridge",
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
|
@ -1224,6 +1227,7 @@ static char *corename[] = {
|
|||
"NANO",
|
||||
"SANDYBRIDGE",
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
|
@ -1249,6 +1253,7 @@ static char *corename_lower[] = {
|
|||
"nano",
|
||||
"sandybridge",
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
};
|
||||
|
||||
|
||||
|
@ -1359,6 +1364,7 @@ int get_coretype(void){
|
|||
if (family == 0xf){
|
||||
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
|
||||
else if (exfamily == 5) return CORE_BOBCAT;
|
||||
else if (exfamily == 6) return CORE_BARCELONA; //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
else return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -447,6 +447,9 @@ static void disable_hyperthread(void) {
|
|||
|
||||
// NOTE: if the shared CPUs fall in different elements of the share and avail arrays, this may be a bug.
|
||||
for (i = 0; i < count ; i++){
|
||||
|
||||
share[i] &= common->avail[i];
|
||||
|
||||
if (popcount(share[i]) > 1) {
|
||||
|
||||
#ifdef DEBUG
|
||||
|
|
|
@ -102,6 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* #define FORCE_BARCELONA */
|
||||
/* #define FORCE_SHANGHAI */
|
||||
/* #define FORCE_ISTANBUL */
|
||||
/* #define FORCE_BULLDOZER */
|
||||
/* #define FORCE_BOBCAT */
|
||||
/* #define FORCE_SSE_GENERIC */
|
||||
/* #define FORCE_VIAC3 */
|
||||
|
@ -349,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "OPTERON"
|
||||
#endif
|
||||
|
||||
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
|
||||
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER)
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
|
@ -357,8 +358,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DBARCELONA " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
|
||||
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
|
||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
|
||||
#define LIBNAME "barcelona"
|
||||
#define CORENAME "BARCELONA"
|
||||
|
|
Loading…
Reference in New Issue