diff --git a/Changelog.txt b/Changelog.txt index c222c7eee..019870d8c 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,12 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.2.1 +30-Jun-2012 +common: +x86/x86-64: + * Fixed the SEGFAULT bug about hyper-threading + * Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes + ==================================================================== Version 0.2.0 26-Jun-2012 diff --git a/Makefile.rule b/Makefile.rule index 299273773..082487835 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.0 +VERSION = 0.2.1 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library diff --git a/README.md b/README.md index 82e9f528c..52d098366 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ Please read GotoBLAS_01Readme.txt - **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes. - **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64. - **AMD Bobcat**: Used GotoBLAS2 Barcelona codes. +- **AMD Bulldozer**: Used GotoBLAS2 Barcelona codes. #### MIPS64: - **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2. 
diff --git a/cpuid.h b/cpuid.h index fdcfcea00..bb57ad92d 100644 --- a/cpuid.h +++ b/cpuid.h @@ -105,6 +105,7 @@ #define CORE_NANO 19 #define CORE_SANDYBRIDGE 20 #define CORE_BOBCAT 21 +#define CORE_BULLDOZER 22 #define HAVE_SSE (1 << 0) #define HAVE_SSE2 (1 << 1) @@ -193,4 +194,5 @@ typedef struct { #define CPUTYPE_NANO 43 #define CPUTYPE_SANDYBRIDGE 44 #define CPUTYPE_BOBCAT 45 +#define CPUTYPE_BULLDOZER 46 #endif diff --git a/cpuid_x86.c b/cpuid_x86.c index 204f41d51..ea1162e8f 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1027,6 +1027,7 @@ int get_cpuname(void){ return CPUTYPE_OPTERON; case 1: case 10: + case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series return CPUTYPE_BARCELONA; case 5: return CPUTYPE_BOBCAT; @@ -1151,6 +1152,7 @@ static char *cpuname[] = { "NANO", "SANDYBRIDGE", "BOBCAT", + "BULLDOZER", }; static char *lowercpuname[] = { @@ -1199,6 +1201,7 @@ static char *lowercpuname[] = { "nano", "sandybridge", "bobcat", + "bulldozer", }; static char *corename[] = { @@ -1224,6 +1227,7 @@ static char *corename[] = { "NANO", "SANDYBRIDGE", "BOBCAT", + "BULLDOZER", }; static char *corename_lower[] = { @@ -1249,6 +1253,7 @@ static char *corename_lower[] = { "nano", "sandybridge", "bobcat", + "bulldozer", }; @@ -1359,6 +1364,7 @@ int get_coretype(void){ if (family == 0xf){ if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON; else if (exfamily == 5) return CORE_BOBCAT; + else if (exfamily == 6) return CORE_BARCELONA; //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series else return CORE_BARCELONA; } } diff --git a/driver/others/init.c b/driver/others/init.c index 4a6f0aae8..f6924d5f4 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -447,6 +447,9 @@ static void disable_hyperthread(void) { //When the shared cpu are in different element of share & avail array, this may be a bug. 
for (i = 0; i < count ; i++){ + + share[i] &= common->avail[i]; + if (popcount(share[i]) > 1) { #ifdef DEBUG diff --git a/getarch.c b/getarch.c index 7e08e774e..5916a9a04 100644 --- a/getarch.c +++ b/getarch.c @@ -102,6 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_BARCELONA */ /* #define FORCE_SHANGHAI */ /* #define FORCE_ISTANBUL */ +/* #define FORCE_BULLDOZER */ /* #define FORCE_BOBCAT */ /* #define FORCE_SSE_GENERIC */ /* #define FORCE_VIAC3 */ @@ -349,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "OPTERON" #endif -#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) +#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER) #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" @@ -357,8 +358,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DBARCELONA " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \ - "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ - "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ + "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \ + "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" #define LIBNAME "barcelona" #define CORENAME "BARCELONA"