Merge branch 'develop'
This commit is contained in:
		
						commit
						551f478477
					
				|  | @ -1,4 +1,12 @@ | |||
| OpenBLAS ChangeLog | ||||
| ==================================================================== | ||||
| Version 0.2.1 | ||||
| 30-Jun-2012 | ||||
| common: | ||||
| x86/x86-64: | ||||
| 	* Fixed the SEGFAULT bug about hyper-threading | ||||
| 	* Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes | ||||
| 
 | ||||
| ==================================================================== | ||||
| Version 0.2.0 | ||||
| 26-Jun-2012 | ||||
|  |  | |||
|  | @ -3,7 +3,7 @@ | |||
| #
 | ||||
| 
 | ||||
| # This library's version
 | ||||
| VERSION = 0.2.0 | ||||
| VERSION = 0.2.1 | ||||
| 
 | ||||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
 | ||||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library 
 | ||||
|  |  | |||
|  | @ -44,6 +44,7 @@ Please read GotoBLAS_01Readme.txt | |||
| - **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes. | ||||
| - **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64. | ||||
| - **AMD Bobcat**: Used GotoBLAS2 Barcelona codes. | ||||
| - **AMD Bulldozer**: Used GotoBLAS2 Barcelona codes. | ||||
| 
 | ||||
| #### MIPS64: | ||||
| - **ICT Loongson 3A**: Optimized Level-3 BLAS and part of Level-1 and Level-2 BLAS. | ||||
|  |  | |||
							
								
								
									
										2
									
								
								cpuid.h
								
								
								
								
							
							
						
						
									
										2
									
								
								cpuid.h
								
								
								
								
							|  | @ -105,6 +105,7 @@ | |||
| #define CORE_NANO	19 | ||||
| #define CORE_SANDYBRIDGE 20 | ||||
| #define CORE_BOBCAT     21 | ||||
| #define CORE_BULLDOZER  22 | ||||
| 
 | ||||
| #define HAVE_SSE      (1 <<  0) | ||||
| #define HAVE_SSE2     (1 <<  1) | ||||
|  | @ -193,4 +194,5 @@ typedef struct { | |||
| #define CPUTYPE_NANO			43 | ||||
| #define CPUTYPE_SANDYBRIDGE             44 | ||||
| #define CPUTYPE_BOBCAT                  45 | ||||
| #define CPUTYPE_BULLDOZER               46 | ||||
| #endif | ||||
|  |  | |||
|  | @ -1027,6 +1027,7 @@ int get_cpuname(void){ | |||
| 	return CPUTYPE_OPTERON; | ||||
|       case  1: | ||||
|       case 10: | ||||
|       case  6:   //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
 | ||||
| 	return CPUTYPE_BARCELONA; | ||||
|       case  5: | ||||
| 	return CPUTYPE_BOBCAT; | ||||
|  | @ -1151,6 +1152,7 @@ static char *cpuname[] = { | |||
|   "NANO", | ||||
|   "SANDYBRIDGE", | ||||
|   "BOBCAT", | ||||
|   "BULLDOZER", | ||||
| }; | ||||
| 
 | ||||
| static char *lowercpuname[] = { | ||||
|  | @ -1199,6 +1201,7 @@ static char *lowercpuname[] = { | |||
|   "nano", | ||||
|   "sandybridge", | ||||
|   "bobcat", | ||||
|   "bulldozer", | ||||
| }; | ||||
| 
 | ||||
| static char *corename[] = { | ||||
|  | @ -1224,6 +1227,7 @@ static char *corename[] = { | |||
|   "NANO", | ||||
|   "SANDYBRIDGE", | ||||
|   "BOBCAT", | ||||
|   "BULLDOZER", | ||||
| }; | ||||
| 
 | ||||
| static char *corename_lower[] = { | ||||
|  | @ -1249,6 +1253,7 @@ static char *corename_lower[] = { | |||
|   "nano", | ||||
|   "sandybridge", | ||||
|   "bobcat", | ||||
|   "bulldozer", | ||||
| }; | ||||
| 
 | ||||
| 
 | ||||
|  | @ -1359,6 +1364,7 @@ int get_coretype(void){ | |||
|     if (family == 0xf){ | ||||
|       if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;  | ||||
|       else if (exfamily == 5) return CORE_BOBCAT;  | ||||
|       else if (exfamily == 6) return CORE_BARCELONA;  //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
 | ||||
|       else return CORE_BARCELONA; | ||||
|     } | ||||
|   } | ||||
|  |  | |||
|  | @ -447,6 +447,9 @@ static void disable_hyperthread(void) { | |||
| 
 | ||||
|     //When the shared CPUs are in different elements of the share & avail arrays, this may be a bug.
 | ||||
|     for (i = 0; i < count ; i++){ | ||||
| 
 | ||||
|       share[i] &= common->avail[i]; | ||||
| 
 | ||||
|       if (popcount(share[i]) > 1) { | ||||
|        | ||||
| #ifdef DEBUG | ||||
|  |  | |||
|  | @ -102,6 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /* #define FORCE_BARCELONA	*/ | ||||
| /* #define FORCE_SHANGHAI	*/ | ||||
| /* #define FORCE_ISTANBUL	*/ | ||||
| /* #define FORCE_BULLDOZER	*/ | ||||
| /* #define FORCE_BOBCAT	*/ | ||||
| /* #define FORCE_SSE_GENERIC	*/ | ||||
| /* #define FORCE_VIAC3		*/ | ||||
|  | @ -349,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME  "OPTERON" | ||||
| #endif | ||||
| 
 | ||||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) | ||||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER) | ||||
| #define FORCE | ||||
| #define FORCE_INTEL | ||||
| #define ARCHITECTURE    "X86" | ||||
|  | @ -357,8 +358,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define ARCHCONFIG   "-DBARCELONA " \ | ||||
| 		     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ | ||||
| 		     "-DL2_SIZE=524288 -DL2_LINESIZE=64  -DL3_SIZE=2097152 " \ | ||||
| 		     "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ | ||||
| 		     "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ | ||||
| 		     "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \ | ||||
| 		     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ | ||||
| 		     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" | ||||
| #define LIBNAME   "barcelona" | ||||
| #define CORENAME  "BARCELONA" | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue