From 5e7f29b19e06323f7a0fc4b8a0ac98c60055b4f2 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 25 Mar 2011 01:16:12 +0800 Subject: [PATCH 01/41] Fixed #13. Fixed blasint undefined bug in file. --- Changelog.txt | 3 ++- Makefile.system | 7 ++++++- cblas.h | 3 +++ getarch_2nd.c | 4 ++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index ab8fb19c2..f4f9c6e35 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -3,7 +3,8 @@ OpenBLAS ChangeLog Version 0.1 alpha2(in development) common: - * + * Fixed blasint undefined bug in file. Other software + could include this header successfully(Refs issue #13 on github) x86/x86_64: * MIPS64: diff --git a/Makefile.system b/Makefile.system index 5a129732f..6fb0ec86f 100644 --- a/Makefile.system +++ b/Makefile.system @@ -30,6 +30,10 @@ ifdef TARGET GETARCH_FLAGS += -DFORCE_$(TARGET) endif +ifdef INTERFACE64 +GETARCH_FLAGS += -DUSE64BITINT +endif + # This operation is expensive, so execution should be once. 
ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 @@ -489,7 +493,8 @@ endif ifdef BINARY64 ifdef INTERFACE64 -CCOMMON_OPT += -DUSE64BITINT +CCOMMON_OPT += +#-DUSE64BITINT endif endif diff --git a/cblas.h b/cblas.h index ea0fbb629..4beae7ad5 100644 --- a/cblas.h +++ b/cblas.h @@ -1,6 +1,9 @@ #ifndef CBLAS_H #define CBLAS_H +#include +#include "common.h" + #define CBLAS_INDEX size_t enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; diff --git a/getarch_2nd.c b/getarch_2nd.c index 31babd28a..018f08d31 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -30,6 +30,10 @@ int main(int argc, char **argv) { printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double))); printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float))); printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double))); + +#ifdef USE64BITINT + printf("#define USE64BITINT\n"); +#endif } return 0; From 85f99d476942847b9414c72e69d14c6d97c4d2d3 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Mon, 28 Mar 2011 10:58:39 +0800 Subject: [PATCH 02/41] Fixed #14 the SEGFAULT bug on 64 cores. On SMP server, the number of CPUs or cores should be less than or equal to 64. --- Changelog.txt | 3 +++ README | 6 +++++- driver/others/init.c | 39 ++++++++++++++++++++++++++++++++------- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index f4f9c6e35..2bffec93c 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -5,6 +5,9 @@ Version 0.1 alpha2(in development) common: * Fixed blasint undefined bug in file. Other software could include this header successfully(Refs issue #13 on github) + * Fixed the SEGFAULT bug on 64 cores. 
On SMP server, the number + of CPUs or cores should be less than or equal to 64.(Refs issue #14 + on github) x86/x86_64: * MIPS64: diff --git a/README b/README index 9b04f6f99..4697ad44f 100644 --- a/README +++ b/README @@ -56,4 +56,8 @@ Optimization on ICT Loongson 3A CPU OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas 8.ChangeLog -Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. \ No newline at end of file +Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. + +9.Known Issues +* The number of CPUs/Cores should less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit + is 64. On 32 bits, it is 32. diff --git a/driver/others/init.c b/driver/others/init.c index 7ee7dc45d..4adba661f 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) { return count; } +/*** + Known issue: The number of CPUs/cores should be less + than or equal to 8*sizeof(unsigned long). On 64 bits, the limit + is 64. On 32 bits, it is 32. 
+***/ static inline unsigned long get_cpumap(int node) { int infile; unsigned long affinity; char name[160]; + char cpumap[160]; char *p, *dummy; - + int i=0; + sprintf(name, CPUMAP_NAME, node); infile = open(name, O_RDONLY); @@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) { if (infile != -1) { - read(infile, name, sizeof(name)); - + read(infile, cpumap, sizeof(cpumap)); + p = cpumap; + while (*p != '\n' && i<160){ + if(*p != ',') { + name[i++]=*p; + } + p++; + } p = name; - while ((*p == '0') || (*p == ',')) p++; + // while ((*p == '0') || (*p == ',')) p++; - affinity = strtol(p, &dummy, 16); + affinity = strtoul(p, &dummy, 16); close(infile); } @@ -347,7 +360,13 @@ static void disable_hyperthread(void) { unsigned long share; int cpu; - common -> avail = (1UL << common -> num_procs) - 1; + if(common->num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); + exit(1); + }else if(common->num_procs == 64){ + common -> avail = 0xFFFFFFFFFFFFFFFFUL; + }else + common -> avail = (1UL << common -> num_procs) - 1; #ifdef DEBUG fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail); @@ -376,7 +395,13 @@ static void disable_affinity(void) { fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]); #endif - lprocmask = (1UL << common -> final_num_procs) - 1; + if(common->final_num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). 
Terminated.\n", common->final_num_procs); + exit(1); + }else if(common->final_num_procs == 64){ + lprocmask = 0xFFFFFFFFFFFFFFFFUL; + }else + lprocmask = (1UL << common -> final_num_procs) - 1; #ifndef USE_OPENMP lprocmask &= *(unsigned long *)&cpu_orig_mask[0]; From 54745902b87f28f878ef58eec8e697e25b67c423 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Sun, 20 Mar 2011 23:30:09 +0800 Subject: [PATCH 03/41] Init Changelog file for next release version(v0.1alpha2). --- Changelog.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 71677f0ee..ab8fb19c2 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,5 +1,14 @@ OpenBLAS ChangeLog ==================================================================== +Version 0.1 alpha2(in development) + +common: + * +x86/x86_64: + * +MIPS64: + * +==================================================================== Version 0.1 alpha1 20-Mar-2011 From 5452ba3850908c0c1c027e10fce540ff59ac52e2 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Sun, 20 Mar 2011 23:35:31 +0800 Subject: [PATCH 04/41] Updated the developing version to v0.1 alpha2. --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index d622390f4..61f9eb91d 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.1alpha1 +VERSION = 0.1alpha2 # You can specify the target architecture, otherwise it's # automatically detected. From 552f31dbbd01374f60c8fb8169eeb4b613ac3e32 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 25 Mar 2011 01:16:12 +0800 Subject: [PATCH 05/41] Fixed #13. Fixed blasint undefined bug in file. 
--- Changelog.txt | 3 ++- Makefile.system | 7 ++++++- cblas.h | 3 +++ getarch_2nd.c | 4 ++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index ab8fb19c2..f4f9c6e35 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -3,7 +3,8 @@ OpenBLAS ChangeLog Version 0.1 alpha2(in development) common: - * + * Fixed blasint undefined bug in file. Other software + could include this header successfully(Refs issue #13 on github) x86/x86_64: * MIPS64: diff --git a/Makefile.system b/Makefile.system index 5a129732f..6fb0ec86f 100644 --- a/Makefile.system +++ b/Makefile.system @@ -30,6 +30,10 @@ ifdef TARGET GETARCH_FLAGS += -DFORCE_$(TARGET) endif +ifdef INTERFACE64 +GETARCH_FLAGS += -DUSE64BITINT +endif + # This operation is expensive, so execution should be once. ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 @@ -489,7 +493,8 @@ endif ifdef BINARY64 ifdef INTERFACE64 -CCOMMON_OPT += -DUSE64BITINT +CCOMMON_OPT += +#-DUSE64BITINT endif endif diff --git a/cblas.h b/cblas.h index ea0fbb629..4beae7ad5 100644 --- a/cblas.h +++ b/cblas.h @@ -1,6 +1,9 @@ #ifndef CBLAS_H #define CBLAS_H +#include +#include "common.h" + #define CBLAS_INDEX size_t enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; diff --git a/getarch_2nd.c b/getarch_2nd.c index 31babd28a..018f08d31 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -30,6 +30,10 @@ int main(int argc, char **argv) { printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double))); printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float))); printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double))); + +#ifdef USE64BITINT + printf("#define USE64BITINT\n"); +#endif } return 0; From 989c6f8b066ca163449288af8e96023be0a5ed44 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Mon, 28 Mar 2011 10:58:39 +0800 Subject: [PATCH 06/41] Fixed #14 
the SEGFAULT bug on 64 cores. On SMP server, the number of CPUs or cores should be less than or equal to 64. --- Changelog.txt | 6 +++++ README | 12 ++++++++-- driver/others/blas_server_omp.c | 20 +++++++++++++++++ driver/others/init.c | 39 +++++++++++++++++++++++++++------ 4 files changed, 68 insertions(+), 9 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index f4f9c6e35..7df8c0c19 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -5,6 +5,12 @@ Version 0.1 alpha2(in development) common: * Fixed blasint undefined bug in file. Other software could include this header successfully(Refs issue #13 on github) + * Fixed the SEGFAULT bug on 64 cores. On SMP server, the number + of CPUs or cores should be less than or equal to 64.(Refs issue #14 + on github) + * Support "void goto_set_num_threads(int num_threads)" and "void + openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 + x86/x86_64: * MIPS64: diff --git a/README b/README index 9b04f6f99..46792c091 100644 --- a/README +++ b/README @@ -39,13 +39,17 @@ export GOTO_NUM_THREADS=4 or export OMP_NUM_THREADS=4 -The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS. +The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. + +If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable. 4.2 Set the number of threads with calling functions. for example, void goto_set_num_threads(int num_threads); or void openblas_set_num_threads(int num_threads); +If you compile this lib with USE_OPENMP=1, you should use the above functions, too. + 5.Report Bugs Please add a issue in https://github.com/xianyi/OpenBLAS/issues @@ -56,4 +60,8 @@ Optimization on ICT Loongson 3A CPU OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas 8.ChangeLog -Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. 
\ No newline at end of file +Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. + +9.Known Issues +* The number of CPUs/Cores should less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit + is 64. On 32 bits, it is 32. diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 3e70d8549..17d886e52 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -49,6 +49,26 @@ int blas_server_avail = 0; +void goto_set_num_threads(int num_threads) { + + if (num_threads < 1) num_threads = blas_num_threads; + + if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; + + if (num_threads > blas_num_threads) { + blas_num_threads = num_threads; + } + + blas_cpu_number = num_threads; + + omp_set_num_threads(blas_cpu_number); + +} +void openblas_set_num_threads(int num_threads) { + + goto_set_num_threads(num_threads); +} + int blas_thread_init(void){ blas_get_cpu_number(); diff --git a/driver/others/init.c b/driver/others/init.c index 7ee7dc45d..4adba661f 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) { return count; } +/*** + Known issue: The number of CPUs/cores should be less + than or equal to 8*sizeof(unsigned long). On 64 bits, the limit + is 64. On 32 bits, it is 32. 
+***/ static inline unsigned long get_cpumap(int node) { int infile; unsigned long affinity; char name[160]; + char cpumap[160]; char *p, *dummy; - + int i=0; + sprintf(name, CPUMAP_NAME, node); infile = open(name, O_RDONLY); @@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) { if (infile != -1) { - read(infile, name, sizeof(name)); - + read(infile, cpumap, sizeof(cpumap)); + p = cpumap; + while (*p != '\n' && i<160){ + if(*p != ',') { + name[i++]=*p; + } + p++; + } p = name; - while ((*p == '0') || (*p == ',')) p++; + // while ((*p == '0') || (*p == ',')) p++; - affinity = strtol(p, &dummy, 16); + affinity = strtoul(p, &dummy, 16); close(infile); } @@ -347,7 +360,13 @@ static void disable_hyperthread(void) { unsigned long share; int cpu; - common -> avail = (1UL << common -> num_procs) - 1; + if(common->num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); + exit(1); + }else if(common->num_procs == 64){ + common -> avail = 0xFFFFFFFFFFFFFFFFUL; + }else + common -> avail = (1UL << common -> num_procs) - 1; #ifdef DEBUG fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail); @@ -376,7 +395,13 @@ static void disable_affinity(void) { fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]); #endif - lprocmask = (1UL << common -> final_num_procs) - 1; + if(common->final_num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); + exit(1); + }else if(common->final_num_procs == 64){ + lprocmask = 0xFFFFFFFFFFFFFFFFUL; + }else + lprocmask = (1UL << common -> final_num_procs) - 1; #ifndef USE_OPENMP lprocmask &= *(unsigned long *)&cpu_orig_mask[0]; From 0a45e5495ff3253dd326db9f44cd40e9449e8ef9 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Wed, 20 Apr 2011 13:41:38 +0800 Subject: [PATCH 07/41] Fixed #21. Added extern C to support C++. 
Thank Tasio for the patch. --- Changelog.txt | 2 ++ cblas.h | 11 +++++++++++ common.h | 10 ++++++++++ 3 files changed, 23 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 7df8c0c19..301929aef 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -10,6 +10,8 @@ common: on github) * Support "void goto_set_num_threads(int num_threads)" and "void openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 + * Added extern "C" to support C++. Thank Tasio for the patch(Refs + issue #21 on github) x86/x86_64: * diff --git a/cblas.h b/cblas.h index 4beae7ad5..34adc5e99 100644 --- a/cblas.h +++ b/cblas.h @@ -1,6 +1,11 @@ #ifndef CBLAS_H #define CBLAS_H +#ifdef __cplusplus +extern "C" { + /* Assume C declarations for C++ */ +#endif /* __cplusplus */ + #include #include "common.h" @@ -273,4 +278,10 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); void cblas_xerbla(blasint p, char *rout, char *form, ...); + +#ifdef __cplusplus +} + +#endif /* __cplusplus */ + #endif diff --git a/common.h b/common.h index a481b2acb..1a7dd434a 100644 --- a/common.h +++ b/common.h @@ -39,6 +39,11 @@ #ifndef COMMON_H #define COMMON_H +#ifdef __cplusplus +extern "C" { + /* Assume C declarations for C++ */ +#endif /* __cplusplus */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif @@ -607,4 +612,9 @@ extern int gotoblas_profile; #define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME) #endif +#ifdef __cplusplus +} + +#endif /* __cplusplus */ + #endif From ff6ae89d3e64b32639e9c384f54810a854115189 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 22 Apr 2011 20:21:42 +0800 Subject: [PATCH 08/41] Fixed #19. Provided an error msg when the arch is not supported. 
--- Changelog.txt | 2 ++ getarch.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 301929aef..49f059be3 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -12,6 +12,8 @@ common: openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 * Added extern "C" to support C++. Thank Tasio for the patch(Refs issue #21 on github) + * Provided an error message when the arch is not supported.(Refs + issue #19 on github) x86/x86_64: * diff --git a/getarch.c b/getarch.c index 1d9bbc175..8864753b7 100644 --- a/getarch.c +++ b/getarch.c @@ -604,30 +604,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef POWER #define POWER #endif +#define OPENBLAS_SUPPORTED #endif #if defined(__i386__) || (__x86_64__) #include "cpuid_x86.c" +#define OPENBLAS_SUPPORTED #endif #ifdef __ia64__ #include "cpuid_ia64.c" +#define OPENBLAS_SUPPORTED #endif #ifdef __alpha #include "cpuid_alpha.c" +#define OPENBLAS_SUPPORTED #endif #ifdef POWER #include "cpuid_power.c" +#define OPENBLAS_SUPPORTED #endif #ifdef sparc #include "cpuid_sparc.c" +#define OPENBLAS_SUPPORTED #endif #ifdef __mips__ #include "cpuid_mips.c" +#define OPENBLAS_SUPPORTED +#endif + +#ifndef OPENBLAS_SUPPORTED +#error "This arch/CPU is not supported by OpenBLAS." #endif #else From b8d93812f06f216e20cc7bac4f15ad65bf5544a6 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 22 Apr 2011 22:07:46 +0800 Subject: [PATCH 09/41] Added docs for make TARGET=your_cpu_target. --- README | 4 +++- TargetList.txt | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 TargetList.txt diff --git a/README b/README index 46792c091..0f0aefa43 100644 --- a/README +++ b/README @@ -8,7 +8,9 @@ Download from project homepage. 
http://xianyi.github.com/OpenBLAS/ Or, check out codes from git://github.com/xianyi/OpenBLAS.git 1)Normal compile -Please read GotoBLAS_02QuickInstall.txt or type "make" + (a) type "make" to detect the CPU automatically. + or + (b) type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt. 2)Cross compile Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly. diff --git a/TargetList.txt b/TargetList.txt new file mode 100644 index 000000000..1c3d7c5b9 --- /dev/null +++ b/TargetList.txt @@ -0,0 +1,57 @@ +Force Target Examples: + +make TARGET=NEHALEM +make TARGET=LOONGSON3A BINARY=64 +make TARGET=ISTANBUL + +Supported List: +1.X86/X86_64 +a)Intel CPU: +P2 +COPPERMINE +KATMAI +NORTHWOOD +PRESCOTT +BANIAS +YONAH +CORE2 +PENRYN +DUNNINGTON +NEHALEM +ATOM + +b)AMD CPU: +ATHLON +OPTERON +OPTERON_SSE3 +BARCELONA +SHANGHAI +ISTANBUL + +c)VIA CPU: +SSE_GENERIC +VIAC3 +NANO + +2.Power CPU: +POWER4 +POWER5 +POWER6 +PPCG4 +PPC970 +PPC970MP +PPC440 +PPC440FP2 +CELL + +3.MIPS64 CPU: +SICORTEX +LOONGSON3A + +4.IA64 CPU: +ITANIUM2 + +5.SPARC CPU: +SPARC +SPARCV7 + From 2aab238c61044c01a0fbed438d1cf3cbb3f13d39 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 22 Apr 2011 22:14:06 +0800 Subject: [PATCH 10/41] Fixed #16. Print the user-friendly message when detecting CPU failed. --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 52f649f77..77dd3c2e7 100644 --- a/Makefile +++ b/Makefile @@ -96,6 +96,9 @@ endif endif libs : +ifeq ($(CORE), UNKOWN) + $(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) 
+endif -ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX) for d in $(SUBDIRS) ; \ do if test -d $$d; then \ From fcf9b82f14310915052218f850bd60454b3be989 Mon Sep 17 00:00:00 2001 From: traits Date: Tue, 3 May 2011 14:42:11 +0800 Subject: [PATCH 11/41] Fixed a build bug with NO_LAPACK=1 and SANITY_CHECK=1. --- reference/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/reference/Makefile b/reference/Makefile index 034f23244..d6368dcda 100644 --- a/reference/Makefile +++ b/reference/Makefile @@ -128,6 +128,8 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) +ifneq ($(NO_LAPACK), 1) + SBLASOBJS += \ sgetf2f.$(SUFFIX) sgetrff.$(SUFFIX) slauu2f.$(SUFFIX) slauumf.$(SUFFIX) \ spotf2f.$(SUFFIX) spotrff.$(SUFFIX) strti2f.$(SUFFIX) strtrif.$(SUFFIX) \ @@ -160,6 +162,7 @@ XBLASOBJS += xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \ xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \ +endif include $(TOPDIR)/Makefile.tail From 1acf5ace29601e272670313568c630f858122015 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Tue, 3 May 2011 17:19:36 +0800 Subject: [PATCH 12/41] Fixed a bug when detecting Intel CPU. 
--- cpuid_x86.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index 0d091b37c..6e3e74f82 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1302,24 +1302,25 @@ int get_coretype(void){ case 13: return CORE_DUNNINGTON; } - break; - case 2: - switch (model) { - case 5: - //Intel Core (Clarkdale) / Core (Arrandale) - // Pentium (Clarkdale) / Pentium Mobile (Arrandale) - // Xeon (Clarkdale), 32nm - return CORE_NEHALEM; - case 12: - //Xeon Processor 5600 (Westmere-EP) - return CORE_NEHALEM; - } - break; - + break; + case 2: + switch (model) { + case 5: + //Intel Core (Clarkdale) / Core (Arrandale) + // Pentium (Clarkdale) / Pentium Mobile (Arrandale) + // Xeon (Clarkdale), 32nm + return CORE_NEHALEM; + case 12: + //Xeon Processor 5600 (Westmere-EP) + return CORE_NEHALEM; + } + break; } + break; + case 15: - if (model <= 0x2) return CORE_NORTHWOOD; - return CORE_PRESCOTT; + if (model <= 0x2) return CORE_NORTHWOOD; + else return CORE_PRESCOTT; } } From 7dcf4eeee7df1d67a17bbeb9abdd1cb9a49b1f6a Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Wed, 4 May 2011 13:03:10 +0800 Subject: [PATCH 13/41] Fixed #23. Fixed a bug of f_check script about generating link flags. --- Changelog.txt | 1 + f_check | 1 + 2 files changed, 2 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 49f059be3..4f83fdf97 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -14,6 +14,7 @@ common: issue #21 on github) * Provided an error message when the arch is not supported.(Refs issue #19 on github) + * Fixed issue #23. Fixed a bug of f_check script about generating link flags. x86/x86_64: * diff --git a/f_check b/f_check index 26c57bcc9..45a946eb6 100644 --- a/f_check +++ b/f_check @@ -274,6 +274,7 @@ if ($link ne "") { && ($flags !~ /kernel32/) && ($flags !~ /advapi32/) && ($flags !~ /shell32/) + && ($flags !~ /^\-l$/) ) { $linker_l .= $flags . 
" "; } From 417b8ec792ad423c629208010886e5cfac696af3 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 6 May 2011 17:03:35 +0800 Subject: [PATCH 14/41] Added openblas_set_num_threads for Fortran. --- Changelog.txt | 1 + driver/others/Makefile | 4 ++- driver/others/openblas_set_num_threads.c | 45 ++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 driver/others/openblas_set_num_threads.c diff --git a/Changelog.txt b/Changelog.txt index 4f83fdf97..0a9f182fa 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -15,6 +15,7 @@ common: * Provided an error message when the arch is not supported.(Refs issue #19 on github) * Fixed issue #23. Fixed a bug of f_check script about generating link flags. + * Added openblas_set_num_threads for Fortran. x86/x86_64: * diff --git a/driver/others/Makefile b/driver/others/Makefile index bc5de3848..ab0e2fea0 100644 --- a/driver/others/Makefile +++ b/driver/others/Makefile @@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX) ifdef SMP -COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) +COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) ifndef NO_AFFINITY COMMONOBJS += init.$(SUFFIX) endif @@ -100,6 +100,8 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h $(CC) $(CFLAGS) -c $< -o $(@F) +openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c + blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h $(CC) $(CFLAGS) -c $< -o $(@F) diff --git a/driver/others/openblas_set_num_threads.c b/driver/others/openblas_set_num_threads.c new file mode 100644 index 000000000..7ca3b7114 --- /dev/null +++ b/driver/others/openblas_set_num_threads.c @@ -0,0 +1,45 @@ 
+/***************************************************************************** +Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +**********************************************************************************/ + +#include "common.h" + +#ifdef SMP_SERVER +#ifdef OS_LINUX + +extern void openblas_set_num_threads(int num_threads) ; + +void NAME(int* num_threads){ + openblas_set_num_threads(*num_threads); +} + +#endif +#endif From fa8e4fd879ea09dd093448e664f1c01ce47d5a1a Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Wed, 11 May 2011 01:12:32 +0800 Subject: [PATCH 15/41] Fixed #26 the wrong result of rotmg. Used fabs() instead of abs(). --- Changelog.txt | 1 + interface/rotmg.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index 0a9f182fa..2035dbce1 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -16,6 +16,7 @@ common: issue #19 on github) * Fixed issue #23. Fixed a bug of f_check script about generating link flags. * Added openblas_set_num_threads for Fortran. + * Fixed #25 a wrong result of rotmg. x86/x86_64: * diff --git a/interface/rotmg.c b/interface/rotmg.c index c37c09914..3db891714 100644 --- a/interface/rotmg.c +++ b/interface/rotmg.c @@ -7,6 +7,12 @@ #define GAMSQ 16777216.e0 #define RGAMSQ 5.9604645e-8 +#ifdef DOUBLE +#define ABS(x) fabs(x) +#else +#define ABS(x) fabsf(x) +#endif + #ifndef CBLAS void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){ @@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ dq2 = dp2 * dy1; dq1 = dp1 * *dx1; - if (! (abs(dq1) > abs(dq2))) goto L40; + if (! (ABS(dq1) > ABS(dq2))) goto L40; dh21 = -(dy1) / *dx1; dh12 = dp2 / dp1; @@ -140,7 +146,7 @@ L150: goto L130; L160: - if (! (abs(*dd2) <= RGAMSQ)) { + if (! (ABS(*dd2) <= RGAMSQ)) { goto L190; } if (*dd2 == ZERO) { @@ -157,7 +163,7 @@ L180: goto L160; L190: - if (! (abs(*dd2) >= GAMSQ)) { + if (! 
(ABS(*dd2) >= GAMSQ)) { goto L220; } igo = 3; From bbc517292ae6cd15509a65ffc4fbe75c3225fa0f Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 12 May 2011 19:05:20 +0800 Subject: [PATCH 16/41] Added the spec of git branches about this project. --- README | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README b/README index 0f0aefa43..c18b6c502 100644 --- a/README +++ b/README @@ -67,3 +67,11 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve 9.Known Issues * The number of CPUs/Cores should less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit is 64. On 32 bits, it is 32. + +10. Specification of Git Branches +We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). +Now, there are 4 branches in github.com. + * The master branch. This is a main branch to reflect a production-ready state. + * The develop branch. This is a main branch to reflect a state with the latest delivered development changes for the next release. + * The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future. + * The gh-pages branch. This is for web pages \ No newline at end of file From 03272a606d9e2848ee696f467307d4e8fef5367c Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 13 May 2011 01:21:39 +0800 Subject: [PATCH 17/41] Added the unit test for drotmg. 
--- utest/Makefile | 4 +-- utest/common_utest.h | 2 ++ utest/main.c | 3 ++- utest/test_rotmg.c | 60 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 utest/test_rotmg.c diff --git a/utest/Makefile b/utest/Makefile index 9d512b877..9339d42be 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system TARGET=openblas_utest CUNIT_LIB=/usr/local/lib/libcunit.a -OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o +OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o all : run_test $(TARGET): $(OBJS) - $(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) + $(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) run_test: $(TARGET) ./$(TARGET) diff --git a/utest/common_utest.h b/utest/common_utest.h index 3e9ecb422..f9a14d87d 100644 --- a/utest/common_utest.h +++ b/utest/common_utest.h @@ -57,4 +57,6 @@ void test_caxpy_inc_0(void); void test_zdotu_n_1(void); void test_zdotu_offset_1(void); +void test_drotmg(void); + #endif diff --git a/utest/main.c b/utest/main.c index f6ecf3cc0..2ec9c7a57 100644 --- a/utest/main.c +++ b/utest/main.c @@ -54,7 +54,8 @@ CU_TestInfo test_level1[]={ {"Testing zdotu with n == 1",test_zdotu_n_1}, {"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1}, - + + {"Testing drotmg",test_drotmg}, CU_TEST_INFO_NULL, }; diff --git a/utest/test_rotmg.c b/utest/test_rotmg.c new file mode 100644 index 000000000..e51e6b299 --- /dev/null +++ b/utest/test_rotmg.c @@ -0,0 +1,60 @@ +/***************************************************************************** +Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +**********************************************************************************/ + +#include "common_utest.h" + +void test_drotmg() +{ + double te_d1, tr_d1; + double te_d2, tr_d2; + double te_x1, tr_x1; + double te_y1, tr_y1; + double te_param[5],tr_param[5]; + int i=0; + te_d1= tr_d1=0.21149573940783739; + te_d2= tr_d2=0.046892057172954082; + te_x1= tr_x1=-0.42272687517106533; + te_y1= tr_y1=0.42211309121921659; + //OpenBLAS + BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); + //reference + BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param); + + CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS); + CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS); + CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS); + CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS); + + for(i=0; i<5; i++){ + CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS); + } +} From 1d605109599db32d15d00e81c9d83b8b2cdf4208 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 13 May 2011 02:19:55 +0800 Subject: [PATCH 18/41] Added the unit testcase for dsdot. 
--- common_reference.h | 4 ++++ utest/Makefile | 2 +- utest/common_utest.h | 2 ++ utest/main.c | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/common_reference.h b/common_reference.h index 04b11f80f..4cc4be4fd 100644 --- a/common_reference.h +++ b/common_reference.h @@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *); double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *); +void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *); + +double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*); + #endif diff --git a/utest/Makefile b/utest/Makefile index 9339d42be..e7c5f3412 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -5,7 +5,7 @@ include $(TOPDIR)/Makefile.system TARGET=openblas_utest CUNIT_LIB=/usr/local/lib/libcunit.a -OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o +OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o all : run_test diff --git a/utest/common_utest.h b/utest/common_utest.h index f9a14d87d..1332ef6ab 100644 --- a/utest/common_utest.h +++ b/utest/common_utest.h @@ -59,4 +59,6 @@ void test_zdotu_offset_1(void); void test_drotmg(void); +void test_dsdot_n_1(void); + #endif diff --git a/utest/main.c b/utest/main.c index 2ec9c7a57..135709507 100644 --- a/utest/main.c +++ b/utest/main.c @@ -56,6 +56,8 @@ CU_TestInfo test_level1[]={ {"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1}, {"Testing drotmg",test_drotmg}, + + {"Testing dsdot with n == 1",test_dsdot_n_1}, CU_TEST_INFO_NULL, }; From b206fc7075ad39f5de144a894fe32b8865c243fd Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 13 May 2011 02:34:30 +0800 Subject: [PATCH 19/41] Fixed #28. Convert the result to double precision in the end of dsdot kernel. 
--- Changelog.txt | 3 ++- kernel/Makefile.L1 | 2 +- kernel/x86_64/dot_sse.S | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index 2035dbce1..2b5dc3a94 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -19,7 +19,8 @@ common: * Fixed #25 a wrong result of rotmg. x86/x86_64: - * + * Fixed #28 a wrong result of dsdot on x86_64. + MIPS64: * ==================================================================== diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1 index 317f14363..b08664a8e 100644 --- a/kernel/Makefile.L1 +++ b/kernel/Makefile.L1 @@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ $(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@ $(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ diff --git a/kernel/x86_64/dot_sse.S b/kernel/x86_64/dot_sse.S index cc866a9c5..61c481064 100644 --- a/kernel/x86_64/dot_sse.S +++ b/kernel/x86_64/dot_sse.S @@ -1286,6 +1286,10 @@ haddps %xmm0, %xmm0 #endif +#ifdef DSDOT + cvtss2sd %xmm0, %xmm0 +#endif + RESTOREREGISTERS ret From 830a823be18dddc6cf58a824eb12de99e22f76a1 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 13 May 2011 02:41:39 +0800 Subject: [PATCH 20/41] Added missed testing codes for dsdot. 
--- utest/test_dsdot.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 utest/test_dsdot.c diff --git a/utest/test_dsdot.c b/utest/test_dsdot.c new file mode 100644 index 000000000..8df7380be --- /dev/null +++ b/utest/test_dsdot.c @@ -0,0 +1,50 @@ +/***************************************************************************** +Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +**********************************************************************************/ + +#include "common_utest.h" + +void test_dsdot_n_1() +{ + float x= 0.172555164; + float y= -0.0138700781; + int incx=1; + int incy=1; + int n=1; + + double res1=0.0f, res2=0.0f; + + res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy); + res2=BLASFUNC_REF(dsdot)(&n, &x, &incx, &y, &incy); + + CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS); + +} From fcb5ce011b7fd1aea67d47d6e313dbe19547263f Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Tue, 17 May 2011 21:24:00 +0000 Subject: [PATCH 21/41] Fixed #28. Convert the result to double precision in MIPS64 dsdot_k kernel. --- Changelog.txt | 5 +++-- interface/dsdot.c | 11 +++++++---- kernel/mips64/dot.S | 8 ++++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index 2b5dc3a94..cd1b4c3ef 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,7 +1,7 @@ OpenBLAS ChangeLog ==================================================================== Version 0.1 alpha2(in development) - +0;136;0c common: * Fixed blasint undefined bug in file. Other software could include this header successfully(Refs issue #13 on github) @@ -22,7 +22,8 @@ x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. MIPS64: - * + * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. 
+ ==================================================================== Version 0.1 alpha1 20-Mar-2011 diff --git a/interface/dsdot.c b/interface/dsdot.c index 66f7917d5..94237e0c4 100644 --- a/interface/dsdot.c +++ b/interface/dsdot.c @@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ BLASLONG n = *N; BLASLONG incx = *INCX; BLASLONG incy = *INCY; + double ret = 0.0; PRINT_DEBUG_NAME; @@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; - return DSDOT_K(n, x, incx, y, incy); + ret=DSDOT_K(n, x, incx, y, incy); FUNCTION_PROFILE_END(1, n, n); IDEBUG_END; - return 0; + return ret; } #else double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ + + double ret = 0.0; PRINT_DEBUG_CNAME; @@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; - return DSDOT_K(n, x, incx, y, incy); + ret=DSDOT_K(n, x, incx, y, incy); FUNCTION_PROFILE_END(1, n, n); IDEBUG_END; - return 0; + return ret; } diff --git a/kernel/mips64/dot.S b/kernel/mips64/dot.S index b1f599172..6220b6ac9 100644 --- a/kernel/mips64/dot.S +++ b/kernel/mips64/dot.S @@ -300,7 +300,11 @@ .align 3 .L999: - j $31 ADD s1, s1, s2 - +#ifdef DSDOT + cvt.d.s s1, s1 +#endif + j $31 + NOP + EPILOGUE From cc09e6ef3a6b18640722c83a1c948261b60a5d57 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 27 May 2011 18:12:45 +0800 Subject: [PATCH 22/41] Ignore *.obj files in git.
--- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 44af57166..fa15203f1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +*.obj +*.lib +*.dll +*.def *.o lapack-3.1.1 lapack-3.1.1.tgz From c30c22a76cc072c8791667c46b5cad9a5e431583 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 27 May 2011 18:16:19 +0800 Subject: [PATCH 23/41] Fixed a bug about detecting underscore prefix in c_check. --- Changelog.txt | 1 + c_check | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index cd1b4c3ef..461058279 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -17,6 +17,7 @@ common: * Fixed issue #23. Fixed a bug of f_check script about generating link flags. * Added openblas_set_num_threads for Fortran. * Fixed #25 a wrong result of rotmg. + * Fixed a bug about detecting underscore prefix in c_check. x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. diff --git a/c_check b/c_check index d8025f9f3..263efeb3d 100644 --- a/c_check +++ b/c_check @@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/); $data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; -$data =~ /globl\ ([_\.]*)(.*)/; +$data =~ /globl\s([_\.]*)(.*)/; $need_fu = $1; From af40551c9f47ce752abefbf8afddba348875b0f6 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 27 May 2011 21:15:30 +0800 Subject: [PATCH 24/41] Fixed the makefile bug about openblas_set_num_threads. --- driver/others/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/others/Makefile b/driver/others/Makefile index ab0e2fea0..75b552b65 100644 --- a/driver/others/Makefile +++ b/driver/others/Makefile @@ -101,6 +101,7 @@ blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../. 
$(CC) $(CFLAGS) -c $< -o $(@F) openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c + $(CC) $(CFLAGS) -c $< -o $(@F) blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h $(CC) $(CFLAGS) -c $< -o $(@F) From 3d7e62eb8b17c52922d43c71354754e8cb283c47 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Mon, 30 May 2011 12:42:17 +0800 Subject: [PATCH 25/41] Fixed #31 Shared library placement on Mac. Thanks to Mr. Viral B. Shah for this patch. --- exports/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exports/Makefile b/exports/Makefile index 24cdc41c8..6e067acbf 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -84,7 +84,7 @@ libgoto_hpl.def : gensymbol perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def - $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) + $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) symbol.$(SUFFIX) : symbol.S $(CC) $(CFLAGS) -c -o $(@F) $^ From 31040e4d80caab57dac1fd7f28b7a59815dca43e Mon Sep 17 00:00:00 2001 From: Xianyi Date: Fri, 3 Jun 2011 13:19:54 +0800 Subject: [PATCH 26/41] Fixed #32 a SEGFAULT bug with gcc-4.6. According to the i386 calling convention, the called function should remove the hidden return value address from the stack. --- Changelog.txt | 1 + kernel/x86/zdot_sse2.S | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 461058279..60798d2a9 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -21,6 +21,7 @@ common: x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. + * Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. MIPS64: * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
diff --git a/kernel/x86/zdot_sse2.S b/kernel/x86/zdot_sse2.S index 5aeefde31..2a174fb5d 100644 --- a/kernel/x86/zdot_sse2.S +++ b/kernel/x86/zdot_sse2.S @@ -1541,5 +1541,8 @@ popl %ebx popl %esi popl %edi +/*remove the hidden return value address from the stack.*/ + popl %ecx + xchgl %ecx, 0(%esp) ret EPILOGUE From 4335bca2f7265193186771c5b1cc4f6b177c85b8 Mon Sep 17 00:00:00 2001 From: Wang Qian Date: Tue, 7 Jun 2011 12:53:25 +0800 Subject: [PATCH 27/41] Fixed #33 ztrmm bug on Nehalem. --- Changelog.txt | 1 + kernel/x86_64/zgemm_kernel_1x4_nehalem.S | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index 60798d2a9..c4e6a8fe2 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -22,6 +22,7 @@ common: x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. * Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. + * Fixed #33 ztrmm bug on Nehalem. MIPS64: * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. diff --git a/kernel/x86_64/zgemm_kernel_1x4_nehalem.S b/kernel/x86_64/zgemm_kernel_1x4_nehalem.S index e72a19c96..4ddfc488b 100644 --- a/kernel/x86_64/zgemm_kernel_1x4_nehalem.S +++ b/kernel/x86_64/zgemm_kernel_1x4_nehalem.S @@ -544,7 +544,7 @@ jg .L11 #if defined(TRMMKERNEL) && !defined(LEFT) - addq $1, KK + addq $4, KK #endif leaq (C, LDC, 4), C @@ -594,7 +594,7 @@ jg .L11 #if defined(TRMMKERNEL) && !defined(LEFT) - addq $1, KK + addq $4, KK #endif leaq (C, LDC, 4), C From 149638322434c90ba7fafb48312a08d1634144dd Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 9 Jun 2011 10:40:15 +0800 Subject: [PATCH 28/41] Print the wall time (cycles) with enabling FUNCTION_PROFILE. 
--- Changelog.txt | 1 + driver/others/profile.c | 9 +++++---- interface/create | 0 3 files changed, 6 insertions(+), 4 deletions(-) mode change 100644 => 100755 interface/create diff --git a/Changelog.txt b/Changelog.txt index c4e6a8fe2..cc90ee198 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -18,6 +18,7 @@ common: * Added openblas_set_num_threads for Fortran. * Fixed #25 a wrong result of rotmg. * Fixed a bug about detecting underscore prefix in c_check. + * Print the wall time (cycles) with enabling FUNCTION_PROFILE x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. diff --git a/driver/others/profile.c b/driver/others/profile.c index f65550c9f..f464c0b6a 100644 --- a/driver/others/profile.c +++ b/driver/others/profile.c @@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) { if (cycles > 0) { fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n"); - fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n"); + fprintf(stderr, " Function No. 
of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n"); for (i = 0; i < MAX_PROF_TABLE; i ++) { if (function_profile_table[i].calls) { #ifndef OS_WINDOWS - fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n", + fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n", #else - fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n", + fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n", #endif func_table[i], function_profile_table[i].calls, (double)function_profile_table[i].cycles / (double)cycles * 100., (double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100., - (double)function_profile_table[i].area / (double)function_profile_table[i].cycles + (double)function_profile_table[i].area / (double)function_profile_table[i].cycles, + function_profile_table[i].cycles ); } } diff --git a/interface/create b/interface/create old mode 100644 new mode 100755 From 8d50a9fd1ae8c04005ef3f40b0e7c6ed764e8ded Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 9 Jun 2011 11:38:59 +0800 Subject: [PATCH 29/41] Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1. --- Changelog.txt | 1 + Makefile.system | 4 ++++ kernel/Makefile | 8 +++++++- kernel/setparam-ref.c | 16 ++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index cc90ee198..9089096e5 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -19,6 +19,7 @@ common: * Fixed #25 a wrong result of rotmg. * Fixed a bug about detecting underscore prefix in c_check. * Print the wall time (cycles) with enabling FUNCTION_PROFILE + * Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1 x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. 
diff --git a/Makefile.system b/Makefile.system index 6fb0ec86f..7686c938b 100644 --- a/Makefile.system +++ b/Makefile.system @@ -515,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1) CCOMMON_OPT += -DDYNAMIC_ARCH endif +ifeq ($(NO_LAPACK), 1) +CCOMMON_OPT += -DNO_LAPACK +endif + ifdef SMP CCOMMON_OPT += -DSMP_SERVER diff --git a/kernel/Makefile b/kernel/Makefile index 6084cbc3f..a3a32de81 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX) CCOMMON_OPT += -DTS=$(TSUFFIX) endif +KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h +ifneq ($(NO_LAPACK), 1) +KERNEL_INTERFACE += ../common_lapack.h +endif + ifeq ($(ARCH), x86) COMMONOBJS += cpuid.$(SUFFIX) endif @@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h setparam$(TSUFFIX).c : setparam-ref.c sed 's/TS/$(TSUFFIX)/g' $< > $(@F) -kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h +kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F) + cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S $(CC) -c $(CFLAGS) $< -o $(@F) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 0ab57f3b3..73df7625a 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = { #endif ssymm_outcopyTS, ssymm_oltcopyTS, +#ifndef NO_LAPACK sneg_tcopyTS, slaswp_ncopyTS, +#else + NULL,NULL, +#endif 0, 0, 0, DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), @@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = { #endif dsymm_outcopyTS, dsymm_oltcopyTS, +#ifndef NO_LAPACK dneg_tcopyTS, dlaswp_ncopyTS, +#else + NULL, NULL, +#endif #ifdef EXPRECISION @@ -286,7 +294,11 @@ gotoblas_t TABLE_NAME = { chemm3m_oucopyrTS, chemm3m_olcopyrTS, chemm3m_oucopyiTS, chemm3m_olcopyiTS, +#ifndef NO_LAPACK cneg_tcopyTS, claswp_ncopyTS, +#else + NULL, NULL, +#endif 0, 0, 0, ZGEMM_DEFAULT_UNROLL_M, 
ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), @@ -375,7 +387,11 @@ gotoblas_t TABLE_NAME = { zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, +#ifndef NO_LAPACK zneg_tcopyTS, zlaswp_ncopyTS, +#else + NULL, NULL, +#endif #ifdef EXPRECISION From b3d188774525483cedf6ce1282ac9b9cb806eb67 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 9 Jun 2011 22:59:49 +0800 Subject: [PATCH 30/41] Fixed #35 a build bug with NO_LAPACK=1 DYNAMIC_ARCH=1 FC=gfortran. I forgot to test it with gfortran in the last bug-fix commit. --- kernel/setparam-ref.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 73df7625a..d3734bbd9 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -203,7 +203,11 @@ gotoblas_t TABLE_NAME = { #endif qsymm_outcopyTS, qsymm_oltcopyTS, +#ifndef NO_LAPACK qneg_tcopyTS, qlaswp_ncopyTS, +#else + NULL, NULL, +#endif #endif @@ -482,7 +486,11 @@ gotoblas_t TABLE_NAME = { xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, +#ifndef NO_LAPACK xneg_tcopyTS, xlaswp_ncopyTS, +#else + NULL, NULL, +#endif #endif From 49742cb2d3c62179b542fee72e2ba522a43f901d Mon Sep 17 00:00:00 2001 From: Elias Pipping Date: Sat, 11 Jun 2011 14:36:16 +0200 Subject: [PATCH 31/41] Make USE_OPENMP=0 disable openmp --- Makefile.system | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.system b/Makefile.system index 6fb0ec86f..ca752623c 100644 --- a/Makefile.system +++ b/Makefile.system @@ -189,7 +189,7 @@ ifeq ($(C_COMPILER), INTEL) CCOMMON_OPT += -wd981 endif -ifdef USE_OPENMP +ifeq ($(USE_OPENMP), 1) ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -fopenmp endif From aeed8d6225501a3ec0eaf82bdbd614ee5d4e336b Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Sun, 19 Jun 2011 11:55:29 +0800 Subject: [PATCH 32/41] Fixed #27. Temporarily work around axpy's low performance issue with small input size & multithreads.
--- Changelog.txt | 1 + interface/axpy.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index 9089096e5..2c1bfdf53 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -25,6 +25,7 @@ x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. * Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. * Fixed #33 ztrmm bug on Nehalem. + * Walk round #27 the low performance axpy issue with small imput size & multithreads. MIPS64: * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. diff --git a/interface/axpy.c b/interface/axpy.c index dd75b758c..82b0ee234 100644 --- a/interface/axpy.c +++ b/interface/axpy.c @@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc //In that case, the threads would be dependent. if (incx == 0 || incy == 0) nthreads = 1; - + + //Temporarily walk around the low performance issue with small imput size & multithreads. + if (n <= 10000) + nthreads = 1; + if (nthreads == 1) { #endif From 7945919f22ef9eddfcd475621259af3f01f6a09c Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Sun, 19 Jun 2011 12:07:31 +0800 Subject: [PATCH 33/41] Updated gitignore file. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index fa15203f1..6cfc5b3c1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ lapack-3.1.1 lapack-3.1.1.tgz *.so *.a +.svn *~ config.h Makefile.conf From fab36f1adb7aeef5e8e7655d781695764c4f4e6e Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Mon, 20 Jun 2011 18:35:35 +0800 Subject: [PATCH 34/41] Fixed #20. Added install target in makefile. You can use "make install PREFIX=your_installation_directory". 
--- Makefile | 63 ++++++++++++++++++++++++++++++++++++++ Makefile.rule | 3 ++ openblas_config_template.h | 21 +++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 openblas_config_template.h diff --git a/Makefile b/Makefile index 77dd3c2e7..3bca1337f 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,10 @@ ifdef SANITY_CHECK BLASDIRS += reference endif +ifndef PREFIX +PREFIX = /opt/OpenBLAS +endif + SUBDIRS = $(BLASDIRS) ifneq ($(NO_LAPACK), 1) SUBDIRS += lapack @@ -111,6 +115,7 @@ ifdef DYNAMIC_ARCH do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ done endif + touch lib.grd prof : prof_blas prof_lapack @@ -230,6 +235,63 @@ lapack-test : dummy : +lib.grd : + $(error OpenBLAS: Please run "make" firstly) + +install : lib.grd + @-mkdir -p $(PREFIX) + @echo Generating openblas_config.h in $(PREFIX) +#for inc + @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h + @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h + @cat config.h >> $(PREFIX)/openblas_config.h + @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h + @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h + @echo \#endif >> $(PREFIX)/openblas_config.h + + @echo Generating f77blas.h in $(PREFIX) + @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h + @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h + @echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h + @cat common_interface.h >> $(PREFIX)/f77blas.h + @echo \#endif >> $(PREFIX)/f77blas.h + + @echo Generating cblas.h in $(PREFIX) + @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h + +#for install static library + @echo Copy the static library to $(PREFIX) + @cp $(LIBNAME) $(PREFIX) + @-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX) +#for install shared library + @echo Copy the shared library to $(PREFIX) +ifeq ($(OSNAME), Linux) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) 
$(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), FreeBSD) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), NetBSD) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), Darwin) + -cp $(LIBDYNNAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib +endif +ifeq ($(OSNAME), WINNT) + -cp $(LIBDLLNAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll +endif +ifeq ($(OSNAME), CYGWIN_NT) + -cp $(LIBDLLNAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll +endif + + @echo Install OK! + clean :: @for d in $(SUBDIRS_ALL) ; \ do if test -d $$d; then \ @@ -245,4 +307,5 @@ endif echo deleting lapack-3.1.1; \ rm -rf lapack-3.1.1 ;\ fi + @rm -f *.grd @echo Done. \ No newline at end of file diff --git a/Makefile.rule b/Makefile.rule index 61f9eb91d..88d552495 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -91,6 +91,9 @@ VERSION = 0.1alpha2 # SANITY_CHECK to compare the result with reference BLAS. # UTEST_CHECK = 1 +# The installation directory. +# PREFIX = /opt/OpenBLAS + # Common Optimization Flag; -O2 is enough. # DEBUG = 1 diff --git a/openblas_config_template.h b/openblas_config_template.h new file mode 100644 index 000000000..9fb80aa4f --- /dev/null +++ b/openblas_config_template.h @@ -0,0 +1,21 @@ +/*This is only for "make install" target.*/ + +#ifdef NEEDBUNDERSCORE +#define BLASFUNC(FUNC) FUNC##_ +#else +#define BLASFUNC(FUNC) FUNC +#endif + +#if defined(OS_WINDOWS) && defined(__64BIT__) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef USE64BITINT +typedef BLASLONG blasint; +#else +typedef int blasint; +#endif From d978436c4bcc45404188d8b58618f44efd52728d Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Mon, 20 Jun 2011 18:36:29 +0800 Subject: [PATCH 35/41] Refs #20. Updated the docs. 
--- Changelog.txt | 1 + README | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index c4e6a8fe2..aadf00d37 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -18,6 +18,7 @@ common: * Added openblas_set_num_threads for Fortran. * Fixed #25 a wrong result of rotmg. * Fixed a bug about detecting underscore prefix in c_check. + * Added install target. You can use "make install". (Refs #20) x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. diff --git a/README b/README index c18b6c502..21e740689 100644 --- a/README +++ b/README @@ -22,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g 3)Debug version make DEBUG=1 +4)Install to the directory (Optional) +e.g. +make install PREFIX=your_installation_directory +The default directory is /opt/OpenBLAS + 3.Support CPU & OS Please read GotoBLAS_01Readme.txt From 32353a9d3085c9de7b95342e4be5e4c816ee8593 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Tue, 21 Jun 2011 17:39:08 +0800 Subject: [PATCH 36/41] Refs #20. Fixed the installation bug with DYNAMIC_ARCH=1.
--- Makefile | 72 ++++++++---------------------------------------- Makefile.install | 65 +++++++++++++++++++++++++++++++++++++++++++ kernel/Makefile | 4 +-- 3 files changed, 78 insertions(+), 63 deletions(-) create mode 100644 Makefile.install diff --git a/Makefile b/Makefile index 3bca1337f..c480fc47d 100644 --- a/Makefile +++ b/Makefile @@ -26,8 +26,8 @@ endif SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench -.PHONY : all libs netlib test ctest shared -.NOTPARALLEL : all libs prof lapack-test +.PHONY : all libs netlib test ctest shared install +.NOTPARALLEL : all libs prof lapack-test install all :: libs netlib tests shared @echo @@ -109,11 +109,15 @@ endif $(MAKE) -C $$d $(@F) || exit 1 ; \ fi; \ done +#Save the config files for installation + cp Makefile.conf Makefile.conf_last + cp config.h config_last.h ifdef DYNAMIC_ARCH $(MAKE) -C kernel commonlibs || exit 1 for d in $(DYNAMIC_CORE) ; \ do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ done + echo DYNAMIC_ARCH=1 >> Makefile.conf_last endif touch lib.grd @@ -235,62 +239,8 @@ lapack-test : dummy : -lib.grd : - $(error OpenBLAS: Please run "make" firstly) - -install : lib.grd - @-mkdir -p $(PREFIX) - @echo Generating openblas_config.h in $(PREFIX) -#for inc - @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h - @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h - @cat config.h >> $(PREFIX)/openblas_config.h - @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h - @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h - @echo \#endif >> $(PREFIX)/openblas_config.h - - @echo Generating f77blas.h in $(PREFIX) - @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h - @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h - @echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h - @cat common_interface.h >> $(PREFIX)/f77blas.h - @echo \#endif >> $(PREFIX)/f77blas.h - - @echo Generating cblas.h 
in $(PREFIX) - @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h - -#for install static library - @echo Copy the static library to $(PREFIX) - @cp $(LIBNAME) $(PREFIX) - @-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX) -#for install shared library - @echo Copy the shared library to $(PREFIX) -ifeq ($(OSNAME), Linux) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so -endif -ifeq ($(OSNAME), FreeBSD) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so -endif -ifeq ($(OSNAME), NetBSD) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so -endif -ifeq ($(OSNAME), Darwin) - -cp $(LIBDYNNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib -endif -ifeq ($(OSNAME), WINNT) - -cp $(LIBDLLNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll -endif -ifeq ($(OSNAME), CYGWIN_NT) - -cp $(LIBDLLNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll -endif - - @echo Install OK! +install : + $(MAKE) -f Makefile.install install clean :: @for d in $(SUBDIRS_ALL) ; \ @@ -298,14 +248,14 @@ clean :: $(MAKE) -C $$d $(@F) || exit 1 ; \ fi; \ done -ifdef DYNAMIC_ARCH +#ifdef DYNAMIC_ARCH @$(MAKE) -C kernel clean -endif +#endif @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h @rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib @if test -d lapack-3.1.1; then \ echo deleting lapack-3.1.1; \ rm -rf lapack-3.1.1 ;\ fi - @rm -f *.grd + @rm -f *.grd Makefile.conf_last config_last.h @echo Done. \ No newline at end of file diff --git a/Makefile.install b/Makefile.install new file mode 100644 index 000000000..80dafc9c6 --- /dev/null +++ b/Makefile.install @@ -0,0 +1,65 @@ +TOPDIR = . 
+export GOTOBLAS_MAKEFILE = 1 +-include $(TOPDIR)/Makefile.conf_last +include ./Makefile.system + +.PHONY : install +.NOTPARALLEL : install + +lib.grd : + $(error OpenBLAS: Please run "make" firstly) + +install : lib.grd + @-mkdir -p $(PREFIX) + @echo Generating openblas_config.h in $(PREFIX) +#for inc + @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h + @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h + @cat config_last.h >> $(PREFIX)/openblas_config.h + @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h + @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h + @echo \#endif >> $(PREFIX)/openblas_config.h + + @echo Generating f77blas.h in $(PREFIX) + @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h + @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h + @echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h + @cat common_interface.h >> $(PREFIX)/f77blas.h + @echo \#endif >> $(PREFIX)/f77blas.h + + @echo Generating cblas.h in $(PREFIX) + @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h + +#for install static library + @echo Copy the static library to $(PREFIX) + @cp $(LIBNAME) $(PREFIX) + @-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX) +#for install shared library + @echo Copy the shared library to $(PREFIX) +ifeq ($(OSNAME), Linux) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), FreeBSD) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), NetBSD) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), Darwin) + -cp $(LIBDYNNAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib +endif +ifeq ($(OSNAME), WINNT) + -cp $(LIBDLLNAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll +endif +ifeq ($(OSNAME), CYGWIN_NT) + -cp $(LIBDLLNAME) 
$(PREFIX) + -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll +endif + + @echo Install OK! + diff --git a/kernel/Makefile b/kernel/Makefile index a3a32de81..aed145b60 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -118,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S $(CC) -c $(PFLAGS) $< -o $(@F) -ifdef DYNAMIC_ARCH +#ifdef DYNAMIC_ARCH clean :: @rm -f setparam_*.c kernel_*.h setparam.h kernel.h -endif +#endif include $(TOPDIR)/Makefile.tail From e568df0daee58498cd8f37cde1a1534e1a0698cf Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Tue, 21 Jun 2011 18:06:13 +0800 Subject: [PATCH 37/41] Refs #38. Prepare the docs with v0.1alpha2. --- Changelog.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index fb149ca7a..f94c164c7 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,7 +1,7 @@ OpenBLAS ChangeLog ==================================================================== -Version 0.1 alpha2(in development) -0;136;0c +Version 0.1 alpha2(Preparing to release) + common: * Fixed blasint undefined bug in <cblas.h> file. Other software could include this header successfully(Refs issue #13 on github) @@ -31,6 +31,8 @@ x86/x86_64: MIPS64: * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. + * Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2) + * Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3) ==================================================================== Version 0.1 alpha1 From 82f5274828a1c12137651375cff248cbc6ed4160 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Wed, 22 Jun 2011 01:52:20 +0800 Subject: [PATCH 38/41] Refs #39. It's unnecessary to include sys/mman.h file in blas_server_omp.c.
--- driver/others/blas_server_omp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 17d886e52..4fd4cd440 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -38,7 +38,7 @@ #include #include -#include <sys/mman.h> +//#include <sys/mman.h> #include "common.h" #ifndef USE_OPENMP From 078bfd0b4fa33e1b651366a475050fb1b8f5cb4c Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Wed, 22 Jun 2011 13:19:39 +0800 Subject: [PATCH 39/41] Refs #39. Moved the shared lib (dll) to top directory in MingW64 compiler environment. --- Makefile | 2 +- exports/Makefile | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index c480fc47d..798c56192 100644 --- a/Makefile +++ b/Makefile @@ -74,7 +74,7 @@ ifeq ($(OSNAME), Darwin) endif ifeq ($(OSNAME), WINNT) $(MAKE) -C exports dll -# -ln -fs $(LIBDLLNAME) libopenblas.dll + -ln -fs $(LIBDLLNAME) libopenblas.dll endif ifeq ($(OSNAME), CYGWIN_NT) $(MAKE) -C exports dll diff --git a/exports/Makefile b/exports/Makefile index 6e067acbf..f4c9314f9 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME) zip : dll zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) -dll : libgoto2.dll +dll : ../$(LIBDLLNAME) +#libgoto2.dll dll2 : libgoto2_shared.dll -libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) +../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) $(RANLIB) ../$(LIBNAME) ifeq ($(BINARY32), 1) - $(DLLWRAP) -o $(@F) --def libgoto2.def \ + $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) -lib /machine:i386 /def:libgoto2.def else - $(DLLWRAP) -o $(@F) --def libgoto2.def \ + $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) -lib /machine:X64 /def:libgoto2.def endif From
859b71645a75e7d02a17a6f09a342ff495068435 Mon Sep 17 00:00:00 2001 From: traits Date: Thu, 23 Jun 2011 15:09:34 +0800 Subject: [PATCH 40/41] Refs #37. Updated REAME about the compatible issue with EKOPath compiler. --- README | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README b/README index 21e740689..9a7b16326 100644 --- a/README +++ b/README @@ -72,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve 9.Known Issues * The number of CPUs/Cores should less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit is 64. On 32 bits, it is 32. +* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully. 10. Specification of Git Branches We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). @@ -79,4 +80,4 @@ Now, there are 4 branches in github.com. * The master branch. This a main branch to reflect a production-ready state. * The develop branch. This a main branch to reflect a state with the latest delivered development changes for the next release. * The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future. - * The gh-pages branch. This is for web pages \ No newline at end of file + * The gh-pages branch. This is for web pages From 6a0762949d703d19266331ecd5d0d1968526af70 Mon Sep 17 00:00:00 2001 From: traits Date: Thu, 23 Jun 2011 15:16:24 +0800 Subject: [PATCH 41/41] Fixed #38. Released v0.1 alpha2. 
--- Changelog.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index f94c164c7..b54949ec5 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,6 +1,7 @@ OpenBLAS ChangeLog ==================================================================== -Version 0.1 alpha2(Preparing to release) +Version 0.1 alpha2 +23-Jun-2011 common: * Fixed blasint undefined bug in <cblas.h> file. Other software