From e1e83a1b7169737f43d51c545ec19ecffc72d117 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 29 Jun 2014 10:15:29 +0200 Subject: [PATCH 01/18] modification, to run blas-test on Windows --- Makefile | 6 ++++++ make.inc | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 397836da1..e1fa89cd0 100644 --- a/Makefile +++ b/Makefile @@ -249,8 +249,14 @@ ifndef NOFORTRAN -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc ifeq ($(F_COMPILER), GFORTRAN) -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc +ifdef SMP + -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc +else + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +endif else -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc endif -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc endif diff --git a/make.inc b/make.inc index da430b7d6..485cb7d48 100644 --- a/make.inc +++ b/make.inc @@ -1,7 +1,6 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(OPTS) -LOADER = $(FORTRAN) ARCHFLAGS= -ru #RANLIB = ranlib From 1fad2b759f9db983f0d358771e852adb92fda95d Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 29 Jun 2014 16:43:04 +0200 Subject: [PATCH 02/18] enabled smp for ger.c and zger.c, but only for 64bit binaries --- interface/ger.c | 18 ++++++++++++------ interface/zger.c | 15 +++++++++++---- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/interface/ger.c b/interface/ger.c index 7c9cd425f..86c95f49c 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -42,6 +42,12 @@ #include "functable.h" #endif +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif + #ifdef XDOUBLE #define ERROR_NAME "QGER " #elif defined DOUBLE @@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, blasint incy = *INCY; blasint lda = *LDA; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order, FLOAT *buffer; blasint info, t; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -135,11 +141,11 @@ void CNAME(enum CBLAS_ORDER order, t = incx; incx = incy; incy = t; - +/* buffer = x; x = y; y = buffer; - +*/ if (lda < MAX(1,m)) info = 9; if (incy == 0) info = 7; if (incx == 0) info = 5; @@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); @@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order, GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); -#ifdef SMPBUG +#ifdef SMPTEST } else { GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); diff --git a/interface/zger.c b/interface/zger.c index cefc839c2..f46a462e2 100644 --- a/interface/zger.c +++ b/interface/zger.c @@ -42,6 +42,13 @@ #include "functable.h" #endif +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif + + #ifdef XDOUBLE #ifndef CONJ #define ERROR_NAME "XGERU " @@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, blasint incy = *INCY; blasint lda = *LDA; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order, FLOAT *buffer; blasint info, t; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); if (nthreads == 1) { @@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order, } #endif -#ifdef SMPBUG +#ifdef SMPTEST } else { From 01a119abfcdecbb640c5bfcb52a1771253b14513 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 29 Jun 2014 20:35:56 +0200 Subject: [PATCH 03/18] enabled SMP for sbmv and zsbmv, but only for 64bit binaries --- interface/sbmv.c | 17 ++++++++++++----- interface/zsbmv.c | 15 +++++++++++---- kernel/x86_64/KERNEL | 2 +- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/interface/sbmv.c b/interface/sbmv.c index 0dac736cb..d5577e529 100644 --- a/interface/sbmv.c +++ b/interface/sbmv.c @@ -43,6 +43,13 @@ #include "functable.h" #endif +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif + + #ifdef XDOUBLE #define ERROR_NAME "QSBMV " #elif defined(DOUBLE) @@ -61,7 +68,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA #endif }; -#ifdef SMPBUG +#ifdef SMPTEST static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { #ifdef XDOUBLE qsbmv_thread_U, qsbmv_thread_L, @@ -90,7 +97,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * blasint info; int uplo; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -130,7 +137,7 @@ void CNAME(enum CBLAS_ORDER order, FLOAT *buffer; int uplo; blasint info; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -189,7 +196,7 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); if (nthreads == 1) { @@ -197,7 +204,7 @@ void CNAME(enum CBLAS_ORDER order, (sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer); -#ifdef SMPBUG +#ifdef SMPTEST } else { (sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads); diff --git a/interface/zsbmv.c b/interface/zsbmv.c index 2efe85ba9..54dae87b7 100644 --- a/interface/zsbmv.c +++ b/interface/zsbmv.c @@ -43,6 +43,13 @@ #include "functable.h" #endif +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif + + #ifdef XDOUBLE #define ERROR_NAME "XSBMV " #elif defined(DOUBLE) @@ -61,7 +68,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT #endif }; -#ifdef SMPBUG +#ifdef SMPTEST static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { #ifdef XDOUBLE xsbmv_thread_U, xsbmv_thread_L, @@ -90,7 +97,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * blasint info; int uplo; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -131,7 +138,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); if (nthreads == 1) { @@ -139,7 +146,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * (sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); -#ifdef SMPBUG +#ifdef SMPTEST } else { (sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index fa6282c53..9dd219c91 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -119,7 +119,7 @@ XCOPYKERNEL = zcopy.S endif ifndef SDOTKERNEL -SDOTKERNEL = dot_sse.S +SDOTKERNEL = ../arm/dot.c endif From aee61456a4af3bdec1a6172d2ccc648030d8fa6e Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 29 Jun 2014 21:18:38 +0200 Subject: [PATCH 04/18] disabled SMP for sbmv and zsbmv again --- interface/sbmv.c | 3 ++- interface/zsbmv.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/interface/sbmv.c b/interface/sbmv.c index d5577e529..761a9a0d0 100644 --- a/interface/sbmv.c +++ b/interface/sbmv.c @@ -43,12 +43,13 @@ #include "functable.h" #endif +/* #ifdef SMP #ifdef __64BIT__ #define SMPTEST 1 #endif #endif - +*/ #ifdef XDOUBLE #define ERROR_NAME "QSBMV " diff --git a/interface/zsbmv.c b/interface/zsbmv.c index 54dae87b7..b71d4c519 100644 --- a/interface/zsbmv.c +++ b/interface/zsbmv.c @@ -43,12 +43,13 @@ #include "functable.h" #endif +/* #ifdef SMP #ifdef __64BIT__ #define SMPTEST 1 #endif #endif - +*/ #ifdef XDOUBLE #define ERROR_NAME "XSBMV " From b079df9ef4536e89b4fb300062bebc7b7c540787 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Mon, 30 Jun 2014 14:46:38 +0200 Subject: [PATCH 05/18] added optimized sdot- and dsdot-kernel, written in C --- kernel/generic/dot.c | 104 +++++++++++++++++++++++++++++++++++++++++++ kernel/x86_64/KERNEL | 6 +-- 2 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 kernel/generic/dot.c diff --git a/kernel/generic/dot.c b/kernel/generic/dot.c new file mode 100644 index 000000000..bc07bc78f --- /dev/null +++ b/kernel/generic/dot.c @@ -0,0 +1,104 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +#include "common.h" + +#if defined(DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + +#if defined(DSDOT) + double dot = 0.0 ; +#else + FLOAT dot = 0.0 ; +#endif + + if ( n < 0 ) return(dot); + + if ( (inc_x == 1) && (inc_y == 1) ) + { + + int n1 = n & -4; + + while(i < n1) + { + +#if defined(DSDOT) + dot += (double) y[i] * (double) x[i] + + (double) y[i+1] * (double) x[i+1] + + (double) y[i+2] * (double) x[i+2] + + (double) y[i+3] * (double) x[i+3] ; +#else + dot += y[i] * x[i] + + y[i+1] * x[i+1] + + y[i+2] * x[i+2] + + y[i+3] * x[i+3] ; +#endif + i+=4 ; + + } + + while(i < n) + { + +#if defined(DSDOT) + dot += (double) y[i] * (double) x[i] ; +#else + dot += y[i] * x[i] ; +#endif + i++ ; + + } + return(dot); + + + } + + while(i < n) + { + +#if defined(DSDOT) + dot += (double) y[iy] * (double) x[ix] ; +#else + dot += y[iy] * x[ix] ; +#endif + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(dot); + +} + + diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 9dd219c91..ec21826d7 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S endif ifndef SDOTKERNEL -SDOTKERNEL = ../arm/dot.c +SDOTKERNEL = ../generic/dot.c endif - ifndef DSDOTKERNEL -DSDOTKERNEL = ../arm/dot.c +DSDOTKERNEL = ../generic/dot.c endif - ifndef DDOTKERNEL DDOTKERNEL = dot_sse2.S endif From be94db096c78eb8e7695346890fba8c2dc0005b2 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Tue, 1 Jul 2014 16:18:05 +0200 Subject: [PATCH 06/18] disabled *3M functions for x86_64 platforms --- driver/level3/Makefile | 10 ++++++---- exports/gensymbol | 2 +- interface/Makefile | 8 +++++--- kernel/Makefile.L3 | 10 ++++++---- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/driver/level3/Makefile b/driver/level3/Makefile index 4c004ee80..d62921e84 100644 --- a/driver/level3/Makefile +++ b/driver/level3/Makefile @@ -1,12 +1,14 @@ TOPDIR = ../.. include ../../Makefile.system +USE_GEMM3M = 0 + ifeq ($(ARCH), x86) -USE_GEMM3M = 1 +USE_GEMM3M = 0 endif ifeq ($(ARCH), x86_64) -USE_GEMM3M = 1 +USE_GEMM3M = 0 endif ifeq ($(ARCH), ia64) @@ -168,7 +170,7 @@ XBLASOBJS += \ xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \ xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX) -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += \ cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \ @@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX) XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX) -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX) CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX) diff --git a/exports/gensymbol b/exports/gensymbol index 6c21de455..2b8d94391 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -73,7 +73,7 @@ ); @gemm3mobjs = ( - zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m, + ); diff --git a/interface/Makefile b/interface/Makefile index 465d722b0..a24702630 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -1,6 +1,8 @@ TOPDIR = .. include $(TOPDIR)/Makefile.system +SUPPORT_GEMM3M = 0 + ifeq ($(ARCH), x86) SUPPORT_GEMM3M = 0 endif @@ -124,7 +126,7 @@ ZBLAS3OBJS = \ zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) -ifdef SUPPORT_GEMM3M +ifeq ($(SUPPORT_GEMM3M), 1) CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) @@ -182,7 +184,7 @@ XBLAS3OBJS = \ xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) -ifdef SUPPORT_GEMM3M +ifeq ($(SUPPORT_GEMM3M), 1) XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) @@ -238,7 +240,7 @@ XBLAS3OBJS = \ xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) -ifdef SUPPORT_GEMM3M +ifeq ($(SUPPORT_GEMM3M), 1) XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index be78dfc3d..14ee6c801 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -1,9 +1,11 @@ +USE_GEMM3M = 0 + ifeq ($(ARCH), x86) -USE_GEMM3M = 1 +USE_GEMM3M = 0 endif ifeq ($(ARCH), x86_64) -USE_GEMM3M = 1 +USE_GEMM3M = 0 endif ifeq ($(ARCH), ia64) @@ -122,7 +124,7 @@ XBLASOBJS += \ xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX) ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX) @@ -256,7 +258,7 @@ XBLASOBJS += \ xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += \ cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ From 02a504c0b8f009f98e3fc47e4d5c123f9893d0c8 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Wed, 2 Jul 2014 10:39:33 +0200 Subject: [PATCH 07/18] fixed my bug in ger.c --- interface/ger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interface/ger.c b/interface/ger.c index 86c95f49c..9857d2423 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -141,11 +141,11 @@ void CNAME(enum CBLAS_ORDER order, t = incx; incx = incy; incy = t; -/* + buffer = x; x = y; y = buffer; -*/ + if (lda < MAX(1,m)) info = 9; if (incy == 0) info = 7; if (incx == 0) info = 5; From e80b144932c3d147c596a6b6047c10338b62b501 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Wed, 2 Jul 2014 14:11:53 +0200 Subject: [PATCH 08/18] enabled compiling of *3M functions --- kernel/Makefile.L3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 14ee6c801..268177c0f 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -1,11 +1,11 @@ USE_GEMM3M = 0 ifeq ($(ARCH), x86) -USE_GEMM3M = 0 +USE_GEMM3M = 1 endif ifeq ($(ARCH), x86_64) -USE_GEMM3M = 0 +USE_GEMM3M = 1 endif ifeq ($(ARCH), ia64) From e0c080a28cf46eb3eb0e7cd9fe1b2cf1a5e80be6 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sat, 5 Jul 2014 16:13:17 +0200 Subject: [PATCH 09/18] removed reference to zgemm_kernel_4x2_sse3.S (bug in lapack-test) --- kernel/x86_64/KERNEL.PRESCOTT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/KERNEL.PRESCOTT b/kernel/x86_64/KERNEL.PRESCOTT index 9b3e514d1..c8011219d 100644 --- a/kernel/x86_64/KERNEL.PRESCOTT +++ b/kernel/x86_64/KERNEL.PRESCOTT @@ -19,7 +19,7 @@ DGEMMINCOPYOBJ = DGEMMITCOPYOBJ = DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) -CGEMMKERNEL = zgemm_kernel_4x2_sse3.S +CGEMMKERNEL = zgemm_kernel_4x2_penryn.S CGEMMINCOPY = ../generic/zgemm_ncopy_4.c CGEMMITCOPY = ../generic/zgemm_tcopy_4.c CGEMMONCOPY = zgemm_ncopy_2.S From f7267d9b0e2b8e55490b0f05bfd1b81627d49b4e Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 6 Jul 2014 10:17:07 +0200 Subject: [PATCH 10/18] added missing definition for DUNNINGTON --- kernel/setparam-ref.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 806c1928c..6eebd5fcb 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -740,6 +740,23 @@ static void init_parameter(void) { #endif #endif +#ifdef DUNNINGTON + +#ifdef DEBUG + fprintf(stderr, "Dunnington\n"); +#endif + + TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; + TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; + TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; + TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; +#ifdef EXPRECISION + TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; + TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; +#endif +#endif + + #ifdef NEHALEM #ifdef DEBUG From d5b976f92d763092d12022b5b44f87cbb830fe4e Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 6 Jul 2014 11:05:28 +0200 Subject: [PATCH 11/18] fallback to zgemm_kernel_4x2_sse.S --- kernel/x86_64/KERNEL.PRESCOTT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/KERNEL.PRESCOTT b/kernel/x86_64/KERNEL.PRESCOTT index c8011219d..0ea43ad7b 100644 --- a/kernel/x86_64/KERNEL.PRESCOTT +++ b/kernel/x86_64/KERNEL.PRESCOTT @@ -19,7 +19,7 @@ DGEMMINCOPYOBJ = DGEMMITCOPYOBJ = DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) -CGEMMKERNEL = zgemm_kernel_4x2_penryn.S +CGEMMKERNEL = zgemm_kernel_4x2_sse.S CGEMMINCOPY = ../generic/zgemm_ncopy_4.c CGEMMITCOPY = ../generic/zgemm_tcopy_4.c CGEMMONCOPY = zgemm_ncopy_2.S From 9964ed2f798b86034a6e337d57de5aa23691a2c7 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 6 Jul 2014 11:47:28 +0200 Subject: [PATCH 12/18] bugfix for CORE2 --- kernel/setparam-ref.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 6eebd5fcb..5086420c1 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -714,13 +714,13 @@ static void init_parameter(void) { fprintf(stderr, "Core2\n"); #endif - TABLE_NAME.sgemm_p = 92 * (l2 >> 9); - TABLE_NAME.dgemm_p = 46 * (l2 >> 9); - TABLE_NAME.cgemm_p = 46 * (l2 >> 9); - TABLE_NAME.zgemm_p = 23 * (l2 >> 9); + TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; + TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; + TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; + TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; #ifdef EXPRECISION - TABLE_NAME.qgemm_p = 92 * (l2 >> 9); - TABLE_NAME.xgemm_p = 46 * (l2 >> 9); + TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; + TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; #endif #endif From 50e99a52ea50121ef3c85c1fedeb20320e3f473c Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 6 Jul 2014 12:08:27 +0200 Subject: [PATCH 13/18] added definitions for PILEDRIVER and HASWELL --- driver/others/parameter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/others/parameter.c b/driver/others/parameter.c index 12787403e..a0a8b5188 100644 --- a/driver/others/parameter.c +++ b/driver/others/parameter.c @@ -165,7 +165,8 @@ int get_L2_size(void){ #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ - defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) + defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ + defined(PILEDRIVER) || defined(HASWELL) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); From 783a7d2202c16dafa12860377cd81689b5a11bec Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 6 Jul 2014 13:33:42 +0200 Subject: [PATCH 14/18] bugfix for fortran compiler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e1fa89cd0..2e378883b 100644 --- a/Makefile +++ b/Makefile @@ -247,7 +247,7 @@ ifndef NOFORTRAN -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -ifeq ($(F_COMPILER), GFORTRAN) +ifeq ($(FC), GFORTRAN) -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc ifdef SMP -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc From 13348b21373848f3df87249ae52303f506466203 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 6 Jul 2014 16:39:32 +0200 Subject: [PATCH 15/18] removed reference to daxpy_bulldozer kernel (Windows bug in lapack-test) --- kernel/x86_64/KERNEL.BULLDOZER | 1 - kernel/x86_64/KERNEL.PILEDRIVER | 1 - 2 files changed, 2 deletions(-) diff --git a/kernel/x86_64/KERNEL.BULLDOZER b/kernel/x86_64/KERNEL.BULLDOZER index d9b9f84f3..6e09813c3 100644 --- a/kernel/x86_64/KERNEL.BULLDOZER +++ b/kernel/x86_64/KERNEL.BULLDOZER @@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S DGEMVNKERNEL = dgemv_n_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S -DAXPYKERNEL = daxpy_bulldozer.S DDOTKERNEL = ddot_bulldozer.S DCOPYKERNEL = dcopy_bulldozer.S diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER index b083b5d53..e4ac0d895 100644 --- a/kernel/x86_64/KERNEL.PILEDRIVER +++ b/kernel/x86_64/KERNEL.PILEDRIVER @@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S DGEMVNKERNEL = dgemv_n_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S -DAXPYKERNEL = daxpy_bulldozer.S DDOTKERNEL = ddot_bulldozer.S DCOPYKERNEL = dcopy_bulldozer.S From 552119c48429c76a701600d05d3a1f472a41c65c Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 8 Jul 2014 12:48:08 +0800 Subject: [PATCH 16/18] Fixed #407. Support outputing the CPU corename on runtime. The user can use char * openblas_get_config() or char * openblas_get_corename(). --- cblas.h | 3 +++ driver/others/openblas_get_config.c | 25 +++++++++++++++++++++++++ exports/gensymbol | 1 + getarch.c | 9 +++++++++ 4 files changed, 38 insertions(+) diff --git a/cblas.h b/cblas.h index 841ad6330..ef072e6ff 100644 --- a/cblas.h +++ b/cblas.h @@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads); /*Get the build configure on runtime.*/ char* openblas_get_config(void); +/*Get the CPU corename on runtime.*/ +char* openblas_get_corename(void); + /* Get the parallelization type which is used by OpenBLAS */ int openblas_get_parallel(void); /* OpenBLAS is compiled for sequential use */ diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index d8da2e398..0fecbf951 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" +#include + static char* openblas_config_str="" #ifdef USE64BITINT "USE64BITINT " @@ -50,10 +52,33 @@ static char* openblas_config_str="" #endif #ifdef NO_AFFINITY "NO_AFFINITY " +#endif +#ifndef DYNAMIC_ARCH + CHAR_CORENAME #endif ; +#ifdef DYNAMIC_ARCH +char *gotoblas_corename(); +static char tmp_config_str[256]; +#endif + + char* CNAME() { +#ifndef DYNAMIC_ARCH return openblas_config_str; +#else + strcpy(tmp_config_str, openblas_config_str); + strcat(tmp_config_str, gotoblas_corename()); + return tmp_config_str; +#endif } + +char* openblas_get_corename() { +#ifndef DYNAMIC_ARCH + return CHAR_CORENAME; +#else + return gotoblas_corename(); +#endif +} diff --git a/exports/gensymbol b/exports/gensymbol index 2b8d94391..0769ae0f3 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -85,6 +85,7 @@ @misc_no_underscore_objs = ( goto_set_num_threads, openblas_get_config, + openblas_get_corename, ); @misc_underscore_objs = ( diff --git a/getarch.c b/getarch.c index 234f7e172..3e9914259 100644 --- a/getarch.c +++ b/getarch.c @@ -952,6 +952,15 @@ int main(int argc, char *argv[]){ #else get_cpuconfig(); #endif + +#ifdef FORCE + printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); +#else +#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) + printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); +#endif +#endif + break; case '2' : /* SMP */ From 698e77dba4bf13384ea9a8c2cdd099235368bf2e Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 8 Jul 2014 17:26:49 +0800 Subject: [PATCH 17/18] Refs #406. Fixed utest building bug. --- utest/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/utest/Makefile b/utest/Makefile index 31cb93176..fa05458cc 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -1,15 +1,19 @@ UTEST_CHECK = 1 TOPDIR = .. -include $(TOPDIR)/Makefile.system TARGET=openblas_utest +.PHONY : all +.NOTPARALLEL : all run_test $(TARGET) + CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2 CUNIT_DIR=$(CURDIR)/CUnit-2.1-2 CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a -CFLAGS+=-I$(CUNIT_DIR)/include +CFLAGS +=-I$(CUNIT_DIR)/include + +include $(TOPDIR)/Makefile.system OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o From 47688e24e99079191488b2fc57248f92753e0611 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 9 Jul 2014 08:47:36 +0800 Subject: [PATCH 18/18] OpenBLAS 0.2.10 rc2 version. --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index 90ff80ef8..f04591907 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.10.rc1 +VERSION = 0.2.10.rc2 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library