From 97de657d38cfd2ccacee54ec7920afa61a5967e7 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Thu, 11 Dec 2014 13:53:59 +0100 Subject: [PATCH 1/8] added tests to sep.as as workaround for gfortran-4.8.x --- lapack-netlib/TESTING/sep.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lapack-netlib/TESTING/sep.in b/lapack-netlib/TESTING/sep.in index e0ed58512..4d10b6c19 100644 --- a/lapack-netlib/TESTING/sep.in +++ b/lapack-netlib/TESTING/sep.in @@ -1,6 +1,6 @@ SEP: Data file for testing Symmetric Eigenvalue Problem routines -6 Number of values of N -0 1 2 3 5 20 Values of N (dimension) +8 Number of values of N +0 1 2 3 5 19 20 21 Values of N (dimension) 5 Number of values of NB 1 3 3 3 10 Values of NB (blocksize) 2 2 2 2 2 Values of NBMIN (minimum blocksize) From ec85c4a51d01f7c4d2a9ffeff9c15ff451054a62 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Thu, 11 Dec 2014 14:57:41 +0100 Subject: [PATCH 2/8] Increased the Threshold value in sep.in --- lapack-netlib/TESTING/sep.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack-netlib/TESTING/sep.in b/lapack-netlib/TESTING/sep.in index 4d10b6c19..19bd7c3da 100644 --- a/lapack-netlib/TESTING/sep.in +++ b/lapack-netlib/TESTING/sep.in @@ -5,7 +5,7 @@ SEP: Data file for testing Symmetric Eigenvalue Problem routines 1 3 3 3 10 Values of NB (blocksize) 2 2 2 2 2 Values of NBMIN (minimum blocksize) 1 0 5 9 1 Values of NX (crossover point) -60.0 Threshold value +160.0 Threshold value T Put T to test the LAPACK routines T Put T to test the driver routines T Put T to test the error exits From 113b48ca2222b5f46bb40425f21e52906958473e Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Wed, 17 Dec 2014 14:12:21 +0100 Subject: [PATCH 3/8] modified makefile for acml6.1 --- benchmark/Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index cf219cef1..402a2e07b 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -6,8 +6,13 @@ include $(TOPDIR)/Makefile.system #LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm # ACML custom -ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib -LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm +#ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib +#LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm + +# ACML 6.1 custom +ACML=/home/saar/acml6.1/gfortran64_mp/lib +LIBACML = -fopenmp $(ACML)/libacml_mp.so -lgfortran -lm + # Atlas Ubuntu #ATLAS=/usr/lib/atlas-base From 1e566223ed11a6b453a0e37cbb664a4192f04e8b Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Wed, 17 Dec 2014 15:02:11 +0100 Subject: [PATCH 4/8] added code for the size of n --- benchmark/gemm.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/benchmark/gemm.c b/benchmark/gemm.c index 4f9a58825..347cf0dfa 100644 --- a/benchmark/gemm.c +++ b/benchmark/gemm.c @@ -124,8 +124,9 @@ int MAIN__(int argc, char *argv[]){ FLOAT alpha[] = {1.0, 1.0}; FLOAT beta [] = {1.0, 1.0}; char trans='N'; - blasint m, i, j; + blasint m, n, i, j; int loops = 1; + int has_param_n=0; int l; char *p; @@ -162,6 +163,11 @@ int MAIN__(int argc, char *argv[]){ if ( p != NULL ) loops = atoi(p); + if ((p = getenv("OPENBLAS_PARAM_N"))) { + n = atoi(p); + has_param_n=1; + } + #ifdef linux srandom(getpid()); @@ -174,7 +180,14 @@ int MAIN__(int argc, char *argv[]){ timeg=0; - fprintf(stderr, " %6d : ", (int)m); + if ( has_param_n == 1 && n <= m ) + n=n; + else + n=m; + + + + fprintf(stderr, " %6dx%d : ", (int)m, (int)n); for (l=0; l Date: Thu, 18 Dec 2014 20:35:51 +0100 Subject: [PATCH 5/8] small optimization on dgemm_kernel for N=1 --- kernel/x86_64/dgemm_kernel_4x4_haswell.S | 79 ++++++++++++++---------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/kernel/x86_64/dgemm_kernel_4x4_haswell.S b/kernel/x86_64/dgemm_kernel_4x4_haswell.S index a49a51ee9..0a2ca7ae3 100644 --- a/kernel/x86_64/dgemm_kernel_4x4_haswell.S +++ b/kernel/x86_64/dgemm_kernel_4x4_haswell.S @@ -1092,18 +1092,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vxorpd %xmm4 , %xmm4 , %xmm4 - vxorpd %xmm5 , %xmm5 , %xmm5 + vxorpd %ymm4 , %ymm4 , %ymm4 + vxorpd %ymm5 , %ymm5 , %ymm5 + vxorpd %ymm6 , %ymm6 , %ymm6 + vxorpd %ymm7 , %ymm7 , %ymm7 + +.endm + + +.macro KERNEL4x1 + + vbroadcastsd -12 * SIZE(BO), %ymm0 + vbroadcastsd -11 * SIZE(BO), %ymm1 + vbroadcastsd -10 * SIZE(BO), %ymm2 + vbroadcastsd -9 * SIZE(BO), %ymm3 + + vfmadd231pd -16 * SIZE(AO) ,%ymm0 , %ymm4 + vfmadd231pd -12 * SIZE(AO) ,%ymm1 , %ymm5 + + vbroadcastsd -8 * SIZE(BO), %ymm0 + vbroadcastsd -7 * SIZE(BO), %ymm1 + + vfmadd231pd -8 * SIZE(AO) ,%ymm2 , %ymm6 + vfmadd231pd -4 * SIZE(AO) ,%ymm3 , %ymm7 + + vbroadcastsd -6 * SIZE(BO), %ymm2 + vbroadcastsd -5 * SIZE(BO), %ymm3 + + vfmadd231pd 0 * SIZE(AO) ,%ymm0 , %ymm4 + vfmadd231pd 4 * SIZE(AO) ,%ymm1 , %ymm5 + vfmadd231pd 8 * SIZE(AO) ,%ymm2 , %ymm6 + vfmadd231pd 12 * SIZE(AO) ,%ymm3 , %ymm7 + + addq $ 8 *SIZE, BO + addq $ 32*SIZE, AO .endm .macro KERNEL4x1_SUB - vmovddup -12 * SIZE(BO), %xmm2 - vmovups -16 * SIZE(AO), %xmm0 - vmovups -14 * SIZE(AO), %xmm1 - vfmadd231pd %xmm0 ,%xmm2 , %xmm4 - vfmadd231pd %xmm1 ,%xmm2 , %xmm5 + vbroadcastsd -12 * SIZE(BO), %ymm2 + vmovups -16 * SIZE(AO), %ymm0 + vfmadd231pd %ymm0 ,%ymm2 , %ymm4 addq $ 1*SIZE, BO addq $ 4*SIZE, AO @@ -1112,21 +1142,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro SAVE4x1 - vmovddup ALPHA, %xmm0 + vbroadcastsd ALPHA, %ymm0 - vmulpd %xmm0 , %xmm4 , %xmm4 - vmulpd %xmm0 , %xmm5 , %xmm5 + vaddpd %ymm4,%ymm5, %ymm4 + vaddpd %ymm6,%ymm7, %ymm6 + vaddpd %ymm4,%ymm6, %ymm4 + + vmulpd %ymm0 , %ymm4 , %ymm4 #if !defined(TRMMKERNEL) - vaddpd (CO1) , %xmm4, %xmm4 - vaddpd 2 * SIZE(CO1) , %xmm5, %xmm5 + vaddpd (CO1) , %ymm4, %ymm4 #endif - vmovups %xmm4 , (CO1) - vmovups %xmm5 , 2 * SIZE(CO1) + vmovups %ymm4 , (CO1) addq $ 4*SIZE, CO1 .endm @@ -2112,15 +2143,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .L1_12: - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB - - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB + KERNEL4x1 dec %rax jne .L1_12 @@ -3180,15 +3203,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .L1_12: - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB - - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB - KERNEL4x1_SUB + KERNEL4x1 dec %rax jne .L1_12 From 887aed634df973d93bb559d14b7dc08b343c60b9 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Fri, 19 Dec 2014 12:40:46 +0100 Subject: [PATCH 6/8] modified sources for OS Darwin --- benchmark/axpy.c | 4 ++-- benchmark/cholesky.c | 4 ++-- benchmark/dot.c | 4 ++-- benchmark/geev.c | 4 ++-- benchmark/gemm.c | 4 ++-- benchmark/gemm3m.c | 4 ++-- benchmark/gemv.c | 4 ++-- benchmark/ger.c | 4 ++-- benchmark/getri.c | 4 ++-- benchmark/hemm.c | 4 ++-- benchmark/hemv.c | 4 ++-- benchmark/her2k.c | 4 ++-- benchmark/herk.c | 4 ++-- benchmark/linpack.c | 4 ++-- benchmark/potrf.c | 4 ++-- benchmark/symm.c | 4 ++-- benchmark/symv.c | 4 ++-- benchmark/syr2k.c | 4 ++-- benchmark/syrk.c | 4 ++-- benchmark/trmm.c | 4 ++-- benchmark/trsm.c | 4 ++-- 21 files changed, 42 insertions(+), 42 deletions(-) diff --git a/benchmark/axpy.c b/benchmark/axpy.c index ef3b5ae4f..a7206b690 100644 --- a/benchmark/axpy.c +++ b/benchmark/axpy.c @@ -114,7 +114,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *x, *y; FLOAT alpha[2] = { 2.0, 2.0 }; @@ -198,4 +198,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/cholesky.c b/benchmark/cholesky.c index 76c368eda..c8b96d80f 100644 --- a/benchmark/cholesky.c +++ b/benchmark/cholesky.c @@ -117,7 +117,7 @@ static __inline double getmflops(int ratio, int m, double secs){ } -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ #ifndef COMPLEX char *trans[] = {"T", "N"}; @@ -273,4 +273,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/dot.c b/benchmark/dot.c index 6132ed324..4c8d6cc38 100644 --- a/benchmark/dot.c +++ b/benchmark/dot.c @@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *x, *y; FLOAT result; @@ -192,4 +192,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/geev.c b/benchmark/geev.c index 3b7465360..a2ca2c315 100644 --- a/benchmark/geev.c +++ b/benchmark/geev.c @@ -139,7 +139,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; FLOAT wkopt[4]; @@ -257,4 +257,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/gemm.c b/benchmark/gemm.c index 347cf0dfa..5a3587622 100644 --- a/benchmark/gemm.c +++ b/benchmark/gemm.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -222,4 +222,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/gemm3m.c b/benchmark/gemm3m.c index 048d74be6..d39543585 100644 --- a/benchmark/gemm3m.c +++ b/benchmark/gemm3m.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -209,4 +209,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/gemv.c b/benchmark/gemv.c index e21868259..42af2825a 100644 --- a/benchmark/gemv.c +++ b/benchmark/gemv.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *x, *y; FLOAT alpha[] = {1.0, 1.0}; @@ -266,4 +266,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/ger.c b/benchmark/ger.c index 5085389da..354281006 100644 --- a/benchmark/ger.c +++ b/benchmark/ger.c @@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *x, *y; FLOAT alpha[] = {1.0, 1.0}; @@ -214,5 +214,5 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/getri.c b/benchmark/getri.c index 897f1ff04..083cdc9aa 100644 --- a/benchmark/getri.c +++ b/benchmark/getri.c @@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a,*work; FLOAT wkopt[4]; @@ -231,4 +231,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/hemm.c b/benchmark/hemm.c index f5d4b4fd9..318c407ba 100644 --- a/benchmark/hemm.c +++ b/benchmark/hemm.c @@ -107,7 +107,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -189,4 +189,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/hemv.c b/benchmark/hemv.c index 79b7679cc..05028e3cf 100644 --- a/benchmark/hemv.c +++ b/benchmark/hemv.c @@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *x, *y; FLOAT alpha[] = {1.0, 1.0}; @@ -205,4 +205,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/her2k.c b/benchmark/her2k.c index 49ab8d214..028e2718f 100644 --- a/benchmark/her2k.c +++ b/benchmark/her2k.c @@ -106,7 +106,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -188,4 +188,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/herk.c b/benchmark/herk.c index 8c053b019..d2e25ff46 100644 --- a/benchmark/herk.c +++ b/benchmark/herk.c @@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -186,4 +186,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/linpack.c b/benchmark/linpack.c index 98a874208..7d5c87163 100644 --- a/benchmark/linpack.c +++ b/benchmark/linpack.c @@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b; blasint *ipiv; @@ -270,4 +270,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/potrf.c b/benchmark/potrf.c index 7b6cdd799..3caf61caa 100644 --- a/benchmark/potrf.c +++ b/benchmark/potrf.c @@ -114,7 +114,7 @@ int gettimeofday(struct timeval *tv, void *tz){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ #ifndef COMPLEX char *trans[] = {"T", "N"}; @@ -278,5 +278,5 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/symm.c b/benchmark/symm.c index 187dfe2ae..35ebcee97 100644 --- a/benchmark/symm.c +++ b/benchmark/symm.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/symv.c b/benchmark/symv.c index 4bcfb411b..df2a5d301 100644 --- a/benchmark/symv.c +++ b/benchmark/symv.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *x, *y; FLOAT alpha[] = {1.0, 1.0}; @@ -215,4 +215,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/syr2k.c b/benchmark/syr2k.c index e11b04e42..9840b5f3e 100644 --- a/benchmark/syr2k.c +++ b/benchmark/syr2k.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/syrk.c b/benchmark/syrk.c index f01549688..34817f2bb 100644 --- a/benchmark/syrk.c +++ b/benchmark/syrk.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *c; FLOAT alpha[] = {1.0, 1.0}; @@ -196,4 +196,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/trmm.c b/benchmark/trmm.c index 328dc9a10..f81e9d912 100644 --- a/benchmark/trmm.c +++ b/benchmark/trmm.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b; FLOAT alpha[] = {1.0, 1.0}; @@ -199,4 +199,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); diff --git a/benchmark/trsm.c b/benchmark/trsm.c index 908a0fcb7..ed969b707 100644 --- a/benchmark/trsm.c +++ b/benchmark/trsm.c @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){ #endif -int MAIN__(int argc, char *argv[]){ +int main(int argc, char *argv[]){ FLOAT *a, *b; FLOAT alpha[] = {1.0, 1.0}; @@ -199,4 +199,4 @@ int MAIN__(int argc, char *argv[]){ return 0; } -void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); +// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); From 4de7b9ae470fb98c4d5353371604b025f5b9fcd4 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Mon, 22 Dec 2014 14:04:27 +0100 Subject: [PATCH 7/8] increased NMAX to 128 --- lapack-netlib/BLAS/TESTING/cblat2.f | 2 +- lapack-netlib/BLAS/TESTING/cblat3.f | 2 +- lapack-netlib/BLAS/TESTING/dblat2.f | 2 +- lapack-netlib/BLAS/TESTING/dblat3.f | 2 +- lapack-netlib/BLAS/TESTING/sblat2.f | 2 +- lapack-netlib/BLAS/TESTING/sblat3.f | 2 +- lapack-netlib/BLAS/TESTING/zblat2.f | 2 +- lapack-netlib/BLAS/TESTING/zblat3.f | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lapack-netlib/BLAS/TESTING/cblat2.f b/lapack-netlib/BLAS/TESTING/cblat2.f index 5833ea81a..2a6edd382 100644 --- a/lapack-netlib/BLAS/TESTING/cblat2.f +++ b/lapack-netlib/BLAS/TESTING/cblat2.f @@ -120,7 +120,7 @@ REAL RZERO PARAMETER ( RZERO = 0.0 ) INTEGER NMAX, INCMAX - PARAMETER ( NMAX = 65, INCMAX = 2 ) + PARAMETER ( NMAX = 128, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) diff --git a/lapack-netlib/BLAS/TESTING/cblat3.f b/lapack-netlib/BLAS/TESTING/cblat3.f index 09f2cb9c5..fb2aa4ece 100644 --- a/lapack-netlib/BLAS/TESTING/cblat3.f +++ b/lapack-netlib/BLAS/TESTING/cblat3.f @@ -102,7 +102,7 @@ REAL RZERO PARAMETER ( RZERO = 0.0 ) INTEGER NMAX - PARAMETER ( NMAX = 65 ) + PARAMETER ( NMAX = 128 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. diff --git a/lapack-netlib/BLAS/TESTING/dblat2.f b/lapack-netlib/BLAS/TESTING/dblat2.f index 0fa80afa4..80623b260 100644 --- a/lapack-netlib/BLAS/TESTING/dblat2.f +++ b/lapack-netlib/BLAS/TESTING/dblat2.f @@ -117,7 +117,7 @@ DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX, INCMAX - PARAMETER ( NMAX = 65, INCMAX = 2 ) + PARAMETER ( NMAX = 128, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) diff --git a/lapack-netlib/BLAS/TESTING/dblat3.f b/lapack-netlib/BLAS/TESTING/dblat3.f index 8d37c7453..72c17ed3b 100644 --- a/lapack-netlib/BLAS/TESTING/dblat3.f +++ b/lapack-netlib/BLAS/TESTING/dblat3.f @@ -97,7 +97,7 @@ DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX - PARAMETER ( NMAX = 65 ) + PARAMETER ( NMAX = 128 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. diff --git a/lapack-netlib/BLAS/TESTING/sblat2.f b/lapack-netlib/BLAS/TESTING/sblat2.f index 71605ed31..601add7e9 100644 --- a/lapack-netlib/BLAS/TESTING/sblat2.f +++ b/lapack-netlib/BLAS/TESTING/sblat2.f @@ -117,7 +117,7 @@ REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX, INCMAX - PARAMETER ( NMAX = 65, INCMAX = 2 ) + PARAMETER ( NMAX = 128, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) diff --git a/lapack-netlib/BLAS/TESTING/sblat3.f b/lapack-netlib/BLAS/TESTING/sblat3.f index 879269633..78d809379 100644 --- a/lapack-netlib/BLAS/TESTING/sblat3.f +++ b/lapack-netlib/BLAS/TESTING/sblat3.f @@ -97,7 +97,7 @@ REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX - PARAMETER ( NMAX = 65 ) + PARAMETER ( NMAX = 128 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. diff --git a/lapack-netlib/BLAS/TESTING/zblat2.f b/lapack-netlib/BLAS/TESTING/zblat2.f index 53129a11e..2e3e08e7c 100644 --- a/lapack-netlib/BLAS/TESTING/zblat2.f +++ b/lapack-netlib/BLAS/TESTING/zblat2.f @@ -121,7 +121,7 @@ DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX, INCMAX - PARAMETER ( NMAX = 65, INCMAX = 2 ) + PARAMETER ( NMAX = 128, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) diff --git a/lapack-netlib/BLAS/TESTING/zblat3.f b/lapack-netlib/BLAS/TESTING/zblat3.f index 59ca24145..39ce06b99 100644 --- a/lapack-netlib/BLAS/TESTING/zblat3.f +++ b/lapack-netlib/BLAS/TESTING/zblat3.f @@ -104,7 +104,7 @@ DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX - PARAMETER ( NMAX = 65 ) + PARAMETER ( NMAX = 128 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. From 587e16fba3775cd8587f7d54c19ef1696c88f771 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Mon, 22 Dec 2014 17:01:18 +0100 Subject: [PATCH 8/8] Ref #458: Backport, sandybrigde uses nehalem zgemm kernel --- kernel/x86_64/KERNEL.SANDYBRIDGE | 12 ++++++------ param.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/x86_64/KERNEL.SANDYBRIDGE b/kernel/x86_64/KERNEL.SANDYBRIDGE index 61e13a116..ff96cd011 100644 --- a/kernel/x86_64/KERNEL.SANDYBRIDGE +++ b/kernel/x86_64/KERNEL.SANDYBRIDGE @@ -34,17 +34,17 @@ CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) - -ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S -ZGEMMINCOPY = -ZGEMMITCOPY = +ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S +ZGEMMINCOPY = zgemm_ncopy_1.S +ZGEMMITCOPY = zgemm_tcopy_1.S ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -ZGEMMINCOPYOBJ = -ZGEMMITCOPYOBJ = +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) + #STRSMKERNEL_LN = trsm_kernel_LN_4x8_nehalem.S #STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S #STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S diff --git a/param.h b/param.h index 28ed91e60..bce05c957 100644 --- a/param.h +++ b/param.h @@ -1129,7 +1129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define DGEMM_DEFAULT_UNROLL_M 8 #define QGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_M 8 -#define ZGEMM_DEFAULT_UNROLL_M 4 +#define ZGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1 #define SGEMM_DEFAULT_UNROLL_N 4