Merge pull request #479 from wernsaar/develop

workaround for sandybridge zgemm kernel
This commit is contained in:
Zhang Xianyi 2014-12-23 00:59:41 +08:00
commit eb738148fe
34 changed files with 131 additions and 98 deletions

View File

@ -6,8 +6,13 @@ include $(TOPDIR)/Makefile.system
#LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm #LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm
# ACML custom # ACML custom
ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib #ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib
LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm #LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm
# ACML 6.1 custom
ACML=/home/saar/acml6.1/gfortran64_mp/lib
LIBACML = -fopenmp $(ACML)/libacml_mp.so -lgfortran -lm
# Atlas Ubuntu # Atlas Ubuntu
#ATLAS=/usr/lib/atlas-base #ATLAS=/usr/lib/atlas-base

View File

@ -114,7 +114,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 }; FLOAT alpha[2] = { 2.0, 2.0 };
@ -198,4 +198,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -117,7 +117,7 @@ static __inline double getmflops(int ratio, int m, double secs){
} }
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
#ifndef COMPLEX #ifndef COMPLEX
char *trans[] = {"T", "N"}; char *trans[] = {"T", "N"};
@ -273,4 +273,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
FLOAT result; FLOAT result;
@ -192,4 +192,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -139,7 +139,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
FLOAT wkopt[4]; FLOAT wkopt[4];
@ -257,4 +257,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,14 +118,15 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0}; FLOAT beta [] = {1.0, 1.0};
char trans='N'; char trans='N';
blasint m, i, j; blasint m, n, i, j;
int loops = 1; int loops = 1;
int has_param_n=0;
int l; int l;
char *p; char *p;
@ -162,6 +163,11 @@ int MAIN__(int argc, char *argv[]){
if ( p != NULL ) if ( p != NULL )
loops = atoi(p); loops = atoi(p);
if ((p = getenv("OPENBLAS_PARAM_N"))) {
n = atoi(p);
has_param_n=1;
}
#ifdef linux #ifdef linux
srandom(getpid()); srandom(getpid());
@ -174,7 +180,14 @@ int MAIN__(int argc, char *argv[]){
timeg=0; timeg=0;
fprintf(stderr, " %6d : ", (int)m); if ( has_param_n == 1 && n <= m )
n=n;
else
n=m;
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
@ -189,7 +202,7 @@ int MAIN__(int argc, char *argv[]){
gettimeofday( &start, (struct timezone *)0); gettimeofday( &start, (struct timezone *)0);
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); gettimeofday( &stop, (struct timezone *)0);
@ -202,11 +215,11 @@ int MAIN__(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6); COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6);
} }
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -209,4 +209,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -266,4 +266,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -214,5 +214,5 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a,*work; FLOAT *a,*work;
FLOAT wkopt[4]; FLOAT wkopt[4];
@ -231,4 +231,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -107,7 +107,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -189,4 +189,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -205,4 +205,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -106,7 +106,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -188,4 +188,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *c; FLOAT *a, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -186,4 +186,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b; FLOAT *a, *b;
blasint *ipiv; blasint *ipiv;
@ -270,4 +270,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -114,7 +114,7 @@ int gettimeofday(struct timeval *tv, void *tz){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
#ifndef COMPLEX #ifndef COMPLEX
char *trans[] = {"T", "N"}; char *trans[] = {"T", "N"};
@ -278,5 +278,5 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -215,4 +215,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *c; FLOAT *a, *c;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -196,4 +196,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b; FLOAT *a, *b;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -199,4 +199,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
int MAIN__(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b; FLOAT *a, *b;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
@ -199,4 +199,4 @@ int MAIN__(int argc, char *argv[]){
return 0; return 0;
} }
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -34,17 +34,17 @@ CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S ZGEMMINCOPY = zgemm_ncopy_1.S
ZGEMMINCOPY = ZGEMMITCOPY = zgemm_tcopy_1.S
ZGEMMITCOPY =
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMINCOPYOBJ = ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
#STRSMKERNEL_LN = trsm_kernel_LN_4x8_nehalem.S #STRSMKERNEL_LN = trsm_kernel_LN_4x8_nehalem.S
#STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S #STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
#STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S #STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S

View File

@ -1092,18 +1092,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT4x1 .macro INIT4x1
vxorpd %xmm4 , %xmm4 , %xmm4 vxorpd %ymm4 , %ymm4 , %ymm4
vxorpd %xmm5 , %xmm5 , %xmm5 vxorpd %ymm5 , %ymm5 , %ymm5
vxorpd %ymm6 , %ymm6 , %ymm6
vxorpd %ymm7 , %ymm7 , %ymm7
.endm
.macro KERNEL4x1
vbroadcastsd -12 * SIZE(BO), %ymm0
vbroadcastsd -11 * SIZE(BO), %ymm1
vbroadcastsd -10 * SIZE(BO), %ymm2
vbroadcastsd -9 * SIZE(BO), %ymm3
vfmadd231pd -16 * SIZE(AO) ,%ymm0 , %ymm4
vfmadd231pd -12 * SIZE(AO) ,%ymm1 , %ymm5
vbroadcastsd -8 * SIZE(BO), %ymm0
vbroadcastsd -7 * SIZE(BO), %ymm1
vfmadd231pd -8 * SIZE(AO) ,%ymm2 , %ymm6
vfmadd231pd -4 * SIZE(AO) ,%ymm3 , %ymm7
vbroadcastsd -6 * SIZE(BO), %ymm2
vbroadcastsd -5 * SIZE(BO), %ymm3
vfmadd231pd 0 * SIZE(AO) ,%ymm0 , %ymm4
vfmadd231pd 4 * SIZE(AO) ,%ymm1 , %ymm5
vfmadd231pd 8 * SIZE(AO) ,%ymm2 , %ymm6
vfmadd231pd 12 * SIZE(AO) ,%ymm3 , %ymm7
addq $ 8 *SIZE, BO
addq $ 32*SIZE, AO
.endm .endm
.macro KERNEL4x1_SUB .macro KERNEL4x1_SUB
vmovddup -12 * SIZE(BO), %xmm2 vbroadcastsd -12 * SIZE(BO), %ymm2
vmovups -16 * SIZE(AO), %xmm0 vmovups -16 * SIZE(AO), %ymm0
vmovups -14 * SIZE(AO), %xmm1 vfmadd231pd %ymm0 ,%ymm2 , %ymm4
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
vfmadd231pd %xmm1 ,%xmm2 , %xmm5
addq $ 1*SIZE, BO addq $ 1*SIZE, BO
addq $ 4*SIZE, AO addq $ 4*SIZE, AO
@ -1112,21 +1142,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro SAVE4x1 .macro SAVE4x1
vmovddup ALPHA, %xmm0 vbroadcastsd ALPHA, %ymm0
vmulpd %xmm0 , %xmm4 , %xmm4 vaddpd %ymm4,%ymm5, %ymm4
vmulpd %xmm0 , %xmm5 , %xmm5 vaddpd %ymm6,%ymm7, %ymm6
vaddpd %ymm4,%ymm6, %ymm4
vmulpd %ymm0 , %ymm4 , %ymm4
#if !defined(TRMMKERNEL) #if !defined(TRMMKERNEL)
vaddpd (CO1) , %xmm4, %xmm4 vaddpd (CO1) , %ymm4, %ymm4
vaddpd 2 * SIZE(CO1) , %xmm5, %xmm5
#endif #endif
vmovups %xmm4 , (CO1) vmovups %ymm4 , (CO1)
vmovups %xmm5 , 2 * SIZE(CO1)
addq $ 4*SIZE, CO1 addq $ 4*SIZE, CO1
.endm .endm
@ -2112,15 +2143,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L1_12: .L1_12:
KERNEL4x1_SUB KERNEL4x1
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
dec %rax dec %rax
jne .L1_12 jne .L1_12
@ -3180,15 +3203,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L1_12: .L1_12:
KERNEL4x1_SUB KERNEL4x1
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
dec %rax dec %rax
jne .L1_12 jne .L1_12

View File

@ -120,7 +120,7 @@
REAL RZERO REAL RZERO
PARAMETER ( RZERO = 0.0 ) PARAMETER ( RZERO = 0.0 )
INTEGER NMAX, INCMAX INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 65, INCMAX = 2 ) PARAMETER ( NMAX = 128, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 ) $ NALMAX = 7, NBEMAX = 7 )

View File

@ -102,7 +102,7 @@
REAL RZERO REAL RZERO
PARAMETER ( RZERO = 0.0 ) PARAMETER ( RZERO = 0.0 )
INTEGER NMAX INTEGER NMAX
PARAMETER ( NMAX = 65 ) PARAMETER ( NMAX = 128 )
INTEGER NIDMAX, NALMAX, NBEMAX INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars .. * .. Local Scalars ..

View File

@ -117,7 +117,7 @@
DOUBLE PRECISION ZERO, ONE DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
INTEGER NMAX, INCMAX INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 65, INCMAX = 2 ) PARAMETER ( NMAX = 128, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 ) $ NALMAX = 7, NBEMAX = 7 )

View File

@ -97,7 +97,7 @@
DOUBLE PRECISION ZERO, ONE DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
INTEGER NMAX INTEGER NMAX
PARAMETER ( NMAX = 65 ) PARAMETER ( NMAX = 128 )
INTEGER NIDMAX, NALMAX, NBEMAX INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars .. * .. Local Scalars ..

View File

@ -117,7 +117,7 @@
REAL ZERO, ONE REAL ZERO, ONE
PARAMETER ( ZERO = 0.0, ONE = 1.0 ) PARAMETER ( ZERO = 0.0, ONE = 1.0 )
INTEGER NMAX, INCMAX INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 65, INCMAX = 2 ) PARAMETER ( NMAX = 128, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 ) $ NALMAX = 7, NBEMAX = 7 )

View File

@ -97,7 +97,7 @@
REAL ZERO, ONE REAL ZERO, ONE
PARAMETER ( ZERO = 0.0, ONE = 1.0 ) PARAMETER ( ZERO = 0.0, ONE = 1.0 )
INTEGER NMAX INTEGER NMAX
PARAMETER ( NMAX = 65 ) PARAMETER ( NMAX = 128 )
INTEGER NIDMAX, NALMAX, NBEMAX INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars .. * .. Local Scalars ..

View File

@ -121,7 +121,7 @@
DOUBLE PRECISION RZERO DOUBLE PRECISION RZERO
PARAMETER ( RZERO = 0.0D0 ) PARAMETER ( RZERO = 0.0D0 )
INTEGER NMAX, INCMAX INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 65, INCMAX = 2 ) PARAMETER ( NMAX = 128, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 ) $ NALMAX = 7, NBEMAX = 7 )

View File

@ -104,7 +104,7 @@
DOUBLE PRECISION RZERO DOUBLE PRECISION RZERO
PARAMETER ( RZERO = 0.0D0 ) PARAMETER ( RZERO = 0.0D0 )
INTEGER NMAX INTEGER NMAX
PARAMETER ( NMAX = 65 ) PARAMETER ( NMAX = 128 )
INTEGER NIDMAX, NALMAX, NBEMAX INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars .. * .. Local Scalars ..

View File

@ -1,11 +1,11 @@
SEP: Data file for testing Symmetric Eigenvalue Problem routines SEP: Data file for testing Symmetric Eigenvalue Problem routines
6 Number of values of N 8 Number of values of N
0 1 2 3 5 20 Values of N (dimension) 0 1 2 3 5 19 20 21 Values of N (dimension)
5 Number of values of NB 5 Number of values of NB
1 3 3 3 10 Values of NB (blocksize) 1 3 3 3 10 Values of NB (blocksize)
2 2 2 2 2 Values of NBMIN (minimum blocksize) 2 2 2 2 2 Values of NBMIN (minimum blocksize)
1 0 5 9 1 Values of NX (crossover point) 1 0 5 9 1 Values of NX (crossover point)
60.0 Threshold value 160.0 Threshold value
T Put T to test the LAPACK routines T Put T to test the LAPACK routines
T Put T to test the driver routines T Put T to test the driver routines
T Put T to test the error exits T Put T to test the error exits

View File

@ -1129,7 +1129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define DGEMM_DEFAULT_UNROLL_M 8 #define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2 #define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4