Merge pull request #479 from wernsaar/develop
workaround for sandybridge zgemm kernel
This commit is contained in:
commit
eb738148fe
|
@ -6,8 +6,13 @@ include $(TOPDIR)/Makefile.system
|
|||
#LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm
|
||||
|
||||
# ACML custom
|
||||
ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib
|
||||
LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm
|
||||
#ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib
|
||||
#LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm
|
||||
|
||||
# ACML 6.1 custom
|
||||
ACML=/home/saar/acml6.1/gfortran64_mp/lib
|
||||
LIBACML = -fopenmp $(ACML)/libacml_mp.so -lgfortran -lm
|
||||
|
||||
|
||||
# Atlas Ubuntu
|
||||
#ATLAS=/usr/lib/atlas-base
|
||||
|
|
|
@ -114,7 +114,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
|
@ -198,4 +198,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -117,7 +117,7 @@ static __inline double getmflops(int ratio, int m, double secs){
|
|||
}
|
||||
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
|
@ -273,4 +273,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT result;
|
||||
|
@ -192,4 +192,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -139,7 +139,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
FLOAT wkopt[4];
|
||||
|
@ -257,4 +257,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,14 +118,15 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
|
@ -162,6 +163,11 @@ int MAIN__(int argc, char *argv[]){
|
|||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
|
@ -174,7 +180,14 @@ int MAIN__(int argc, char *argv[]){
|
|||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
@ -189,7 +202,7 @@ int MAIN__(int argc, char *argv[]){
|
|||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
|
@ -202,11 +215,11 @@ int MAIN__(int argc, char *argv[]){
|
|||
timeg /= loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -209,4 +209,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -266,4 +266,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -214,5 +214,5 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
|
|
|
@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*work;
|
||||
FLOAT wkopt[4];
|
||||
|
@ -231,4 +231,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -107,7 +107,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -189,4 +189,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -205,4 +205,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -106,7 +106,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -188,4 +188,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -186,4 +186,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
|
@ -270,4 +270,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -114,7 +114,7 @@ int gettimeofday(struct timeval *tv, void *tz){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
|
@ -278,5 +278,5 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -215,4 +215,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -196,4 +196,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -199,4 +199,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
|
@ -199,4 +199,4 @@ int MAIN__(int argc, char *argv[]){
|
|||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
|
|
@ -34,17 +34,17 @@ CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
|
||||
ZGEMMINCOPY =
|
||||
ZGEMMITCOPY =
|
||||
ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
|
||||
ZGEMMINCOPY = zgemm_ncopy_1.S
|
||||
ZGEMMITCOPY = zgemm_tcopy_1.S
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
ZGEMMINCOPYOBJ =
|
||||
ZGEMMITCOPYOBJ =
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
#STRSMKERNEL_LN = trsm_kernel_LN_4x8_nehalem.S
|
||||
#STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
|
||||
#STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
|
||||
|
|
|
@ -1092,18 +1092,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vxorpd %xmm4 , %xmm4 , %xmm4
|
||||
vxorpd %xmm5 , %xmm5 , %xmm5
|
||||
vxorpd %ymm4 , %ymm4 , %ymm4
|
||||
vxorpd %ymm5 , %ymm5 , %ymm5
|
||||
vxorpd %ymm6 , %ymm6 , %ymm6
|
||||
vxorpd %ymm7 , %ymm7 , %ymm7
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL4x1
|
||||
|
||||
vbroadcastsd -12 * SIZE(BO), %ymm0
|
||||
vbroadcastsd -11 * SIZE(BO), %ymm1
|
||||
vbroadcastsd -10 * SIZE(BO), %ymm2
|
||||
vbroadcastsd -9 * SIZE(BO), %ymm3
|
||||
|
||||
vfmadd231pd -16 * SIZE(AO) ,%ymm0 , %ymm4
|
||||
vfmadd231pd -12 * SIZE(AO) ,%ymm1 , %ymm5
|
||||
|
||||
vbroadcastsd -8 * SIZE(BO), %ymm0
|
||||
vbroadcastsd -7 * SIZE(BO), %ymm1
|
||||
|
||||
vfmadd231pd -8 * SIZE(AO) ,%ymm2 , %ymm6
|
||||
vfmadd231pd -4 * SIZE(AO) ,%ymm3 , %ymm7
|
||||
|
||||
vbroadcastsd -6 * SIZE(BO), %ymm2
|
||||
vbroadcastsd -5 * SIZE(BO), %ymm3
|
||||
|
||||
vfmadd231pd 0 * SIZE(AO) ,%ymm0 , %ymm4
|
||||
vfmadd231pd 4 * SIZE(AO) ,%ymm1 , %ymm5
|
||||
vfmadd231pd 8 * SIZE(AO) ,%ymm2 , %ymm6
|
||||
vfmadd231pd 12 * SIZE(AO) ,%ymm3 , %ymm7
|
||||
|
||||
addq $ 8 *SIZE, BO
|
||||
addq $ 32*SIZE, AO
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL4x1_SUB
|
||||
vmovddup -12 * SIZE(BO), %xmm2
|
||||
vmovups -16 * SIZE(AO), %xmm0
|
||||
vmovups -14 * SIZE(AO), %xmm1
|
||||
vfmadd231pd %xmm0 ,%xmm2 , %xmm4
|
||||
vfmadd231pd %xmm1 ,%xmm2 , %xmm5
|
||||
vbroadcastsd -12 * SIZE(BO), %ymm2
|
||||
vmovups -16 * SIZE(AO), %ymm0
|
||||
vfmadd231pd %ymm0 ,%ymm2 , %ymm4
|
||||
addq $ 1*SIZE, BO
|
||||
addq $ 4*SIZE, AO
|
||||
|
||||
|
@ -1112,21 +1142,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro SAVE4x1
|
||||
|
||||
vmovddup ALPHA, %xmm0
|
||||
vbroadcastsd ALPHA, %ymm0
|
||||
|
||||
vmulpd %xmm0 , %xmm4 , %xmm4
|
||||
vmulpd %xmm0 , %xmm5 , %xmm5
|
||||
vaddpd %ymm4,%ymm5, %ymm4
|
||||
vaddpd %ymm6,%ymm7, %ymm6
|
||||
vaddpd %ymm4,%ymm6, %ymm4
|
||||
|
||||
vmulpd %ymm0 , %ymm4 , %ymm4
|
||||
|
||||
|
||||
#if !defined(TRMMKERNEL)
|
||||
|
||||
vaddpd (CO1) , %xmm4, %xmm4
|
||||
vaddpd 2 * SIZE(CO1) , %xmm5, %xmm5
|
||||
vaddpd (CO1) , %ymm4, %ymm4
|
||||
|
||||
#endif
|
||||
|
||||
vmovups %xmm4 , (CO1)
|
||||
vmovups %xmm5 , 2 * SIZE(CO1)
|
||||
vmovups %ymm4 , (CO1)
|
||||
|
||||
addq $ 4*SIZE, CO1
|
||||
.endm
|
||||
|
@ -2112,15 +2143,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.L1_12:
|
||||
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1
|
||||
|
||||
dec %rax
|
||||
jne .L1_12
|
||||
|
@ -3180,15 +3203,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.L1_12:
|
||||
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1
|
||||
|
||||
dec %rax
|
||||
jne .L1_12
|
||||
|
|
|
@ -120,7 +120,7 @@
|
|||
REAL RZERO
|
||||
PARAMETER ( RZERO = 0.0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -102,7 +102,7 @@
|
|||
REAL RZERO
|
||||
PARAMETER ( RZERO = 0.0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
PARAMETER ( NMAX = 128 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
DOUBLE PRECISION ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -97,7 +97,7 @@
|
|||
DOUBLE PRECISION ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
PARAMETER ( NMAX = 128 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
REAL ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -97,7 +97,7 @@
|
|||
REAL ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
PARAMETER ( NMAX = 128 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -121,7 +121,7 @@
|
|||
DOUBLE PRECISION RZERO
|
||||
PARAMETER ( RZERO = 0.0D0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -104,7 +104,7 @@
|
|||
DOUBLE PRECISION RZERO
|
||||
PARAMETER ( RZERO = 0.0D0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
PARAMETER ( NMAX = 128 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
SEP: Data file for testing Symmetric Eigenvalue Problem routines
|
||||
6 Number of values of N
|
||||
0 1 2 3 5 20 Values of N (dimension)
|
||||
8 Number of values of N
|
||||
0 1 2 3 5 19 20 21 Values of N (dimension)
|
||||
5 Number of values of NB
|
||||
1 3 3 3 10 Values of NB (blocksize)
|
||||
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
||||
1 0 5 9 1 Values of NX (crossover point)
|
||||
60.0 Threshold value
|
||||
160.0 Threshold value
|
||||
T Put T to test the LAPACK routines
|
||||
T Put T to test the driver routines
|
||||
T Put T to test the error exits
|
||||
|
|
2
param.h
2
param.h
|
@ -1129,7 +1129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
|
Loading…
Reference in New Issue