commit
cc9500db41
|
@ -62,13 +62,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha,
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) + 4095) & ~4095);
|
||||||
gemvbuffer = bufferX;
|
// gemvbuffer = bufferX;
|
||||||
COPY_K(M, y, incy, Y, 1);
|
COPY_K(M, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095);
|
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095);
|
||||||
COPY_K(N, x, incx, X, 1);
|
COPY_K(N, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,13 +55,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha,
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) + 4095) & ~4095);
|
||||||
sbmvbuffer = bufferX;
|
// sbmvbuffer = bufferX;
|
||||||
COPY_K(n, y, incy, Y, 1);
|
COPY_K(n, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095);
|
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095);
|
||||||
COPY_K(n, x, incx, X, 1);
|
COPY_K(n, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,13 +53,13 @@ int CNAME(BLASLONG m, FLOAT alpha, FLOAT *a,
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) + 4095) & ~4095);
|
||||||
gemvbuffer = bufferX;
|
// gemvbuffer = bufferX;
|
||||||
COPY_K(m, y, incy, Y, 1);
|
COPY_K(m, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095);
|
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095);
|
||||||
COPY_K(m, x, incx, X, 1);
|
COPY_K(m, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -83,13 +83,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||||
gemvbuffer = bufferX;
|
// gemvbuffer = bufferX;
|
||||||
COPY_K(M, y, incy, Y, 1);
|
COPY_K(M, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||||
COPY_K(N, x, incx, X, 1);
|
COPY_K(N, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -61,13 +61,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||||
sbmvbuffer = bufferX;
|
// sbmvbuffer = bufferX;
|
||||||
COPY_K(n, y, incy, Y, 1);
|
COPY_K(n, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||||
COPY_K(n, x, incx, X, 1);
|
COPY_K(n, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,13 +56,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||||
gemvbuffer = bufferX;
|
// gemvbuffer = bufferX;
|
||||||
COPY_K(m, y, incy, Y, 1);
|
COPY_K(m, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||||
COPY_K(m, x, incx, X, 1);
|
COPY_K(m, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -60,13 +60,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||||
sbmvbuffer = bufferX;
|
// sbmvbuffer = bufferX;
|
||||||
COPY_K(n, y, incy, Y, 1);
|
COPY_K(n, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||||
COPY_K(n, x, incx, X, 1);
|
COPY_K(n, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,13 +55,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||||
gemvbuffer = bufferX;
|
// gemvbuffer = bufferX;
|
||||||
COPY_K(m, y, incy, Y, 1);
|
COPY_K(m, y, incy, Y, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (incx != 1) {
|
if (incx != 1) {
|
||||||
X = bufferX;
|
X = bufferX;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||||
COPY_K(m, x, incx, X, 1);
|
COPY_K(m, x, incx, X, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -293,7 +293,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_l = k - ls;
|
min_l = k - ls;
|
||||||
|
|
||||||
if (min_l >= GEMM_Q * 2) {
|
if (min_l >= GEMM_Q * 2) {
|
||||||
gemm_p = GEMM_P;
|
// gemm_p = GEMM_P;
|
||||||
min_l = GEMM_Q;
|
min_l = GEMM_Q;
|
||||||
} else {
|
} else {
|
||||||
if (min_l > GEMM_Q) {
|
if (min_l > GEMM_Q) {
|
||||||
|
|
|
@ -348,7 +348,7 @@ int blas_get_cpu_number(void){
|
||||||
max_num = get_num_procs();
|
max_num = get_num_procs();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blas_goto_num = 0;
|
// blas_goto_num = 0;
|
||||||
#ifndef USE_OPENMP
|
#ifndef USE_OPENMP
|
||||||
blas_goto_num=openblas_num_threads_env();
|
blas_goto_num=openblas_num_threads_env();
|
||||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||||
|
@ -360,7 +360,7 @@ int blas_get_cpu_number(void){
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blas_omp_num = 0;
|
// blas_omp_num = 0;
|
||||||
blas_omp_num=openblas_omp_num_threads_env();
|
blas_omp_num=openblas_omp_num_threads_env();
|
||||||
if (blas_omp_num < 0) blas_omp_num = 0;
|
if (blas_omp_num < 0) blas_omp_num = 0;
|
||||||
|
|
||||||
|
|
|
@ -218,9 +218,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(2);
|
/* nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
/*FIXME trmv_thread was found to be broken, see issue 1332 */
|
FIXME trmv_thread was found to be broken, see issue 1332 */
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
|
|
@ -113,8 +113,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
#else
|
#else
|
||||||
data01 = *(ao1 + 0);
|
data01 = *(ao1 + 0);
|
||||||
data02 = *(ao1 + 1);
|
data02 = *(ao1 + 1);
|
||||||
data03 = *(ao1 + 2);
|
/* data03 = *(ao1 + 2);
|
||||||
data04 = *(ao1 + 3);
|
data04 = *(ao1 + 3); */
|
||||||
data05 = *(ao2 + 0);
|
data05 = *(ao2 + 0);
|
||||||
data06 = *(ao2 + 1);
|
data06 = *(ao2 + 1);
|
||||||
data07 = *(ao2 + 2);
|
data07 = *(ao2 + 2);
|
||||||
|
|
|
@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02;
|
FLOAT data01 = 0.0, data02 = 0.0;
|
||||||
FLOAT *a1;
|
FLOAT *a1;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07 = 0.0, data08 = 0.0;
|
||||||
FLOAT *a1, *a2;
|
FLOAT *a1, *a2;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07, data08;
|
||||||
FLOAT data09, data10, data11, data12;
|
FLOAT data09, data10, data11 = 0.0, data12 = 0.0;
|
||||||
FLOAT data13, data14, data15, data16;
|
FLOAT data13, data14, data15, data16;
|
||||||
FLOAT data17, data18, data19, data20;
|
FLOAT data17, data18, data19, data20;
|
||||||
FLOAT data21, data22, data23, data24;
|
FLOAT data21, data22, data23, data24;
|
||||||
|
|
|
@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02;
|
FLOAT data01 = 0.0, data02 = 0.0;
|
||||||
FLOAT *a1;
|
FLOAT *a1;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07 = 0.0, data08 = 0.0;
|
||||||
FLOAT *a1, *a2;
|
FLOAT *a1, *a2;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07, data08;
|
||||||
FLOAT data09, data10, data11, data12;
|
FLOAT data09, data10, data11 = 0.0, data12 = 0.0;
|
||||||
FLOAT data13, data14, data15, data16;
|
FLOAT data13, data14, data15, data16;
|
||||||
FLOAT data17, data18, data19, data20;
|
FLOAT data17, data18, data19, data20;
|
||||||
FLOAT data21, data22, data23, data24;
|
FLOAT data21, data22, data23, data24;
|
||||||
|
|
|
@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02;
|
FLOAT data01 = 0.0, data02 = 0.0;
|
||||||
FLOAT *a1;
|
FLOAT *a1;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07 = 0.0, data08 = 0.0;
|
||||||
FLOAT *a1, *a2;
|
FLOAT *a1, *a2;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07, data08;
|
||||||
FLOAT data09, data10, data11, data12;
|
FLOAT data09, data10, data11 = 0.0, data12 = 0.0;
|
||||||
FLOAT data13, data14, data15, data16;
|
FLOAT data13, data14, data15, data16;
|
||||||
FLOAT data17, data18, data19, data20;
|
FLOAT data17, data18, data19, data20;
|
||||||
FLOAT data21, data22, data23, data24;
|
FLOAT data21, data22, data23, data24;
|
||||||
|
|
|
@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02;
|
FLOAT data01 = 0.0, data02 = 0.0;
|
||||||
FLOAT *a1;
|
FLOAT *a1;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07 = 0.0, data08 = 0.0;
|
||||||
FLOAT *a1, *a2;
|
FLOAT *a1, *a2;
|
||||||
|
|
||||||
lda *= 2;
|
lda *= 2;
|
||||||
|
|
|
@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
BLASLONG i, ii, j, jj;
|
BLASLONG i, ii, j, jj;
|
||||||
|
|
||||||
FLOAT data01, data02, data03, data04;
|
FLOAT data01 = 0.0, data02 = 0.0, data03, data04;
|
||||||
FLOAT data05, data06, data07, data08;
|
FLOAT data05, data06, data07, data08;
|
||||||
FLOAT data09, data10, data11, data12;
|
FLOAT data09, data10, data11 = 0.0, data12 = 0.0;
|
||||||
FLOAT data13, data14, data15, data16;
|
FLOAT data13, data14, data15, data16;
|
||||||
FLOAT data17, data18, data19, data20;
|
FLOAT data17, data18, data19, data20;
|
||||||
FLOAT data21, data22, data23, data24;
|
FLOAT data21, data22, data23, data24;
|
||||||
|
|
Loading…
Reference in New Issue