forward to GEMV when one argument is actually a vector
This commit is contained in:
parent
136a4edc5f
commit
3db5dbc88e
|
@ -47,22 +47,29 @@
|
||||||
#define SMP_THRESHOLD_MIN 65536.0
|
#define SMP_THRESHOLD_MIN 65536.0
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
#define ERROR_NAME "QGEMM "
|
#define ERROR_NAME "QGEMM "
|
||||||
|
#define GEMV BLASFUNC(qgemv)
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
#define ERROR_NAME "DGEMM "
|
#define ERROR_NAME "DGEMM "
|
||||||
|
#define GEMV BLASFUNC(dgemv)
|
||||||
#elif defined(BFLOAT16)
|
#elif defined(BFLOAT16)
|
||||||
#define ERROR_NAME "SBGEMM "
|
#define ERROR_NAME "SBGEMM "
|
||||||
|
#define GEMV BLASFUNC(sbgemv)
|
||||||
#else
|
#else
|
||||||
#define ERROR_NAME "SGEMM "
|
#define ERROR_NAME "SGEMM "
|
||||||
|
#define GEMV BLASFUNC(sgemv)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#define SMP_THRESHOLD_MIN 8192.0
|
#define SMP_THRESHOLD_MIN 8192.0
|
||||||
#ifndef GEMM3M
|
#ifndef GEMM3M
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
#define ERROR_NAME "XGEMM "
|
#define ERROR_NAME "XGEMM "
|
||||||
|
#define GEMV BLASFUNC(xgemv)
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
#define ERROR_NAME "ZGEMM "
|
#define ERROR_NAME "ZGEMM "
|
||||||
|
#define GEMV BLASFUNC(zgemv)
|
||||||
#else
|
#else
|
||||||
#define ERROR_NAME "CGEMM "
|
#define ERROR_NAME "CGEMM "
|
||||||
|
#define GEMV BLASFUNC(cgemv)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
|
@ -485,9 +492,38 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif // defined(__linux__) && defined(__x86_64__) && defined(BFLOAT16)
|
#endif // defined(__linux__) && defined(__x86_64__) && defined(BFLOAT16)
|
||||||
|
// fprintf(stderr,"G E M M interface m n k %d %d %d\n",args.m,args.n,args.k);
|
||||||
|
|
||||||
if ((args.m == 0) || (args.n == 0)) return;
|
if ((args.m == 0) || (args.n == 0)) return;
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
#ifndef GEMM3M
|
||||||
|
if (args.m == 1) {
|
||||||
|
char *NT=(char*)malloc(2*sizeof(char));
|
||||||
|
if (transb&1)strcpy(NT,"T");
|
||||||
|
else NT="N";
|
||||||
|
// fprintf(stderr,"G E M V\n");
|
||||||
|
GEMV(NT, &args.n ,&args.k, args.alpha, args.b, &args.ldb, args.a, &args.m, args.beta, args.c, &args.m);
|
||||||
|
//SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
if (args.n == 1) {
|
||||||
|
#ifndef CBLAS
|
||||||
|
char *NT=(char*)malloc(2*sizeof(char));
|
||||||
|
strcpy(NT,"N");
|
||||||
|
#else
|
||||||
|
char *NT=(char*)malloc(2*sizeof(char));
|
||||||
|
if (transb&1)strcpy(NT,"T");
|
||||||
|
else strcpy(NT,"N");
|
||||||
|
#endif
|
||||||
|
// fprintf(stderr,"G E M V ! ! ! lda=%d ldb=%d ldc=%d\n",args.lda,args.ldb,args.ldc);
|
||||||
|
GEMV(NT, &args.m ,&args.k, args.alpha, args.a, &args.lda, args.b, &args.n, args.beta, args.c, &args.n);
|
||||||
|
//SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#if 0
|
#if 0
|
||||||
fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n",
|
fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n",
|
||||||
args.m, args.n, args.k, args.lda, args.ldb, args.ldc);
|
args.m, args.n, args.k, args.lda, args.ldb, args.ldc);
|
||||||
|
@ -521,10 +557,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||||
|
|
||||||
buffer = (XFLOAT *)blas_memory_alloc(0);
|
buffer = (XFLOAT *)blas_memory_alloc(0);
|
||||||
|
|
||||||
//For target LOONGSON3R5, applying an offset to the buffer is essential
|
//For Loongson servers, like the 3C5000 (featuring 16 cores), applying an
|
||||||
//for minimizing cache conflicts and optimizing performance.
|
//offset to the buffer is essential for minimizing cache conflicts and optimizing performance.
|
||||||
#if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY)
|
#if defined(LOONGSON3R5) && !defined(NO_AFFINITY)
|
||||||
sa = (XFLOAT *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A);
|
char model_name[128];
|
||||||
|
get_cpu_model(model_name);
|
||||||
|
if ((strstr(model_name, "3C5000") != NULL) || (strstr(model_name, "3D5000") != NULL))
|
||||||
|
sa = (XFLOAT *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A);
|
||||||
|
else
|
||||||
|
sa = (XFLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
||||||
#else
|
#else
|
||||||
sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A);
|
sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A);
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue