forward to GEMV when one argument is actually a vector

This commit is contained in:
Martin Kroeker 2024-05-20 22:40:04 +02:00 committed by Chris Sidebottom
parent 136a4edc5f
commit 3db5dbc88e
1 changed file with 45 additions and 4 deletions

View File

@ -47,22 +47,29 @@
#define SMP_THRESHOLD_MIN 65536.0 #define SMP_THRESHOLD_MIN 65536.0
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QGEMM " #define ERROR_NAME "QGEMM "
#define GEMV BLASFUNC(qgemv)
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define ERROR_NAME "DGEMM " #define ERROR_NAME "DGEMM "
#define GEMV BLASFUNC(dgemv)
#elif defined(BFLOAT16) #elif defined(BFLOAT16)
#define ERROR_NAME "SBGEMM " #define ERROR_NAME "SBGEMM "
#define GEMV BLASFUNC(sbgemv)
#else #else
#define ERROR_NAME "SGEMM " #define ERROR_NAME "SGEMM "
#define GEMV BLASFUNC(sgemv)
#endif #endif
#else #else
#define SMP_THRESHOLD_MIN 8192.0 #define SMP_THRESHOLD_MIN 8192.0
#ifndef GEMM3M #ifndef GEMM3M
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "XGEMM " #define ERROR_NAME "XGEMM "
#define GEMV BLASFUNC(xgemv)
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define ERROR_NAME "ZGEMM " #define ERROR_NAME "ZGEMM "
#define GEMV BLASFUNC(zgemv)
#else #else
#define ERROR_NAME "CGEMM " #define ERROR_NAME "CGEMM "
#define GEMV BLASFUNC(cgemv)
#endif #endif
#else #else
#ifdef XDOUBLE #ifdef XDOUBLE
@ -485,9 +492,38 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
} }
#endif #endif
#endif // defined(__linux__) && defined(__x86_64__) && defined(BFLOAT16) #endif // defined(__linux__) && defined(__x86_64__) && defined(BFLOAT16)
// fprintf(stderr,"G E M M interface m n k %d %d %d\n",args.m,args.n,args.k);
if ((args.m == 0) || (args.n == 0)) return; if ((args.m == 0) || (args.n == 0)) return;
#if 1
#ifndef GEMM3M
if (args.m == 1) {
char *NT=(char*)malloc(2*sizeof(char));
if (transb&1)strcpy(NT,"T");
else NT="N";
// fprintf(stderr,"G E M V\n");
GEMV(NT, &args.n ,&args.k, args.alpha, args.b, &args.ldb, args.a, &args.m, args.beta, args.c, &args.m);
//SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
return;
} else {
if (args.n == 1) {
#ifndef CBLAS
char *NT=(char*)malloc(2*sizeof(char));
strcpy(NT,"N");
#else
char *NT=(char*)malloc(2*sizeof(char));
if (transb&1)strcpy(NT,"T");
else strcpy(NT,"N");
#endif
// fprintf(stderr,"G E M V ! ! ! lda=%d ldb=%d ldc=%d\n",args.lda,args.ldb,args.ldc);
GEMV(NT, &args.m ,&args.k, args.alpha, args.a, &args.lda, args.b, &args.n, args.beta, args.c, &args.n);
//SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
return;
}
}
#endif
#endif
#if 0 #if 0
fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n", fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n",
args.m, args.n, args.k, args.lda, args.ldb, args.ldc); args.m, args.n, args.k, args.lda, args.ldb, args.ldc);
@ -521,10 +557,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
buffer = (XFLOAT *)blas_memory_alloc(0); buffer = (XFLOAT *)blas_memory_alloc(0);
//For target LOONGSON3R5, applying an offset to the buffer is essential //For Loongson servers, like the 3C5000 (featuring 16 cores), applying an
//for minimizing cache conflicts and optimizing performance. //offset to the buffer is essential for minimizing cache conflicts and optimizing performance.
#if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY) #if defined(LOONGSON3R5) && !defined(NO_AFFINITY)
sa = (XFLOAT *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A); char model_name[128];
get_cpu_model(model_name);
if ((strstr(model_name, "3C5000") != NULL) || (strstr(model_name, "3D5000") != NULL))
sa = (XFLOAT *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A);
else
sa = (XFLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
#else #else
sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A); sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A);
#endif #endif