Merge pull request #453 from wernsaar/develop

Enabled GEMM3M functions
This commit is contained in:
Zhang Xianyi 2014-09-22 16:47:54 +08:00
commit ae6b7caf32
25 changed files with 13757 additions and 39 deletions

View File

@ -112,6 +112,11 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
spotrf.mkl dpotrf.mkl cpotrf.mkl zpotrf.mkl \ spotrf.mkl dpotrf.mkl cpotrf.mkl zpotrf.mkl \
ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl
goto_3m :: cgemm3m.goto zgemm3m.goto
mkl_3m :: cgemm3m.mkl zgemm3m.mkl
all :: goto mkl atlas acml all :: goto mkl atlas acml
##################################### Slinpack #################################################### ##################################### Slinpack ####################################################
@ -1043,6 +1048,22 @@ zaxpy.mkl : zaxpy.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Cgemm3m ####################################################
cgemm3m.goto : cgemm3m.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
cgemm3m.mkl : cgemm3m.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Zgemm3m ####################################################
zgemm3m.goto : zgemm3m.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
zgemm3m.mkl : zgemm3m.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
################################################################################################### ###################################################################################################
@ -1250,6 +1271,11 @@ caxpy.$(SUFFIX) : axpy.c
zaxpy.$(SUFFIX) : axpy.c zaxpy.$(SUFFIX) : axpy.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
cgemm3m.$(SUFFIX) : gemm3m.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
zgemm3m.$(SUFFIX) : gemm3m.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
clean :: clean ::

212
benchmark/gemm3m.c Normal file
View File

@ -0,0 +1,212 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
/*
 * Select the BLAS routine this benchmark times and bind it to GEMM.
 * Any GEMM macro already defined (e.g. by common.h) is discarded first.
 *   COMPLEX defined:     zgemm3m when DOUBLE is defined, cgemm3m otherwise.
 *   COMPLEX not defined: dgemm when DOUBLE is defined, sgemm otherwise.
 */
#undef GEMM
#ifndef COMPLEX
#ifdef DOUBLE
#define GEMM BLASFUNC(dgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
#else
#ifdef DOUBLE
#define GEMM BLASFUNC(zgemm3m)
#else
#define GEMM BLASFUNC(cgemm3m)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Minimal gettimeofday() replacement used when building for native Windows.
 *
 * tv : receives the current time as seconds / microseconds; may be NULL,
 *      in which case nothing is written.
 * tz : ignored; no time-zone information is produced.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){
  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;   /* time-zone output is not supported by this shim */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Assemble the 64-bit file time from its two 32-bit halves. */
    tmpres |= ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10; /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    tv->tv_sec = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate `size` bytes (rounded with HUGE_PAGESIZE) from a private
 * SHM_HUGETLB shared-memory segment and return the attached address.
 * On failure of shmget() or shmat() a message is printed to stdout and the
 * process exits with status 1. IPC_RMID is issued on the segment right
 * after it has been attached.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *mapped;
  int segment;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
#define malloc huge_malloc
#endif
/*
 * Benchmark driver for the routine bound to the GEMM macro.
 *
 * Command line (all optional, positional):
 *   argv[1]  from  - first matrix order          (default 1)
 *   argv[2]  to    - last matrix order, raised to at least `from` (default 200)
 *   argv[3]  step  - increment between orders    (default 1, forced to >= 1)
 *
 * Environment:
 *   OPENBLAS_TRANS - its first character is passed as both transpose flags
 *                    (default 'N')
 *   OPENBLAS_LOOPS - number of timed repetitions per order
 *                    (default 1, forced to >= 1)
 *
 * For every order m the matrices are refilled with random values in
 * [-0.5, 0.5], GEMM is timed with gettimeofday(), and the averaged rate is
 * written to stderr. Exits with status 1 if an allocation fails; returns 0
 * otherwise.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char trans='N';
  blasint m, i, j;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would keep the size loop from ever finishing. */
  if (step < 1) step = 1;

  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  p = getenv("OPENBLAS_LOOPS");
  if ( p != NULL )
    loops = atoi(p);

  /* The average below divides by loops; never let it be zero or negative. */
  if (loops < 1) loops = 1;

#ifdef linux
  /* Seed the generator that is actually used below (rand(), not random()). */
  srand((unsigned int)getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {
    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {
      /* Refill all three operands so every repetition starts from fresh data. */
      for(j = 0; j < m; j++){
        for(i = 0; i < m * COMPSIZE; i++){
          a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
          b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
          c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        }
      }

      gettimeofday( &start, (struct timezone *)0);

      GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

      gettimeofday( &stop, (struct timezone *)0);

      time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

      timeg += time1;
    }

    timeg /= loops;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
  }

  return 0;
}

/* main is a weak alias of MAIN__, so it must share MAIN__'s int return type. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -243,8 +243,13 @@ void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);

View File

@ -231,8 +231,12 @@ void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);

View File

@ -435,6 +435,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*chemm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); int (*chemm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
int (*chemm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); int (*chemm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
int cgemm3m_p, cgemm3m_q, cgemm3m_r;
int cgemm3m_unroll_m, cgemm3m_unroll_n, cgemm3m_unroll_mn;
int (*cgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG); int (*cgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm3m_incopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*cgemm3m_incopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
@ -595,6 +598,9 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
int (*zhemm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *); int (*zhemm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
int (*zhemm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *); int (*zhemm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
int zgemm3m_p, zgemm3m_q, zgemm3m_r;
int zgemm3m_unroll_m, zgemm3m_unroll_n, zgemm3m_unroll_mn;
int (*zgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG); int (*zgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
int (*zgemm3m_incopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*zgemm3m_incopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
@ -757,6 +763,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*xhemm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *); int (*xhemm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
int (*xhemm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *); int (*xhemm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
int xgemm3m_p, xgemm3m_q, xgemm3m_r;
int xgemm3m_unroll_m, xgemm3m_unroll_n, xgemm3m_unroll_mn;
int (*xgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG); int (*xgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
int (*xgemm3m_incopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *); int (*xgemm3m_incopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
@ -900,6 +909,27 @@ extern gotoblas_t *gotoblas;
#define XGEMM_UNROLL_N gotoblas -> xgemm_unroll_n #define XGEMM_UNROLL_N gotoblas -> xgemm_unroll_n
#define XGEMM_UNROLL_MN gotoblas -> xgemm_unroll_mn #define XGEMM_UNROLL_MN gotoblas -> xgemm_unroll_mn
/*
 * GEMM3M tuning parameters for this branch of the configuration: each macro
 * expands to a read of the matching field through the `gotoblas` pointer,
 * for the single-complex (C), double-complex (Z) and extended-complex (X)
 * variants.
 */
#define CGEMM3M_P gotoblas -> cgemm3m_p
#define CGEMM3M_Q gotoblas -> cgemm3m_q
#define CGEMM3M_R gotoblas -> cgemm3m_r
#define CGEMM3M_UNROLL_M gotoblas -> cgemm3m_unroll_m
#define CGEMM3M_UNROLL_N gotoblas -> cgemm3m_unroll_n
#define CGEMM3M_UNROLL_MN gotoblas -> cgemm3m_unroll_mn
#define ZGEMM3M_P gotoblas -> zgemm3m_p
#define ZGEMM3M_Q gotoblas -> zgemm3m_q
#define ZGEMM3M_R gotoblas -> zgemm3m_r
#define ZGEMM3M_UNROLL_M gotoblas -> zgemm3m_unroll_m
#define ZGEMM3M_UNROLL_N gotoblas -> zgemm3m_unroll_n
#define ZGEMM3M_UNROLL_MN gotoblas -> zgemm3m_unroll_mn
#define XGEMM3M_P gotoblas -> xgemm3m_p
#define XGEMM3M_Q gotoblas -> xgemm3m_q
#define XGEMM3M_R gotoblas -> xgemm3m_r
#define XGEMM3M_UNROLL_M gotoblas -> xgemm3m_unroll_m
#define XGEMM3M_UNROLL_N gotoblas -> xgemm3m_unroll_n
#define XGEMM3M_UNROLL_MN gotoblas -> xgemm3m_unroll_mn
#else #else
#define DTB_ENTRIES DTB_DEFAULT_ENTRIES #define DTB_ENTRIES DTB_DEFAULT_ENTRIES
@ -972,6 +1002,55 @@ extern gotoblas_t *gotoblas;
#define XGEMM_UNROLL_N XGEMM_DEFAULT_UNROLL_N #define XGEMM_UNROLL_N XGEMM_DEFAULT_UNROLL_N
#define XGEMM_UNROLL_MN MAX((XGEMM_UNROLL_M), (XGEMM_UNROLL_N)) #define XGEMM_UNROLL_MN MAX((XGEMM_UNROLL_M), (XGEMM_UNROLL_N))
/*
 * Compile-time CGEMM3M tuning parameters.
 *
 * When the target supplies dedicated CGEMM3M_DEFAULT_* values they are used;
 * otherwise the single-precision real (SGEMM) defaults are used instead.
 * The guard tests the CGEMM3M-specific macro so that targets which only
 * provide CGEMM_DEFAULT_* fall back cleanly instead of expanding to
 * undefined CGEMM3M_DEFAULT_* names.
 */
#ifdef CGEMM3M_DEFAULT_UNROLL_N
#define CGEMM3M_P CGEMM3M_DEFAULT_P
#define CGEMM3M_Q CGEMM3M_DEFAULT_Q
#define CGEMM3M_R CGEMM3M_DEFAULT_R
#define CGEMM3M_UNROLL_M CGEMM3M_DEFAULT_UNROLL_M
#define CGEMM3M_UNROLL_N CGEMM3M_DEFAULT_UNROLL_N
#else
#define CGEMM3M_P SGEMM_DEFAULT_P
#define CGEMM3M_Q SGEMM_DEFAULT_Q
#define CGEMM3M_R SGEMM_DEFAULT_R
#define CGEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M
#define CGEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N
#endif
/* Always the larger of the two unroll factors chosen above. */
#define CGEMM3M_UNROLL_MN MAX((CGEMM3M_UNROLL_M), (CGEMM3M_UNROLL_N))
/*
 * Compile-time ZGEMM3M tuning parameters.
 *
 * When the target supplies dedicated ZGEMM3M_DEFAULT_* values they are used;
 * otherwise the double-precision real (DGEMM) defaults are used instead.
 * The guard tests the ZGEMM3M-specific macro so that targets which only
 * provide ZGEMM_DEFAULT_* fall back cleanly instead of expanding to
 * undefined ZGEMM3M_DEFAULT_* names.
 */
#ifdef ZGEMM3M_DEFAULT_UNROLL_N
#define ZGEMM3M_P ZGEMM3M_DEFAULT_P
#define ZGEMM3M_Q ZGEMM3M_DEFAULT_Q
#define ZGEMM3M_R ZGEMM3M_DEFAULT_R
#define ZGEMM3M_UNROLL_M ZGEMM3M_DEFAULT_UNROLL_M
#define ZGEMM3M_UNROLL_N ZGEMM3M_DEFAULT_UNROLL_N
#else
#define ZGEMM3M_P DGEMM_DEFAULT_P
#define ZGEMM3M_Q DGEMM_DEFAULT_Q
#define ZGEMM3M_R DGEMM_DEFAULT_R
#define ZGEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M
#define ZGEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N
#endif
/* Always the larger of the two unroll factors chosen above. */
#define ZGEMM3M_UNROLL_MN MAX((ZGEMM3M_UNROLL_M), (ZGEMM3M_UNROLL_N))
/*
 * Compile-time XGEMM3M tuning parameters: taken unconditionally from the
 * QGEMM defaults; UNROLL_MN is the larger of the QGEMM unroll factors.
 */
#define XGEMM3M_P QGEMM_DEFAULT_P
#define XGEMM3M_Q QGEMM_DEFAULT_Q
#define XGEMM3M_R QGEMM_DEFAULT_R
#define XGEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M
#define XGEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N
#define XGEMM3M_UNROLL_MN MAX((QGEMM_UNROLL_M), (QGEMM_UNROLL_N))
#endif #endif
#endif #endif
@ -1054,14 +1133,14 @@ extern gotoblas_t *gotoblas;
#endif #endif
#ifdef XDOUBLE #ifdef XDOUBLE
#define GEMM3M_UNROLL_M QGEMM_UNROLL_M #define GEMM3M_UNROLL_M XGEMM3M_UNROLL_M
#define GEMM3M_UNROLL_N QGEMM_UNROLL_N #define GEMM3M_UNROLL_N XGEMM3M_UNROLL_N
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define GEMM3M_UNROLL_M DGEMM_UNROLL_M #define GEMM3M_UNROLL_M ZGEMM3M_UNROLL_M
#define GEMM3M_UNROLL_N DGEMM_UNROLL_N #define GEMM3M_UNROLL_N ZGEMM3M_UNROLL_N
#else #else
#define GEMM3M_UNROLL_M SGEMM_UNROLL_M #define GEMM3M_UNROLL_M CGEMM3M_UNROLL_M
#define GEMM3M_UNROLL_N SGEMM_UNROLL_N #define GEMM3M_UNROLL_N CGEMM3M_UNROLL_N
#endif #endif
@ -1123,31 +1202,31 @@ extern gotoblas_t *gotoblas;
#ifndef GEMM3M_P #ifndef GEMM3M_P
#ifdef XDOUBLE #ifdef XDOUBLE
#define GEMM3M_P QGEMM_P #define GEMM3M_P XGEMM3M_P
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define GEMM3M_P DGEMM_P #define GEMM3M_P ZGEMM3M_P
#else #else
#define GEMM3M_P SGEMM_P #define GEMM3M_P CGEMM3M_P
#endif #endif
#endif #endif
#ifndef GEMM3M_Q #ifndef GEMM3M_Q
#ifdef XDOUBLE #ifdef XDOUBLE
#define GEMM3M_Q QGEMM_Q #define GEMM3M_Q XGEMM3M_Q
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define GEMM3M_Q DGEMM_Q #define GEMM3M_Q ZGEMM3M_Q
#else #else
#define GEMM3M_Q SGEMM_Q #define GEMM3M_Q CGEMM3M_Q
#endif #endif
#endif #endif
#ifndef GEMM3M_R #ifndef GEMM3M_R
#ifdef XDOUBLE #ifdef XDOUBLE
#define GEMM3M_R QGEMM_R #define GEMM3M_R XGEMM3M_R
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define GEMM3M_R DGEMM_R #define GEMM3M_R ZGEMM3M_R
#else #else
#define GEMM3M_R SGEMM_R #define GEMM3M_R CGEMM3M_R
#endif #endif
#endif #endif

View File

@ -74,6 +74,18 @@ else
OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3 OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
endif endif
all3_3m: xzcblat3_3m xccblat3_3m
ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
else
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
clean :: clean ::
rm -f x* rm -f x*
@ -103,6 +115,9 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME) xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
# Double complex # Double complex
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@ -111,4 +126,9 @@ xzcblat2: $(ztestl2o) c_zblat2.o $(TOPDIR)/$(LIBNAME)
xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME) xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
include $(TOPDIR)/Makefile.tail include $(TOPDIR)/Makefile.tail

View File

@ -45,8 +45,238 @@ void F77_c3chke(char * rout) {
F77_xerbla(cblas_rout,&cblas_info); F77_xerbla(cblas_rout,&cblas_info);
} }
if (strncmp( sf,"cblas_cgemm" ,11)==0) {
cblas_rout = "cblas_cgemm" ; if (strncmp( sf,"cblas_cgemm3m" ,13)==0) {
cblas_rout = "cblas_cgemm3" ;
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
} else if (strncmp( sf,"cblas_cgemm" ,11)==0) {
cblas_rout = "cblas_cgemm" ;
cblas_info = 1; cblas_info = 1;
cblas_cgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0, cblas_cgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,

View File

@ -88,6 +88,7 @@ void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc ); b, *ldb, beta, c, *ldc );
} }
void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n, void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
@ -563,3 +564,84 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha, cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
a, *lda, b, *ldb); a, *lda, b, *ldb);
} }
/*
 * Build a freshly malloc'ed row-major copy of a column-major matrix.
 *
 * src    : column-major source, element (r, q) at src[q*ld_src + r]
 * rows   : number of rows copied
 * cols   : number of columns copied
 * ld_dst : receives the row stride of the copy, which is cols + 1
 *
 * The caller owns the returned buffer and must free() it.
 */
static CBLAS_TEST_COMPLEX *cgemm3m_row_major_copy(CBLAS_TEST_COMPLEX *src,
                                                  int ld_src, int rows,
                                                  int cols, int *ld_dst)
{
   CBLAS_TEST_COMPLEX *dst;
   int r, q, stride;

   stride = cols + 1;
   dst = (CBLAS_TEST_COMPLEX *)malloc(rows * stride * sizeof(CBLAS_TEST_COMPLEX));
   for (r = 0; r < rows; r++) {
      for (q = 0; q < cols; q++) {
         dst[r * stride + q].real = src[q * ld_src + r].real;
         dst[r * stride + q].imag = src[q * ld_src + r].imag;
      }
   }
   *ld_dst = stride;
   return dst;
}

/*
 * Test-harness entry point that forwards a cgemm3m call to cblas_cgemm3m.
 *
 * order            : TEST_ROW_MJR, TEST_COL_MJR, or anything else (forwarded
 *                    as UNDEFINED)
 * transpa, transpb : transpose selectors, decoded by get_transpose_type()
 * m, n, k          : problem dimensions
 * alpha, beta      : scalars, passed through unchanged
 * a, b, c          : column-major operands with leading dimensions lda, ldb, ldc
 *
 * For TEST_ROW_MJR the operands are first copied into temporary row-major
 * buffers (row stride = column count + 1), the routine is called on those,
 * and the m-by-n result is copied back into c. For the other two cases the
 * caller's buffers are passed straight through.
 */
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
     CBLAS_TEST_COMPLEX *c, int *ldc ) {

   CBLAS_TEST_COMPLEX *A, *B, *C;
   int row, col, LDA, LDB, LDC;
   enum CBLAS_TRANSPOSE transa, transb;

   get_transpose_type(transpa, &transa);
   get_transpose_type(transpb, &transb);

   if (*order != TEST_ROW_MJR) {
      /* No layout conversion needed: hand the caller's buffers straight on. */
      if (*order == TEST_COL_MJR)
         cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
                        b, *ldb, beta, c, *ldc );
      else
         cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
                        b, *ldb, beta, c, *ldc );
      return;
   }

   /* A is m-by-k when not transposed, k-by-m otherwise. */
   if (transa == CblasNoTrans)
      A = cgemm3m_row_major_copy(a, *lda, *m, *k, &LDA);
   else
      A = cgemm3m_row_major_copy(a, *lda, *k, *m, &LDA);

   /* B is k-by-n when not transposed, n-by-k otherwise. */
   if (transb == CblasNoTrans)
      B = cgemm3m_row_major_copy(b, *ldb, *k, *n, &LDB);
   else
      B = cgemm3m_row_major_copy(b, *ldb, *n, *k, &LDB);

   /* C is always m-by-n. */
   C = cgemm3m_row_major_copy(c, *ldc, *m, *n, &LDC);

   cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
                  B, LDB, beta, C, LDC );

   /* Copy the result back into the caller's column-major c. */
   for (col = 0; col < *n; col++) {
      for (row = 0; row < *m; row++) {
         c[col * (*ldc) + row].real = C[row * LDC + col].real;
         c[col * (*ldc) + row].imag = C[row * LDC + col].imag;
      }
   }

   free(A);
   free(B);
   free(C);
}

2786
ctest/c_cblat3_3m.f Normal file

File diff suppressed because it is too large Load Diff

View File

@ -45,8 +45,242 @@ void F77_z3chke(char * rout) {
F77_xerbla(cblas_rout,&cblas_info); F77_xerbla(cblas_rout,&cblas_info);
} }
if (strncmp( sf,"cblas_zgemm" ,11)==0) {
cblas_rout = "cblas_zgemm" ;
if (strncmp( sf,"cblas_zgemm3m" ,13)==0) {
cblas_rout = "cblas_zgemm3" ;
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
} else if (strncmp( sf,"cblas_zgemm" ,11)==0) {
cblas_rout = "cblas_zgemm" ;
cblas_info = 1; cblas_info = 1;
cblas_zgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0, cblas_zgemm( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,

View File

@ -562,3 +562,82 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
cblas_ztrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha, cblas_ztrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
a, *lda, b, *ldb); a, *lda, b, *ldb);
} }
/*
 * Fortran-callable test wrapper around cblas_zgemm3m (double-precision
 * complex).
 *
 * The caller's arrays a, b and c are indexed here as column-major storage,
 * i.e. element (i,j) lives at [j*ld + i].
 *
 *   - *order == TEST_ROW_MJR: each operand is copied into a freshly
 *     allocated row-major buffer, cblas_zgemm3m is run with CblasRowMajor,
 *     the result is copied back into c, and the buffers are released.
 *   - *order == TEST_COL_MJR: the caller's arrays are handed to
 *     cblas_zgemm3m unchanged with CblasColMajor.
 *   - any other value: the call is forwarded with UNDEFINED as the order.
 *
 * transpa / transpb are decoded through get_transpose_type().
 *
 * NOTE(review): the malloc results are used without a NULL check.
 */
void F77_zgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
     int *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda,
     CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta,
     CBLAS_TEST_ZOMPLEX *c, int *ldc ) {

   CBLAS_TEST_ZOMPLEX *rowA, *rowB, *rowC;
   int row, col;
   int rowsA, colsA, rowsB, colsB;
   int strideA, strideB, strideC;
   enum CBLAS_TRANSPOSE transa, transb;

   get_transpose_type(transpa, &transa);
   get_transpose_type(transpb, &transb);

   /* Everything except the row-major case is a plain pass-through. */
   if (*order != TEST_ROW_MJR) {
      if (*order == TEST_COL_MJR)
         cblas_zgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha,
                        a, *lda, b, *ldb, beta, c, *ldc );
      else
         cblas_zgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha,
                        a, *lda, b, *ldb, beta, c, *ldc );
      return;
   }

   /* Stored shape of A: m x k when not transposed, k x m otherwise. */
   if (transa == CblasNoTrans) {
      rowsA = *m;
      colsA = *k;
   } else {
      rowsA = *k;
      colsA = *m;
   }
   /* Row stride is one more than the row length; presumably this exercises
    * a leading dimension that is larger than strictly required. */
   strideA = colsA + 1;
   rowA = (CBLAS_TEST_ZOMPLEX *)malloc(rowsA * strideA *
                                       sizeof(CBLAS_TEST_ZOMPLEX));
   for (row = 0; row < rowsA; row++) {
      for (col = 0; col < colsA; col++) {
         rowA[row * strideA + col] = a[col * (*lda) + row];
      }
   }

   /* Stored shape of B: k x n when not transposed, n x k otherwise. */
   if (transb == CblasNoTrans) {
      rowsB = *k;
      colsB = *n;
   } else {
      rowsB = *n;
      colsB = *k;
   }
   strideB = colsB + 1;
   rowB = (CBLAS_TEST_ZOMPLEX *)malloc(rowsB * strideB *
                                       sizeof(CBLAS_TEST_ZOMPLEX));
   for (row = 0; row < rowsB; row++) {
      for (col = 0; col < colsB; col++) {
         rowB[row * strideB + col] = b[col * (*ldb) + row];
      }
   }

   /* C is always m x n; copy it in because it is also an input operand. */
   strideC = *n + 1;
   rowC = (CBLAS_TEST_ZOMPLEX *)malloc((*m) * strideC *
                                       sizeof(CBLAS_TEST_ZOMPLEX));
   for (col = 0; col < *n; col++) {
      for (row = 0; row < *m; row++) {
         rowC[row * strideC + col] = c[col * (*ldc) + row];
      }
   }

   cblas_zgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha,
                  rowA, strideA, rowB, strideB, beta, rowC, strideC );

   /* Write the result back into the caller's column-major array. */
   for (col = 0; col < *n; col++) {
      for (row = 0; row < *m; row++) {
         c[col * (*ldc) + row] = rowC[row * strideC + col];
      }
   }

   free(rowA);
   free(rowB);
   free(rowC);
}

2791
ctest/c_zblat3_3m.f Normal file

File diff suppressed because it is too large Load Diff

View File

@ -173,12 +173,14 @@ typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX;
#define F77_dtrmm cdtrmm_ #define F77_dtrmm cdtrmm_
#define F77_dtrsm cdtrsm_ #define F77_dtrsm cdtrsm_
#define F77_cgemm ccgemm_ #define F77_cgemm ccgemm_
#define F77_cgemm3m ccgemm3m_
#define F77_csymm ccsymm_ #define F77_csymm ccsymm_
#define F77_csyrk ccsyrk_ #define F77_csyrk ccsyrk_
#define F77_csyr2k ccsyr2k_ #define F77_csyr2k ccsyr2k_
#define F77_ctrmm cctrmm_ #define F77_ctrmm cctrmm_
#define F77_ctrsm cctrsm_ #define F77_ctrsm cctrsm_
#define F77_zgemm czgemm_ #define F77_zgemm czgemm_
#define F77_zgemm3m czgemm3m_
#define F77_zsymm czsymm_ #define F77_zsymm czsymm_
#define F77_zsyrk czsyrk_ #define F77_zsyrk czsyrk_
#define F77_zsyr2k czsyr2k_ #define F77_zsyr2k czsyr2k_
@ -333,12 +335,14 @@ typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX;
#define F77_dtrmm CDTRMM #define F77_dtrmm CDTRMM
#define F77_dtrsm CDTRSM #define F77_dtrsm CDTRSM
#define F77_cgemm CCGEMM #define F77_cgemm CCGEMM
#define F77_cgemm3m CCGEMM3M
#define F77_csymm CCSYMM #define F77_csymm CCSYMM
#define F77_csyrk CCSYRK #define F77_csyrk CCSYRK
#define F77_csyr2k CCSYR2K #define F77_csyr2k CCSYR2K
#define F77_ctrmm CCTRMM #define F77_ctrmm CCTRMM
#define F77_ctrsm CCTRSM #define F77_ctrsm CCTRSM
#define F77_zgemm CZGEMM #define F77_zgemm CZGEMM
#define F77_zgemm3m CZGEMM3M
#define F77_zsymm CZSYMM #define F77_zsymm CZSYMM
#define F77_zsyrk CZSYRK #define F77_zsyrk CZSYRK
#define F77_zsyr2k CZSYR2K #define F77_zsyr2k CZSYR2K
@ -493,12 +497,14 @@ typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX;
#define F77_dtrmm cdtrmm #define F77_dtrmm cdtrmm
#define F77_dtrsm cdtrsm #define F77_dtrsm cdtrsm
#define F77_cgemm ccgemm #define F77_cgemm ccgemm
#define F77_cgemm3m ccgemm3m
#define F77_csymm ccsymm #define F77_csymm ccsymm
#define F77_csyrk ccsyrk #define F77_csyrk ccsyrk
#define F77_csyr2k ccsyr2k #define F77_csyr2k ccsyr2k
#define F77_ctrmm cctrmm #define F77_ctrmm cctrmm
#define F77_ctrsm cctrsm #define F77_ctrsm cctrsm
#define F77_zgemm czgemm #define F77_zgemm czgemm
#define F77_zgemm3m czgemm3m
#define F77_zsymm czsymm #define F77_zsymm czsymm
#define F77_zsyrk czsyrk #define F77_zsyrk czsyrk
#define F77_zsyr2k czsyr2k #define F77_zsyr2k czsyr2k

22
ctest/cin3_3m Normal file
View File

@ -0,0 +1,22 @@
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO
6 NUMBER OF VALUES OF N
0 1 2 3 5 9 35 VALUES OF N
3 NUMBER OF VALUES OF ALPHA
(0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
3 NUMBER OF VALUES OF BETA
(0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
cblas_cgemm3m T PUT F FOR NO TEST. SAME COLUMNS.
cblas_chemm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_csymm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_ctrmm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_ctrsm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_cherk F PUT F FOR NO TEST. SAME COLUMNS.
cblas_csyrk F PUT F FOR NO TEST. SAME COLUMNS.
cblas_cher2k F PUT F FOR NO TEST. SAME COLUMNS.
cblas_csyr2k F PUT F FOR NO TEST. SAME COLUMNS.

22
ctest/zin3_3m Normal file
View File

@ -0,0 +1,22 @@
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO
7 NUMBER OF VALUES OF N
0 1 2 3 5 9 35 VALUES OF N
3 NUMBER OF VALUES OF ALPHA
(0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
3 NUMBER OF VALUES OF BETA
(0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
cblas_zgemm3m T PUT F FOR NO TEST. SAME COLUMNS.
cblas_zhemm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_zsymm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_ztrmm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_ztrsm F PUT F FOR NO TEST. SAME COLUMNS.
cblas_zherk F PUT F FOR NO TEST. SAME COLUMNS.
cblas_zsyrk F PUT F FOR NO TEST. SAME COLUMNS.
cblas_zher2k F PUT F FOR NO TEST. SAME COLUMNS.
cblas_zsyr2k F PUT F FOR NO TEST. SAME COLUMNS.

View File

@ -4,11 +4,11 @@ include ../../Makefile.system
USE_GEMM3M = 0 USE_GEMM3M = 0
ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
USE_GEMM3M = 0 USE_GEMM3M = 1
endif endif
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
USE_GEMM3M = 0 USE_GEMM3M = 1
endif endif
ifeq ($(ARCH), ia64) ifeq ($(ARCH), ia64)

View File

@ -54,7 +54,7 @@
cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub, cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub,
cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby, cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby,
cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy, cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy,
cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy,
); );
@exblasobjs = ( @exblasobjs = (
@ -75,7 +75,7 @@
); );
@gemm3mobjs = ( @gemm3mobjs = (
cgemm3m,zgemm3m
); );

View File

@ -4,11 +4,11 @@ include $(TOPDIR)/Makefile.system
SUPPORT_GEMM3M = 0 SUPPORT_GEMM3M = 0
ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
SUPPORT_GEMM3M = 0 SUPPORT_GEMM3M = 1
endif endif
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
SUPPORT_GEMM3M = 0 SUPPORT_GEMM3M = 1
endif endif
ifeq ($(ARCH), ia64) ifeq ($(ARCH), ia64)
@ -128,9 +128,11 @@ ZBLAS3OBJS = \
ifeq ($(SUPPORT_GEMM3M), 1) ifeq ($(SUPPORT_GEMM3M), 1)
CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) # CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX)
CBLAS3OBJS += cgemm3m.$(SUFFIX)
ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX) # ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX)
ZBLAS3OBJS += zgemm3m.$(SUFFIX)
endif endif
@ -332,6 +334,16 @@ CZBLAS3OBJS = \
cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\ cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX)
ifeq ($(SUPPORT_GEMM3M), 1)
# CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX)
CCBLAS3OBJS += cblas_cgemm3m.$(SUFFIX)
# ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX)
CZBLAS3OBJS += cblas_zgemm3m.$(SUFFIX)
endif
ifndef NO_CBLAS ifndef NO_CBLAS
@ -1775,6 +1787,13 @@ cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)
# CBLAS entry points for GEMM3M: both objects are compiled from gemm.c with
# -DCBLAS and -DGEMM3M. The two recipes are textually identical; the
# single/double complex distinction presumably comes from flags set elsewhere
# in this Makefile (not visible here) -- TODO confirm.
cblas_cgemm3m.$(SUFFIX) cblas_cgemm3m.$(PSUFFIX) : gemm.c
$(CC) -DCBLAS -c $(CFLAGS) -DGEMM3M $< -o $(@F)
cblas_zgemm3m.$(SUFFIX) cblas_zgemm3m.$(PSUFFIX) : gemm.c
$(CC) -DCBLAS -c $(CFLAGS) -DGEMM3M $< -o $(@F)
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F) $(CC) -c $(CFLAGS) $< -o $(@F)

View File

@ -293,6 +293,14 @@ gotoblas_t TABLE_NAME = {
#endif #endif
chemm_outcopyTS, chemm_oltcopyTS, chemm_outcopyTS, chemm_oltcopyTS,
0, 0, 0,
#ifdef CGEMM3M_DEFAULT_UNROLL_M
CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
#else
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif
cgemm3m_kernelTS, cgemm3m_kernelTS,
cgemm3m_incopybTS, cgemm3m_incopyrTS, cgemm3m_incopybTS, cgemm3m_incopyrTS,
@ -391,6 +399,14 @@ gotoblas_t TABLE_NAME = {
#endif #endif
zhemm_outcopyTS, zhemm_oltcopyTS, zhemm_outcopyTS, zhemm_oltcopyTS,
0, 0, 0,
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
#else
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif
zgemm3m_kernelTS, zgemm3m_kernelTS,
zgemm3m_incopybTS, zgemm3m_incopyrTS, zgemm3m_incopybTS, zgemm3m_incopyrTS,
@ -486,6 +502,9 @@ gotoblas_t TABLE_NAME = {
#endif #endif
xhemm_outcopyTS, xhemm_oltcopyTS, xhemm_outcopyTS, xhemm_oltcopyTS,
0, 0, 0,
QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
xgemm3m_kernelTS, xgemm3m_kernelTS,
xgemm3m_incopybTS, xgemm3m_incopyrTS, xgemm3m_incopybTS, xgemm3m_incopyrTS,
@ -661,9 +680,23 @@ static void init_parameter(void) {
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
#ifdef CGEMM3M_DEFAULT_Q
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
#endif
#ifdef ZGEMM3M_DEFAULT_Q
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q; TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif #endif
#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
@ -918,20 +951,56 @@ static void init_parameter(void) {
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif #endif
#endif #endif
#ifdef CGEMM3M_DEFAULT_P
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif
#ifdef ZGEMM3M_DEFAULT_P
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif
#ifdef EXPRECISION
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif
TABLE_NAME.sgemm_p = (TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.sgemm_p = (TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.dgemm_p = (TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.dgemm_p = (TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.cgemm_p = (TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1) & ~(CGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.cgemm_p = (TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1) & ~(CGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.zgemm_p = (TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1) & ~(ZGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.zgemm_p = (TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1) & ~(ZGEMM_DEFAULT_UNROLL_M - 1);
#ifdef CGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.cgemm3m_p = (TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1) & ~(CGEMM3M_DEFAULT_UNROLL_M - 1);
#else
TABLE_NAME.cgemm3m_p = (TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1);
#endif
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.zgemm3m_p = (TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1) & ~(ZGEMM3M_DEFAULT_UNROLL_M - 1);
#else
TABLE_NAME.zgemm3m_p = (TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1);
#endif
#ifdef QUAD_PRECISION #ifdef QUAD_PRECISION
TABLE_NAME.qgemm_p = (TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.qgemm_p = (TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.xgemm_p = (TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1) & ~(XGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.xgemm_p = (TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1) & ~(XGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.xgemm3m_p = (TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1);
#endif #endif
#ifdef DEBUG #ifdef DEBUG
@ -965,11 +1034,32 @@ static void init_parameter(void) {
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15); ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.xgemm_r = (((BUFFER_SIZE - TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15); ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
#endif #endif
} }

63
param.h
View File

@ -289,6 +289,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_Q 224 #define ZGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224 #define XGEMM_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288
#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R sgemm_r
#define QGEMM_DEFAULT_R qgemm_r #define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R dgemm_r #define DGEMM_DEFAULT_R dgemm_r
@ -371,6 +381,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define QGEMM_DEFAULT_Q 224 #define QGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224 #define XGEMM_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288
#define SGEMM_DEFAULT_R 12288 #define SGEMM_DEFAULT_R 12288
#define QGEMM_DEFAULT_R qgemm_r #define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R 12288 #define DGEMM_DEFAULT_R 12288
@ -1073,10 +1093,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GETRF_FACTOR 0.72 #define GETRF_FACTOR 0.72
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 2
#define ZGEMM3M_DEFAULT_UNROLL_M 8
#endif #endif
@ -1152,10 +1168,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_Q 192 #define ZGEMM_DEFAULT_Q 192
#define XGEMM_DEFAULT_Q 128 #define XGEMM_DEFAULT_Q 128
#define CGEMM3M_DEFAULT_UNROLL_N 4 #define CGEMM3M_DEFAULT_UNROLL_N 8
#define CGEMM3M_DEFAULT_UNROLL_M 8 #define CGEMM3M_DEFAULT_UNROLL_M 4
#define ZGEMM3M_DEFAULT_UNROLL_N 2 #define ZGEMM3M_DEFAULT_UNROLL_N 8
#define ZGEMM3M_DEFAULT_UNROLL_M 8 #define ZGEMM3M_DEFAULT_UNROLL_M 2
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288
#define GETRF_FACTOR 0.72 #define GETRF_FACTOR 0.72
@ -1259,10 +1287,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_R xgemm_r #define XGEMM_DEFAULT_R xgemm_r
#define XGEMM_DEFAULT_Q 128 #define XGEMM_DEFAULT_Q 128
#define CGEMM3M_DEFAULT_UNROLL_N 4 #define CGEMM3M_DEFAULT_UNROLL_N 8
#define CGEMM3M_DEFAULT_UNROLL_M 8 #define CGEMM3M_DEFAULT_UNROLL_M 4
#define ZGEMM3M_DEFAULT_UNROLL_N 2 #define ZGEMM3M_DEFAULT_UNROLL_N 8
#define ZGEMM3M_DEFAULT_UNROLL_M 8 #define ZGEMM3M_DEFAULT_UNROLL_M 2
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288
#endif #endif

View File

@ -88,6 +88,31 @@ else
endif endif
endif endif
# level3_3m: run the GEMM3M level-3 test drivers (cblat3_3m, zblat3_3m).
#
# Pass 1 runs each driver with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=1.
# When SMP is defined, the summary files are removed and both drivers are run
# again with two threads, selected through OMP_NUM_THREADS when USE_OPENMP is
# 1 and through OPENBLAS_NUM_THREADS otherwise.
#
# After every run the matching *_3M.SUMM file is searched for "FATAL"; if the
# word is found the summary is printed. Because each check ends in
# "|| exit 0", a FATAL entry is only displayed -- the check line itself never
# makes the target fail.
level3_3m : zblat3_3m cblat3_3m
rm -f ?BLAT3_3M.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3_3m < ./cblat3_3m.dat
@$(GREP) -q FATAL CBLAT3_3M.SUMM && cat CBLAT3_3M.SUMM || exit 0
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3_3m < ./zblat3_3m.dat
@$(GREP) -q FATAL ZBLAT3_3M.SUMM && cat ZBLAT3_3M.SUMM || exit 0
ifdef SMP
rm -f ?BLAT3_3M.SUMM
ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./cblat3_3m < ./cblat3_3m.dat
@$(GREP) -q FATAL CBLAT3_3M.SUMM && cat CBLAT3_3M.SUMM || exit 0
OMP_NUM_THREADS=2 ./zblat3_3m < ./zblat3_3m.dat
@$(GREP) -q FATAL ZBLAT3_3M.SUMM && cat ZBLAT3_3M.SUMM || exit 0
else
OPENBLAS_NUM_THREADS=2 ./cblat3_3m < ./cblat3_3m.dat
@$(GREP) -q FATAL CBLAT3_3M.SUMM && cat CBLAT3_3M.SUMM || exit 0
OPENBLAS_NUM_THREADS=2 ./zblat3_3m < ./zblat3_3m.dat
@$(GREP) -q FATAL ZBLAT3_3M.SUMM && cat ZBLAT3_3M.SUMM || exit 0
endif
endif
endif
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS) FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
CEXTRALIB = CEXTRALIB =
@ -131,6 +156,15 @@ cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME)
zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME) zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
# Link the GEMM3M test drivers: each executable is built with $(FC) from its
# own object file plus ../$(LIBNAME), and is relinked when either changes.
cblat3_3m : cblat3_3m.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat3_3m cblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
zblat3_3m : zblat3_3m.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat3_3m zblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
clean: clean:
@rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \ @rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \
sblat1 dblat1 cblat1 zblat1 \ sblat1 dblat1 cblat1 zblat1 \
@ -139,6 +173,8 @@ clean:
sblat1p dblat1p cblat1p zblat1p \ sblat1p dblat1p cblat1p zblat1p \
sblat2p dblat2p cblat2p zblat2p \ sblat2p dblat2p cblat2p zblat2p \
sblat3p dblat3p cblat3p zblat3p \ sblat3p dblat3p cblat3p zblat3p \
zblat3_3m zblat3_3mp \
cblat3_3m cblat3_3mp \
*.stackdump *.dll *.stackdump *.dll
libs: libs:

23
test/cblat3_3m.dat Normal file
View File

@ -0,0 +1,23 @@
'CBLAT3_3M.SUMM' NAME OF SUMMARY OUTPUT FILE
6 UNIT NUMBER OF SUMMARY FILE
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO TEST ERROR EXITS.
16.0 THRESHOLD VALUE OF TEST RATIO
6 NUMBER OF VALUES OF N
0 1 2 3 7 31 63 VALUES OF N
3 NUMBER OF VALUES OF ALPHA
(0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
3 NUMBER OF VALUES OF BETA
(0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
CGEMM3M T PUT F FOR NO TEST. SAME COLUMNS.
CHEMM F PUT F FOR NO TEST. SAME COLUMNS.
CSYMM F PUT F FOR NO TEST. SAME COLUMNS.
CTRMM F PUT F FOR NO TEST. SAME COLUMNS.
CTRSM F PUT F FOR NO TEST. SAME COLUMNS.
CHERK F PUT F FOR NO TEST. SAME COLUMNS.
CSYRK F PUT F FOR NO TEST. SAME COLUMNS.
CHER2K F PUT F FOR NO TEST. SAME COLUMNS.
CSYR2K F PUT F FOR NO TEST. SAME COLUMNS.

3442
test/cblat3_3m.f Normal file

File diff suppressed because it is too large Load Diff

23
test/zblat3_3m.dat Normal file
View File

@ -0,0 +1,23 @@
'ZBLAT3_3M.SUMM' NAME OF SUMMARY OUTPUT FILE
6 UNIT NUMBER OF SUMMARY FILE
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO TEST ERROR EXITS.
16.0 THRESHOLD VALUE OF TEST RATIO
6 NUMBER OF VALUES OF N
0 1 2 3 7 31 63 VALUES OF N
3 NUMBER OF VALUES OF ALPHA
(0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
3 NUMBER OF VALUES OF BETA
(0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
ZGEMM3M T PUT F FOR NO TEST. SAME COLUMNS.
ZHEMM F PUT F FOR NO TEST. SAME COLUMNS.
ZSYMM F PUT F FOR NO TEST. SAME COLUMNS.
ZTRMM F PUT F FOR NO TEST. SAME COLUMNS.
ZTRSM F PUT F FOR NO TEST. SAME COLUMNS.
ZHERK F PUT F FOR NO TEST. SAME COLUMNS.
ZSYRK F PUT F FOR NO TEST. SAME COLUMNS.
ZHER2K F PUT F FOR NO TEST. SAME COLUMNS.
ZSYR2K F PUT F FOR NO TEST. SAME COLUMNS.

3448
test/zblat3_3m.f Normal file

File diff suppressed because it is too large Load Diff