Merge 82827762c0
into 453b9e4886
This commit is contained in:
commit
c3bf021ff8
|
@ -630,85 +630,85 @@ zcholesky.essl : zcholesky.$(SUFFIX)
|
||||||
##################################### Sgemm ####################################################
|
##################################### Sgemm ####################################################
|
||||||
ifeq ($(BUILD_BFLOAT16),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
|
sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
endif
|
endif
|
||||||
|
|
||||||
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
|
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
sgemm.acml : sgemm.$(SUFFIX)
|
sgemm.acml : sgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
sgemm.atlas : sgemm.$(SUFFIX)
|
sgemm.atlas : sgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
sgemm.mkl : sgemm.$(SUFFIX)
|
sgemm.mkl : sgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
sgemm.veclib : sgemm.$(SUFFIX)
|
sgemm.veclib : sgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
sgemm.essl : sgemm.$(SUFFIX)
|
sgemm.essl : sgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Dgemm ####################################################
|
##################################### Dgemm ####################################################
|
||||||
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
|
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
dgemm.acml : dgemm.$(SUFFIX)
|
dgemm.acml : dgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
dgemm.atlas : dgemm.$(SUFFIX)
|
dgemm.atlas : dgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
dgemm.mkl : dgemm.$(SUFFIX)
|
dgemm.mkl : dgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
dgemm.veclib : dgemm.$(SUFFIX)
|
dgemm.veclib : dgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
dgemm.essl : dgemm.$(SUFFIX)
|
dgemm.essl : dgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Cgemm ####################################################
|
##################################### Cgemm ####################################################
|
||||||
|
|
||||||
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
|
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
cgemm.acml : cgemm.$(SUFFIX)
|
cgemm.acml : cgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
cgemm.atlas : cgemm.$(SUFFIX)
|
cgemm.atlas : cgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
cgemm.mkl : cgemm.$(SUFFIX)
|
cgemm.mkl : cgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
cgemm.veclib : cgemm.$(SUFFIX)
|
cgemm.veclib : cgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
cgemm.essl : cgemm.$(SUFFIX)
|
cgemm.essl : cgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Zgemm ####################################################
|
##################################### Zgemm ####################################################
|
||||||
|
|
||||||
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
|
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
zgemm.acml : zgemm.$(SUFFIX)
|
zgemm.acml : zgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
zgemm.atlas : zgemm.$(SUFFIX)
|
zgemm.atlas : zgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
zgemm.mkl : zgemm.$(SUFFIX)
|
zgemm.mkl : zgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
zgemm.veclib : zgemm.$(SUFFIX)
|
zgemm.veclib : zgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
zgemm.essl : zgemm.$(SUFFIX)
|
zgemm.essl : zgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ssymm ####################################################
|
##################################### Ssymm ####################################################
|
||||||
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
|
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -2959,21 +2959,21 @@ zcholesky.$(SUFFIX) : cholesky.c
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
ifeq ($(BUILD_BFLOAT16),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
sbgemm.$(SUFFIX) : gemm.c
|
sbgemm.$(SUFFIX) : gemm.cpp
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -o $(@F) $^
|
# gemm.cpp is a C++ source (it pulls in nanobench), so build the bfloat16 object
# with $(CXX) exactly like the s/d/c/z gemm objects and the sbgemm.goto link step.
$(CXX) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^
|
||||||
endif
|
endif
|
||||||
|
|
||||||
sgemm.$(SUFFIX) : gemm.c
|
sgemm.$(SUFFIX) : gemm.cpp
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
$(CXX) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^
|
||||||
|
|
||||||
dgemm.$(SUFFIX) : gemm.c
|
dgemm.$(SUFFIX) : gemm.cpp
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CXX) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^
|
||||||
|
|
||||||
cgemm.$(SUFFIX) : gemm.c
|
cgemm.$(SUFFIX) : gemm.cpp
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
$(CXX) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^
|
||||||
|
|
||||||
zgemm.$(SUFFIX) : gemm.c
|
zgemm.$(SUFFIX) : gemm.cpp
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CXX) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^
|
||||||
|
|
||||||
ssymm.$(SUFFIX) : symm.c
|
ssymm.$(SUFFIX) : symm.c
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
178
benchmark/gemm.c
178
benchmark/gemm.c
|
@ -1,178 +0,0 @@
|
||||||
/***************************************************************************
|
|
||||||
Copyright (c) 2014, The OpenBLAS Project
|
|
||||||
All rights reserved.
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
1. Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
3. Neither the name of the OpenBLAS project nor the names of
|
|
||||||
its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
||||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*****************************************************************************/
|
|
||||||
|
|
||||||
#include "bench.h"
|
|
||||||
|
|
||||||
#undef GEMM
|
|
||||||
|
|
||||||
#ifndef COMPLEX
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
|
||||||
#define GEMM BLASFUNC(dgemm)
|
|
||||||
#elif defined(HALF)
|
|
||||||
#define GEMM BLASFUNC(sbgemm)
|
|
||||||
#else
|
|
||||||
#define GEMM BLASFUNC(sgemm)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
|
||||||
#define GEMM BLASFUNC(zgemm)
|
|
||||||
#else
|
|
||||||
#define GEMM BLASFUNC(cgemm)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
|
||||||
|
|
||||||
IFLOAT *a, *b;
|
|
||||||
FLOAT *c;
|
|
||||||
FLOAT alpha[] = {1.0, 0.0};
|
|
||||||
FLOAT beta [] = {0.0, 0.0};
|
|
||||||
char transa = 'N';
|
|
||||||
char transb = 'N';
|
|
||||||
blasint m, n, k, i, j, lda, ldb, ldc;
|
|
||||||
int loops = 1;
|
|
||||||
int has_param_m = 0;
|
|
||||||
int has_param_n = 0;
|
|
||||||
int has_param_k = 0;
|
|
||||||
char *p;
|
|
||||||
|
|
||||||
int from = 1;
|
|
||||||
int to = 200;
|
|
||||||
int step = 1;
|
|
||||||
|
|
||||||
double time1, timeg;
|
|
||||||
|
|
||||||
argc--;argv++;
|
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++; }
|
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
|
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++; }
|
|
||||||
|
|
||||||
if ((p = getenv("OPENBLAS_TRANS"))) {
|
|
||||||
transa=*p;
|
|
||||||
transb=*p;
|
|
||||||
}
|
|
||||||
if ((p = getenv("OPENBLAS_TRANSA"))) {
|
|
||||||
transa=*p;
|
|
||||||
}
|
|
||||||
if ((p = getenv("OPENBLAS_TRANSB"))) {
|
|
||||||
transb=*p;
|
|
||||||
}
|
|
||||||
TOUPPER(transa);
|
|
||||||
TOUPPER(transb);
|
|
||||||
|
|
||||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb);
|
|
||||||
|
|
||||||
p = getenv("OPENBLAS_LOOPS");
|
|
||||||
if ( p != NULL ) {
|
|
||||||
loops = atoi(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((p = getenv("OPENBLAS_PARAM_M"))) {
|
|
||||||
m = atoi(p);
|
|
||||||
has_param_m=1;
|
|
||||||
} else {
|
|
||||||
m = to;
|
|
||||||
}
|
|
||||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
|
||||||
n = atoi(p);
|
|
||||||
has_param_n=1;
|
|
||||||
} else {
|
|
||||||
n = to;
|
|
||||||
}
|
|
||||||
if ((p = getenv("OPENBLAS_PARAM_K"))) {
|
|
||||||
k = atoi(p);
|
|
||||||
has_param_k=1;
|
|
||||||
} else {
|
|
||||||
k = to;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
|
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
|
||||||
}
|
|
||||||
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
|
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
|
||||||
}
|
|
||||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
|
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __linux
|
|
||||||
srandom(getpid());
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (i = 0; i < m * k * COMPSIZE; i++) {
|
|
||||||
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
for (i = 0; i < k * n * COMPSIZE; i++) {
|
|
||||||
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
for (i = 0; i < m * n * COMPSIZE; i++) {
|
|
||||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, " SIZE Flops Time\n");
|
|
||||||
|
|
||||||
for (i = from; i <= to; i += step) {
|
|
||||||
|
|
||||||
timeg=0;
|
|
||||||
|
|
||||||
if (!has_param_m) { m = i; }
|
|
||||||
if (!has_param_n) { n = i; }
|
|
||||||
if (!has_param_k) { k = i; }
|
|
||||||
|
|
||||||
if (transa == 'N') { lda = m; }
|
|
||||||
else { lda = k; }
|
|
||||||
if (transb == 'N') { ldb = k; }
|
|
||||||
else { ldb = n; }
|
|
||||||
ldc = m;
|
|
||||||
|
|
||||||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
|
||||||
begin();
|
|
||||||
|
|
||||||
for (j=0; j<loops; j++) {
|
|
||||||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
|
||||||
}
|
|
||||||
|
|
||||||
end();
|
|
||||||
time1 = getsec();
|
|
||||||
|
|
||||||
timeg = time1/loops;
|
|
||||||
fprintf(stderr,
|
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
|
||||||
COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
|
@ -0,0 +1,152 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, 2023. The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iostream>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
#include <common.h>
|
||||||
|
|
||||||
|
#include "nanobench.h"
|
||||||
|
|
||||||
|
#undef GEMM
|
||||||
|
|
||||||
|
#ifndef COMPLEX
|
||||||
|
#ifdef DOUBLE
|
||||||
|
#define GEMM BLASFUNC(dgemm)
|
||||||
|
#elif defined(HALF)
|
||||||
|
#define GEMM BLASFUNC(sbgemm)
|
||||||
|
#else
|
||||||
|
#define GEMM BLASFUNC(sgemm)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#ifdef DOUBLE
|
||||||
|
#define GEMM BLASFUNC(zgemm)
|
||||||
|
#else
|
||||||
|
#define GEMM BLASFUNC(cgemm)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * Fill `vec` in place with pseudo-random values drawn uniformly from
 * [-0.5, 0.5), the same range the previous C benchmark used for its operands.
 *
 * The vector is taken by reference: taking it by value would fill a temporary
 * copy and leave the caller's buffer untouched.
 *
 * Values are drawn as `double` and converted to `T`, so `T` does not have to be
 * a type that std::uniform_real_distribution accepts.
 * NOTE(review): for a non-floating-point element type the conversion truncates;
 * confirm what the bfloat16 build expects here.
 *
 * @param vec  buffer to overwrite; its size is not changed.
 */
template <typename T> static void fill_vector(std::vector<T> &vec) {
  std::random_device rand_dev;
  std::mt19937 generator(rand_dev());
  // Small, sign-balanced operands keep the products well inside T's range.
  std::uniform_real_distribution<double> distribution(-0.5, 0.5);

  std::generate(vec.begin(), vec.end(),
                [&]() { return static_cast<T>(distribution(generator)); });
}
|
||||||
|
|
||||||
|
/**
 * Look up an environment variable.
 *
 * @param name           name of the variable to read.
 * @param default_value  value to report when the variable is not set.
 * @return pair whose `first` is true when the variable IS set in the
 *         environment and whose `second` is the variable's text, or
 *         `default_value` when it is not set.
 */
static std::pair<bool, std::string>
env_param(const std::string &name, const std::string &default_value) {
  const char *value = getenv(name.c_str());
  // Callers use `first` as "was supplied", so it must be true for a non-null
  // lookup result (the previous `value == nullptr` reported the opposite).
  const bool is_set = (value != nullptr);
  return {is_set, is_set ? std::string(value) : default_value};
}
|
||||||
|
|
||||||
|
static std::string env_value(const std::string &name,
|
||||||
|
const std::string &default_value) {
|
||||||
|
return env_param(name, default_value).second;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
int from = (argc > 1) ? atol(argv[1]) : 1;
|
||||||
|
int to = (argc > 2) ? MAX(atol(argv[2]), from) : 200;
|
||||||
|
int step = (argc > 3) ? atol(argv[3]) : 1;
|
||||||
|
|
||||||
|
FLOAT alpha[] = {1.0, 0.0};
|
||||||
|
FLOAT beta[] = {0.0, 0.0};
|
||||||
|
|
||||||
|
int epochs = atoi(env_value("OPENBLAS_EPOCHS", "1").c_str());
|
||||||
|
bool json_output = env_value("OPENBLAS_OUTPUT_JSON", "0").front() == '1';
|
||||||
|
|
||||||
|
std::pair<bool, std::string> param_m = env_param("OPENBLAS_PARAM_M", "100");
|
||||||
|
std::pair<bool, std::string> param_n = env_param("OPENBLAS_PARAM_N", "100");
|
||||||
|
std::pair<bool, std::string> param_k = env_param("OPENBLAS_PARAM_K", "100");
|
||||||
|
blasint m = param_m.first ? atoi(param_m.second.c_str()) : to;
|
||||||
|
blasint n = param_n.first ? atoi(param_n.second.c_str()) : to;
|
||||||
|
blasint k = param_k.first ? atoi(param_k.second.c_str()) : to;
|
||||||
|
|
||||||
|
char transpose = toupper(env_value("OPENBLAS_TRANS", "N").front());
|
||||||
|
char transpose_a = toupper(env_value("OPENBLAS_TRANSA", "N").front());
|
||||||
|
char transpose_b = toupper(env_value("OPENBLAS_TRANSB", "N").front());
|
||||||
|
|
||||||
|
bool is_specific_size = param_m.first && param_n.first && param_k.first;
|
||||||
|
if (is_specific_size) {
|
||||||
|
from = 1;
|
||||||
|
to = 1;
|
||||||
|
step = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<IFLOAT> a(m * k);
|
||||||
|
std::vector<IFLOAT> b(n * k);
|
||||||
|
std::vector<FLOAT> c(m * n);
|
||||||
|
fill_vector(a);
|
||||||
|
fill_vector(b);
|
||||||
|
fill_vector(c);
|
||||||
|
|
||||||
|
if (!is_specific_size) {
|
||||||
|
std::cout << "From: " << std::to_string(from) << " To: " << std::to_string(to)
|
||||||
|
<< " Step: " << std::to_string(step) << " TransA: " << transpose_a
|
||||||
|
<< " TransB: " << transpose_b << "\n";
|
||||||
|
} else {
|
||||||
|
std::cout << "M: " << std::to_string(m) << " N: " << std::to_string(n)
|
||||||
|
<< " K: " << std::to_string(k) << " TransA: " << transpose_a
|
||||||
|
<< " TransB: " << transpose_b << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = from; i <= to; i += step) {
|
||||||
|
if (!param_m.first) {
|
||||||
|
m = i;
|
||||||
|
}
|
||||||
|
if (!param_n.first) {
|
||||||
|
n = i;
|
||||||
|
}
|
||||||
|
if (!param_k.first) {
|
||||||
|
k = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
blasint lda = transpose == 'N' && transpose_a == 'N' ? m : k;
|
||||||
|
blasint ldb = transpose == 'N' && transpose_b == 'N' ? k : n;
|
||||||
|
blasint ldc = m;
|
||||||
|
|
||||||
|
ankerl::nanobench::Bench bench;
|
||||||
|
if (json_output) {
|
||||||
|
bench.output(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string bench_name = "M=" + std::to_string(m) +
|
||||||
|
" N=" + std::to_string(n) +
|
||||||
|
" K=" + std::to_string(k);
|
||||||
|
bench.minEpochIterations(epochs).run(bench_name, [&]() {
|
||||||
|
GEMM(&transpose_a, &transpose_b, &m, &n, &k, alpha, a.data(), &lda,
|
||||||
|
b.data(), &ldb, beta, c.data(), &ldc);
|
||||||
|
});
|
||||||
|
if (json_output) {
|
||||||
|
bench.render(ankerl::nanobench::templates::json(), std::cout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue