Allow setting a custom value for ?GEMM_DEFAULT_UNROLL_MN; optimizations for syrk
This commit is contained in:
parent
e213a42cde
commit
125610d23b
|
@ -919,14 +919,22 @@ extern gotoblas_t *gotoblas;
|
||||||
#define SGEMM_R SGEMM_DEFAULT_R
|
#define SGEMM_R SGEMM_DEFAULT_R
|
||||||
#define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M
|
#define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M
|
||||||
#define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
#define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
||||||
|
#ifdef SGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#define SGEMM_UNROLL_MN SGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#else
|
||||||
#define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N))
|
#define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N))
|
||||||
|
#endif
|
||||||
|
|
||||||
#define DGEMM_P DGEMM_DEFAULT_P
|
#define DGEMM_P DGEMM_DEFAULT_P
|
||||||
#define DGEMM_Q DGEMM_DEFAULT_Q
|
#define DGEMM_Q DGEMM_DEFAULT_Q
|
||||||
#define DGEMM_R DGEMM_DEFAULT_R
|
#define DGEMM_R DGEMM_DEFAULT_R
|
||||||
#define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
#define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||||
#define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
#define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||||
|
#ifdef DGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#define DGEMM_UNROLL_MN DGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#else
|
||||||
#define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N))
|
#define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N))
|
||||||
|
#endif
|
||||||
|
|
||||||
#define QGEMM_P QGEMM_DEFAULT_P
|
#define QGEMM_P QGEMM_DEFAULT_P
|
||||||
#define QGEMM_Q QGEMM_DEFAULT_Q
|
#define QGEMM_Q QGEMM_DEFAULT_Q
|
||||||
|
@ -940,14 +948,22 @@ extern gotoblas_t *gotoblas;
|
||||||
#define CGEMM_R CGEMM_DEFAULT_R
|
#define CGEMM_R CGEMM_DEFAULT_R
|
||||||
#define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M
|
#define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M
|
||||||
#define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N
|
#define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N
|
||||||
|
#ifdef CGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#define CGEMM_UNROLL_MN CGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#else
|
||||||
#define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
|
#define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
|
||||||
|
#endif
|
||||||
|
|
||||||
#define ZGEMM_P ZGEMM_DEFAULT_P
|
#define ZGEMM_P ZGEMM_DEFAULT_P
|
||||||
#define ZGEMM_Q ZGEMM_DEFAULT_Q
|
#define ZGEMM_Q ZGEMM_DEFAULT_Q
|
||||||
#define ZGEMM_R ZGEMM_DEFAULT_R
|
#define ZGEMM_R ZGEMM_DEFAULT_R
|
||||||
#define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M
|
#define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M
|
||||||
#define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N
|
#define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N
|
||||||
|
#ifdef ZGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#define ZGEMM_UNROLL_MN ZGEMM_DEFAULT_UNROLL_MN
|
||||||
|
#else
|
||||||
#define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
|
#define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
|
||||||
|
#endif
|
||||||
|
|
||||||
#define XGEMM_P XGEMM_DEFAULT_P
|
#define XGEMM_P XGEMM_DEFAULT_P
|
||||||
#define XGEMM_Q XGEMM_DEFAULT_Q
|
#define XGEMM_Q XGEMM_DEFAULT_Q
|
||||||
|
|
|
@ -538,10 +538,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1;
|
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1;
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
mode = BLAS_DOUBLE | BLAS_REAL;
|
mode = BLAS_DOUBLE | BLAS_REAL;
|
||||||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1;
|
mask = DGEMM_UNROLL_MN - 1;
|
||||||
#else
|
#else
|
||||||
mode = BLAS_SINGLE | BLAS_REAL;
|
mode = BLAS_SINGLE | BLAS_REAL;
|
||||||
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1;
|
mask = SGEMM_UNROLL_MN - 1;
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
|
@ -549,10 +549,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1;
|
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1;
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
||||||
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1;
|
mask = ZGEMM_UNROLL_MN - 1;
|
||||||
#else
|
#else
|
||||||
mode = BLAS_SINGLE | BLAS_COMPLEX;
|
mode = BLAS_SINGLE | BLAS_COMPLEX;
|
||||||
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1;
|
mask = CGEMM_UNROLL_MN - 1;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -57,10 +57,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||||
|
|
||||||
switch (mode & BLAS_PREC) {
|
switch (mode & BLAS_PREC) {
|
||||||
case BLAS_SINGLE:
|
case BLAS_SINGLE:
|
||||||
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1;
|
mask = SGEMM_UNROLL_MN - 1;
|
||||||
break;
|
break;
|
||||||
case BLAS_DOUBLE:
|
case BLAS_DOUBLE:
|
||||||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1;
|
mask = DGEMM_UNROLL_MN - 1;
|
||||||
break;
|
break;
|
||||||
#ifdef EXPRECISION
|
#ifdef EXPRECISION
|
||||||
case BLAS_XDOUBLE:
|
case BLAS_XDOUBLE:
|
||||||
|
@ -71,10 +71,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||||
} else {
|
} else {
|
||||||
switch (mode & BLAS_PREC) {
|
switch (mode & BLAS_PREC) {
|
||||||
case BLAS_SINGLE:
|
case BLAS_SINGLE:
|
||||||
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1;
|
mask = CGEMM_UNROLL_MN - 1;
|
||||||
break;
|
break;
|
||||||
case BLAS_DOUBLE:
|
case BLAS_DOUBLE:
|
||||||
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1;
|
mask = ZGEMM_UNROLL_MN - 1;
|
||||||
break;
|
break;
|
||||||
#ifdef EXPRECISION
|
#ifdef EXPRECISION
|
||||||
case BLAS_XDOUBLE:
|
case BLAS_XDOUBLE:
|
||||||
|
|
|
@ -54,7 +54,14 @@ gotoblas_t TABLE_NAME = {
|
||||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
||||||
|
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
|
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
|
||||||
|
#ifdef SGEMM_DEFAULT_UNROLL_MN
|
||||||
|
SGEMM_DEFAULT_UNROLL_MN,
|
||||||
|
#else
|
||||||
|
MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_EXCLUSIVE_CACHE
|
#ifdef HAVE_EXCLUSIVE_CACHE
|
||||||
1,
|
1,
|
||||||
#else
|
#else
|
||||||
|
@ -110,7 +117,12 @@ gotoblas_t TABLE_NAME = {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
|
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
|
||||||
|
#ifdef DGEMM_DEFAULT_UNROLL_MN
|
||||||
|
DGEMM_DEFAULT_UNROLL_MN,
|
||||||
|
#else
|
||||||
|
MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
|
||||||
|
#endif
|
||||||
|
|
||||||
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
|
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
|
||||||
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
|
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
|
||||||
|
@ -214,7 +226,12 @@ gotoblas_t TABLE_NAME = {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
|
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
|
||||||
|
#ifdef CGEMM_DEFAULT_UNROLL_MN
|
||||||
|
CGEMM_DEFAULT_UNROLL_MN,
|
||||||
|
#else
|
||||||
|
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
|
||||||
|
#endif
|
||||||
|
|
||||||
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
|
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
|
||||||
cnrm2_kTS, casum_kTS, ccopy_kTS,
|
cnrm2_kTS, casum_kTS, ccopy_kTS,
|
||||||
|
@ -307,7 +324,12 @@ gotoblas_t TABLE_NAME = {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
|
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
|
||||||
|
#ifdef ZGEMM_DEFAULT_UNROLL_MN
|
||||||
|
ZGEMM_DEFAULT_UNROLL_MN,
|
||||||
|
#else
|
||||||
|
MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
|
||||||
|
#endif
|
||||||
|
|
||||||
zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
|
zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
|
||||||
znrm2_kTS, zasum_kTS, zcopy_kTS,
|
znrm2_kTS, zasum_kTS, zcopy_kTS,
|
||||||
|
|
1
param.h
1
param.h
|
@ -1206,6 +1206,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||||
|
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_MN 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
|
|
Loading…
Reference in New Issue