Allow setting a custom value for ?GEMM_DEFAULT_UNROLL_MN; optimizations for syrk

This commit is contained in:
wernsaar 2014-07-24 18:43:31 +02:00
parent e213a42cde
commit 125610d23b
5 changed files with 51 additions and 12 deletions

View File

@ -919,14 +919,22 @@ extern gotoblas_t *gotoblas;
#define SGEMM_R SGEMM_DEFAULT_R #define SGEMM_R SGEMM_DEFAULT_R
#define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M #define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M
#define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N #define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N
#ifdef SGEMM_DEFAULT_UNROLL_MN
#define SGEMM_UNROLL_MN SGEMM_DEFAULT_UNROLL_MN
#else
#define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N)) #define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N))
#endif
#define DGEMM_P DGEMM_DEFAULT_P #define DGEMM_P DGEMM_DEFAULT_P
#define DGEMM_Q DGEMM_DEFAULT_Q #define DGEMM_Q DGEMM_DEFAULT_Q
#define DGEMM_R DGEMM_DEFAULT_R #define DGEMM_R DGEMM_DEFAULT_R
#define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M #define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M
#define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N #define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N
#ifdef DGEMM_DEFAULT_UNROLL_MN
#define DGEMM_UNROLL_MN DGEMM_DEFAULT_UNROLL_MN
#else
#define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N)) #define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N))
#endif
#define QGEMM_P QGEMM_DEFAULT_P #define QGEMM_P QGEMM_DEFAULT_P
#define QGEMM_Q QGEMM_DEFAULT_Q #define QGEMM_Q QGEMM_DEFAULT_Q
@ -940,14 +948,22 @@ extern gotoblas_t *gotoblas;
#define CGEMM_R CGEMM_DEFAULT_R #define CGEMM_R CGEMM_DEFAULT_R
#define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M #define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M
#define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N #define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N
#ifdef CGEMM_DEFAULT_UNROLL_MN
#define CGEMM_UNROLL_MN CGEMM_DEFAULT_UNROLL_MN
#else
#define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N)) #define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
#endif
#define ZGEMM_P ZGEMM_DEFAULT_P #define ZGEMM_P ZGEMM_DEFAULT_P
#define ZGEMM_Q ZGEMM_DEFAULT_Q #define ZGEMM_Q ZGEMM_DEFAULT_Q
#define ZGEMM_R ZGEMM_DEFAULT_R #define ZGEMM_R ZGEMM_DEFAULT_R
#define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M #define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M
#define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N #define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N
#ifdef ZGEMM_DEFAULT_UNROLL_MN
#define ZGEMM_UNROLL_MN ZGEMM_DEFAULT_UNROLL_MN
#else
#define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N)) #define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
#endif
#define XGEMM_P XGEMM_DEFAULT_P #define XGEMM_P XGEMM_DEFAULT_P
#define XGEMM_Q XGEMM_DEFAULT_Q #define XGEMM_Q XGEMM_DEFAULT_Q

View File

@ -538,10 +538,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1;
#elif defined(DOUBLE) #elif defined(DOUBLE)
mode = BLAS_DOUBLE | BLAS_REAL; mode = BLAS_DOUBLE | BLAS_REAL;
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; mask = DGEMM_UNROLL_MN - 1;
#else #else
mode = BLAS_SINGLE | BLAS_REAL; mode = BLAS_SINGLE | BLAS_REAL;
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; mask = SGEMM_UNROLL_MN - 1;
#endif #endif
#else #else
#ifdef XDOUBLE #ifdef XDOUBLE
@ -549,10 +549,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1;
#elif defined(DOUBLE) #elif defined(DOUBLE)
mode = BLAS_DOUBLE | BLAS_COMPLEX; mode = BLAS_DOUBLE | BLAS_COMPLEX;
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; mask = ZGEMM_UNROLL_MN - 1;
#else #else
mode = BLAS_SINGLE | BLAS_COMPLEX; mode = BLAS_SINGLE | BLAS_COMPLEX;
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; mask = CGEMM_UNROLL_MN - 1;
#endif #endif
#endif #endif

View File

@ -57,10 +57,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
switch (mode & BLAS_PREC) { switch (mode & BLAS_PREC) {
case BLAS_SINGLE: case BLAS_SINGLE:
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; mask = SGEMM_UNROLL_MN - 1;
break; break;
case BLAS_DOUBLE: case BLAS_DOUBLE:
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; mask = DGEMM_UNROLL_MN - 1;
break; break;
#ifdef EXPRECISION #ifdef EXPRECISION
case BLAS_XDOUBLE: case BLAS_XDOUBLE:
@ -71,10 +71,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
} else { } else {
switch (mode & BLAS_PREC) { switch (mode & BLAS_PREC) {
case BLAS_SINGLE: case BLAS_SINGLE:
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; mask = CGEMM_UNROLL_MN - 1;
break; break;
case BLAS_DOUBLE: case BLAS_DOUBLE:
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; mask = ZGEMM_UNROLL_MN - 1;
break; break;
#ifdef EXPRECISION #ifdef EXPRECISION
case BLAS_XDOUBLE: case BLAS_XDOUBLE:

View File

@ -54,7 +54,14 @@ gotoblas_t TABLE_NAME = {
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
0, 0, 0, 0, 0, 0,
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
#ifdef SGEMM_DEFAULT_UNROLL_MN
SGEMM_DEFAULT_UNROLL_MN,
#else
MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif
#ifdef HAVE_EXCLUSIVE_CACHE #ifdef HAVE_EXCLUSIVE_CACHE
1, 1,
#else #else
@ -110,7 +117,12 @@ gotoblas_t TABLE_NAME = {
#endif #endif
0, 0, 0, 0, 0, 0,
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
#ifdef DGEMM_DEFAULT_UNROLL_MN
DGEMM_DEFAULT_UNROLL_MN,
#else
MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
@ -214,7 +226,12 @@ gotoblas_t TABLE_NAME = {
#endif #endif
0, 0, 0, 0, 0, 0,
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
#ifdef CGEMM_DEFAULT_UNROLL_MN
CGEMM_DEFAULT_UNROLL_MN,
#else
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
#endif
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
cnrm2_kTS, casum_kTS, ccopy_kTS, cnrm2_kTS, casum_kTS, ccopy_kTS,
@ -307,7 +324,12 @@ gotoblas_t TABLE_NAME = {
#endif #endif
0, 0, 0, 0, 0, 0,
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
#ifdef ZGEMM_DEFAULT_UNROLL_MN
ZGEMM_DEFAULT_UNROLL_MN,
#else
MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
#endif
zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS, zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
znrm2_kTS, zasum_kTS, zcopy_kTS, znrm2_kTS, zasum_kTS, zcopy_kTS,

View File

@ -1206,6 +1206,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#define DGEMM_DEFAULT_UNROLL_MN 16
#endif #endif
#ifdef ARCH_X86 #ifdef ARCH_X86