diff --git a/driver/level3/level3.c b/driver/level3/level3.c index 70a6500b6..1ede8a247 100644 --- a/driver/level3/level3.c +++ b/driver/level3/level3.c @@ -335,7 +335,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; else - if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; + if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; + else + if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 096342a32..038274300 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -367,7 +367,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; else - if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; + if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; + else + if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; START_RPCC(); diff --git a/param.h b/param.h index bed2e452b..962f80ef3 100644 --- a/param.h +++ b/param.h @@ -262,6 +262,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM3M_DEFAULT_UNROLL_M 8 #define ZGEMM3M_DEFAULT_UNROLL_N 4 #define ZGEMM3M_DEFAULT_UNROLL_M 4 + +#define DGEMM_DEFAULT_UNROLL_MN 16 #define GEMV_UNROLL 8 #endif @@ -273,6 +275,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_P 448 #define DGEMM_DEFAULT_P 224 #endif + #define QGEMM_DEFAULT_P 112 #define CGEMM_DEFAULT_P 224 #define ZGEMM_DEFAULT_P 112 @@ -285,6 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_Q 224 #define DGEMM_DEFAULT_Q 224 #endif + #define QGEMM_DEFAULT_Q 224 #define CGEMM_DEFAULT_Q 224 #define ZGEMM_DEFAULT_Q 224 @@ -1420,7 +1424,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_UNROLL_N 2 #define XGEMM_DEFAULT_UNROLL_N 1 -#define DGEMM_DEFAULT_UNROLL_MN 16 +#define SGEMM_DEFAULT_UNROLL_MN 32 +#define DGEMM_DEFAULT_UNROLL_MN 32 #endif #ifdef ARCH_X86