diff --git a/driver/level3/gemm_thread_n.c b/driver/level3/gemm_thread_n.c index ba54612eb..62907fa65 100644 --- a/driver/level3/gemm_thread_n.c +++ b/driver/level3/gemm_thread_n.c @@ -71,15 +71,15 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( queue[num_cpu].args = arg; queue[num_cpu].range_m = range_m; queue[num_cpu].range_n = &range[num_cpu]; - queue[num_cpu].sa = NULL; - queue[num_cpu].sb = NULL; + queue[num_cpu].sa = sa + GEMM_OFFSET_A1 * num_cpu; //NULL; + queue[num_cpu].sb = queue[num_cpu].sa + GEMM_OFFSET_A1 * 5;//NULL; queue[num_cpu].next = &queue[num_cpu + 1]; num_cpu ++; } if (num_cpu) { queue[0].sa = sa; - queue[0].sb = sb; + queue[0].sb = sa + GEMM_OFFSET_A1 * 5; queue[num_cpu - 1].next = NULL; diff --git a/driver/others/parameter.c b/driver/others/parameter.c index 4a8542a93..fc7f0447e 100644 --- a/driver/others/parameter.c +++ b/driver/others/parameter.c @@ -688,11 +688,11 @@ void blas_set_parameter(void){ if(blas_num_threads == 1){ #endif //single thread - dgemm_r = 1000; + dgemm_r = 1024; #ifdef SMP }else{ //multi thread - dgemm_r = 300; + dgemm_r = 200; } #endif #endif diff --git a/param.h b/param.h index 1c729e8b9..4ffe05cf8 100644 --- a/param.h +++ b/param.h @@ -1493,33 +1493,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_UNROLL_N 2 #define SGEMM_DEFAULT_P 64 -#define DGEMM_DEFAULT_P 32 +#define DGEMM_DEFAULT_P 44 #define CGEMM_DEFAULT_P 64 #define ZGEMM_DEFAULT_P 32 #define SGEMM_DEFAULT_Q 192 -#define DGEMM_DEFAULT_Q 112 -#define CGEMM_DEFAULT_Q 192 +#define DGEMM_DEFAULT_Q 92 +#define CGEMM_DEFAULT_Q 128 #define ZGEMM_DEFAULT_Q 80 #define SGEMM_DEFAULT_R 1024 -//#define DGEMM_DEFAULT_R 300 -//#define DGEMM_DEFAULT_R 200 -//#define DGEMM_DEFAULT_R 400 -//#define DGEMM_DEFAULT_R 192 -#define DGEMM_DEFAULT_R dgemm_r -//1000 -//#define DGEMM_DEFAULT_R 160 -//#define DGEMM_DEFAULT_R 270 +#define DGEMM_DEFAULT_R dgemm_r #define CGEMM_DEFAULT_R 1024 -//#define ZGEMM_DEFAULT_R 1000 -#define ZGEMM_DEFAULT_R 1000 +#define ZGEMM_DEFAULT_R 1024 -#define GEMM_OFFSET_A1 (DGEMM_DEFAULT_P*DGEMM_DEFAULT_Q*SIZE) -//#define GEMM_OFFSET_B1 0x10 -#define GEMM_OFFSET_B1 (DGEMM_DEFAULT_Q*DGEMM_DEFAULT_R*SIZE) -#define GEMM_OFFSET 0x100000 -#define GEMM_OFFSET1 0x40000 +#define GEMM_OFFSET_A1 0x10000 +#define GEMM_OFFSET_B1 0x100000 #define SYMV_P 16 #endif