Modify the aligned addresses of sa and sb to improve multi-threaded performance.
This commit is contained in:
parent
e08cfaf9ca
commit
831858b883
|
@ -71,15 +71,15 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||||
queue[num_cpu].args = arg;
|
queue[num_cpu].args = arg;
|
||||||
queue[num_cpu].range_m = range_m;
|
queue[num_cpu].range_m = range_m;
|
||||||
queue[num_cpu].range_n = &range[num_cpu];
|
queue[num_cpu].range_n = &range[num_cpu];
|
||||||
queue[num_cpu].sa = NULL;
|
queue[num_cpu].sa = sa + GEMM_OFFSET_A1 * num_cpu; //NULL;
|
||||||
queue[num_cpu].sb = NULL;
|
queue[num_cpu].sb = queue[num_cpu].sa + GEMM_OFFSET_A1 * 5;//NULL;
|
||||||
queue[num_cpu].next = &queue[num_cpu + 1];
|
queue[num_cpu].next = &queue[num_cpu + 1];
|
||||||
num_cpu ++;
|
num_cpu ++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_cpu) {
|
if (num_cpu) {
|
||||||
queue[0].sa = sa;
|
queue[0].sa = sa;
|
||||||
queue[0].sb = sb;
|
queue[0].sb = sa + GEMM_OFFSET_A1 * 5;
|
||||||
|
|
||||||
queue[num_cpu - 1].next = NULL;
|
queue[num_cpu - 1].next = NULL;
|
||||||
|
|
||||||
|
|
|
@ -688,11 +688,11 @@ void blas_set_parameter(void){
|
||||||
if(blas_num_threads == 1){
|
if(blas_num_threads == 1){
|
||||||
#endif
|
#endif
|
||||||
//single thread
|
//single thread
|
||||||
dgemm_r = 1000;
|
dgemm_r = 1024;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
}else{
|
}else{
|
||||||
//multi thread
|
//multi thread
|
||||||
dgemm_r = 300;
|
dgemm_r = 200;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
25
param.h
25
param.h
|
@ -1493,33 +1493,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 64
|
#define SGEMM_DEFAULT_P 64
|
||||||
#define DGEMM_DEFAULT_P 32
|
#define DGEMM_DEFAULT_P 44
|
||||||
#define CGEMM_DEFAULT_P 64
|
#define CGEMM_DEFAULT_P 64
|
||||||
#define ZGEMM_DEFAULT_P 32
|
#define ZGEMM_DEFAULT_P 32
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 192
|
#define SGEMM_DEFAULT_Q 192
|
||||||
#define DGEMM_DEFAULT_Q 112
|
#define DGEMM_DEFAULT_Q 92
|
||||||
#define CGEMM_DEFAULT_Q 192
|
#define CGEMM_DEFAULT_Q 128
|
||||||
#define ZGEMM_DEFAULT_Q 80
|
#define ZGEMM_DEFAULT_Q 80
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 1024
|
#define SGEMM_DEFAULT_R 1024
|
||||||
//#define DGEMM_DEFAULT_R 300
|
#define DGEMM_DEFAULT_R dgemm_r
|
||||||
//#define DGEMM_DEFAULT_R 200
|
|
||||||
//#define DGEMM_DEFAULT_R 400
|
|
||||||
//#define DGEMM_DEFAULT_R 192
|
|
||||||
#define DGEMM_DEFAULT_R dgemm_r
|
|
||||||
//1000
|
|
||||||
//#define DGEMM_DEFAULT_R 160
|
|
||||||
//#define DGEMM_DEFAULT_R 270
|
|
||||||
#define CGEMM_DEFAULT_R 1024
|
#define CGEMM_DEFAULT_R 1024
|
||||||
//#define ZGEMM_DEFAULT_R 1000
|
#define ZGEMM_DEFAULT_R 1024
|
||||||
#define ZGEMM_DEFAULT_R 1000
|
|
||||||
|
|
||||||
#define GEMM_OFFSET_A1 (DGEMM_DEFAULT_P*DGEMM_DEFAULT_Q*SIZE)
|
#define GEMM_OFFSET_A1 0x10000
|
||||||
//#define GEMM_OFFSET_B1 0x10
|
#define GEMM_OFFSET_B1 0x100000
|
||||||
#define GEMM_OFFSET_B1 (DGEMM_DEFAULT_Q*DGEMM_DEFAULT_R*SIZE)
|
|
||||||
#define GEMM_OFFSET 0x100000
|
|
||||||
#define GEMM_OFFSET1 0x40000
|
|
||||||
|
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue