Modify the aligned addresses of sa and sb to improve multi-threaded performance.
This commit is contained in:
		
							parent
							
								
									e08cfaf9ca
								
							
						
					
					
						commit
						831858b883
					
				| 
						 | 
					@ -71,15 +71,15 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
 | 
				
			||||||
    queue[num_cpu].args    = arg;
 | 
					    queue[num_cpu].args    = arg;
 | 
				
			||||||
    queue[num_cpu].range_m = range_m;
 | 
					    queue[num_cpu].range_m = range_m;
 | 
				
			||||||
    queue[num_cpu].range_n = &range[num_cpu];
 | 
					    queue[num_cpu].range_n = &range[num_cpu];
 | 
				
			||||||
    queue[num_cpu].sa      = NULL;
 | 
					    queue[num_cpu].sa      = sa	+ GEMM_OFFSET_A1 * num_cpu;	//NULL;
 | 
				
			||||||
    queue[num_cpu].sb      = NULL;
 | 
					    queue[num_cpu].sb      = queue[num_cpu].sa + GEMM_OFFSET_A1 * 5;//NULL;
 | 
				
			||||||
    queue[num_cpu].next    = &queue[num_cpu + 1];
 | 
					    queue[num_cpu].next    = &queue[num_cpu + 1];
 | 
				
			||||||
    num_cpu ++;
 | 
					    num_cpu ++;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  if (num_cpu) {
 | 
					  if (num_cpu) {
 | 
				
			||||||
    queue[0].sa = sa;
 | 
					    queue[0].sa = sa;
 | 
				
			||||||
    queue[0].sb = sb;
 | 
					    queue[0].sb = sa + GEMM_OFFSET_A1 * 5;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    queue[num_cpu - 1].next = NULL;
 | 
					    queue[num_cpu - 1].next = NULL;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -688,11 +688,11 @@ void blas_set_parameter(void){
 | 
				
			||||||
  if(blas_num_threads == 1){
 | 
					  if(blas_num_threads == 1){
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
    //single thread
 | 
					    //single thread
 | 
				
			||||||
    dgemm_r = 1000;
 | 
					    dgemm_r = 1024;
 | 
				
			||||||
#ifdef SMP
 | 
					#ifdef SMP
 | 
				
			||||||
  }else{
 | 
					  }else{
 | 
				
			||||||
    //multi thread
 | 
					    //multi thread
 | 
				
			||||||
    dgemm_r = 300;
 | 
					    dgemm_r = 200;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										23
									
								
								param.h
								
								
								
								
							
							
						
						
									
										23
									
								
								param.h
								
								
								
								
							| 
						 | 
					@ -1493,33 +1493,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
#define ZGEMM_DEFAULT_UNROLL_N  2
 | 
					#define ZGEMM_DEFAULT_UNROLL_N  2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SGEMM_DEFAULT_P	64
 | 
					#define SGEMM_DEFAULT_P	64
 | 
				
			||||||
#define DGEMM_DEFAULT_P	32 
 | 
					#define DGEMM_DEFAULT_P	44 
 | 
				
			||||||
#define CGEMM_DEFAULT_P 64
 | 
					#define CGEMM_DEFAULT_P 64
 | 
				
			||||||
#define ZGEMM_DEFAULT_P 32
 | 
					#define ZGEMM_DEFAULT_P 32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SGEMM_DEFAULT_Q 192
 | 
					#define SGEMM_DEFAULT_Q 192
 | 
				
			||||||
#define DGEMM_DEFAULT_Q 112
 | 
					#define DGEMM_DEFAULT_Q 92
 | 
				
			||||||
#define CGEMM_DEFAULT_Q 192
 | 
					#define CGEMM_DEFAULT_Q 128
 | 
				
			||||||
#define ZGEMM_DEFAULT_Q 80
 | 
					#define ZGEMM_DEFAULT_Q 80
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SGEMM_DEFAULT_R 1024
 | 
					#define SGEMM_DEFAULT_R 1024
 | 
				
			||||||
//#define DGEMM_DEFAULT_R 300
 | 
					 | 
				
			||||||
//#define DGEMM_DEFAULT_R 200
 | 
					 | 
				
			||||||
//#define DGEMM_DEFAULT_R 400
 | 
					 | 
				
			||||||
//#define DGEMM_DEFAULT_R 192
 | 
					 | 
				
			||||||
#define DGEMM_DEFAULT_R dgemm_r 
 | 
					#define DGEMM_DEFAULT_R dgemm_r 
 | 
				
			||||||
//1000
 | 
					 | 
				
			||||||
//#define DGEMM_DEFAULT_R 160
 | 
					 | 
				
			||||||
//#define DGEMM_DEFAULT_R 270
 | 
					 | 
				
			||||||
#define CGEMM_DEFAULT_R 1024
 | 
					#define CGEMM_DEFAULT_R 1024
 | 
				
			||||||
//#define ZGEMM_DEFAULT_R 1000
 | 
					#define ZGEMM_DEFAULT_R 1024
 | 
				
			||||||
#define ZGEMM_DEFAULT_R 1000
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GEMM_OFFSET_A1	(DGEMM_DEFAULT_P*DGEMM_DEFAULT_Q*SIZE)
 | 
					#define GEMM_OFFSET_A1	0x10000
 | 
				
			||||||
//#define	GEMM_OFFSET_B1	0x10	
 | 
					#define	GEMM_OFFSET_B1	0x100000
 | 
				
			||||||
#define	GEMM_OFFSET_B1	(DGEMM_DEFAULT_Q*DGEMM_DEFAULT_R*SIZE)
 | 
					 | 
				
			||||||
#define	GEMM_OFFSET	0x100000
 | 
					 | 
				
			||||||
#define	GEMM_OFFSET1	0x40000
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SYMV_P	16
 | 
					#define SYMV_P	16
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue