Refs #47. On Loongson 3A, set the DGEMM_R parameter according to the number of threads. This improves double-precision BLAS3 performance in multi-threaded runs.

This commit is contained in:
Xianyi Zhang 2011-09-05 15:13:05 +00:00
parent 74d4cdb81a
commit 4727fe8abf
6 changed files with 44 additions and 3 deletions

View File

@ -2127,7 +2127,9 @@
#endif
#ifndef ASSEMBLER
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b;
extern BLASLONG sgemm_p;
extern BLASLONG sgemm_q;
extern BLASLONG sgemm_r;

View File

@ -797,6 +797,11 @@ void goto_set_num_threads(int num_threads) {
blas_cpu_number = num_threads;
#if defined(ARCH_MIPS64)
//set parameters for different number of threads.
blas_set_parameter();
#endif
}
void openblas_set_num_threads(int num_threads) {

View File

@ -63,6 +63,11 @@ void goto_set_num_threads(int num_threads) {
omp_set_num_threads(blas_cpu_number);
#if defined(ARCH_MIPS64)
//set parameters for different number of threads.
blas_set_parameter();
#endif
}
void openblas_set_num_threads(int num_threads) {

View File

@ -884,7 +884,7 @@ void *blas_memory_alloc(int procpos){
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
#endif
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
#ifndef DYNAMIC_ARCH
blas_set_parameter();
#endif

View File

@ -45,8 +45,22 @@ int get_L2_size(void);
/* Fallback blocking sizes and buffer offsets, used below when a tuning macro
 * does not supply a compile-time constant of its own. */
#define DEFAULT_GEMM_P 128
#define DEFAULT_GEMM_Q 128
#define DEFAULT_GEMM_R 128
#define DEFAULT_GEMM_OFFSET_A 0
#define DEFAULT_GEMM_OFFSET_B 0
/* Global Parameter */
/* Selection idiom: inside a preprocessor #if, any identifier that is not a
 * macro evaluates to 0. If GEMM_OFFSET_A expands to the plain identifier
 * gemm_offset_a (presumably how the headers mark a run-time tunable value --
 * confirm where GEMM_OFFSET_A is defined), both sides of the comparison are 0,
 * the test is true, and the variable is initialised from the DEFAULT_* value.
 * Otherwise GEMM_OFFSET_A is taken as the initialiser itself. */
#if GEMM_OFFSET_A == gemm_offset_a
BLASLONG gemm_offset_a = DEFAULT_GEMM_OFFSET_A;
#else
BLASLONG gemm_offset_a = GEMM_OFFSET_A;
#endif
/* Same selection for the B-side offset. */
#if GEMM_OFFSET_B == gemm_offset_b
BLASLONG gemm_offset_b = DEFAULT_GEMM_OFFSET_B;
#else
BLASLONG gemm_offset_b = GEMM_OFFSET_B;
#endif
#if SGEMM_P == sgemm_p
BLASLONG sgemm_p = DEFAULT_GEMM_P;
#else
@ -666,3 +680,17 @@ void blas_set_parameter(void){
#endif
#endif
#if defined(ARCH_MIPS64)
/*
 * Re-tune the DGEMM R blocking parameter for the current thread setup.
 *
 * Only LOONGSON3A builds do any work here: dgemm_r becomes 1000 when
 * blas_num_threads is exactly 1 and 300 for every other value.
 * When LOONGSON3A is not defined the function body is empty.
 */
void blas_set_parameter(void){
#if defined(LOONGSON3A)
  /* single thread -> 1000, multi thread -> 300 */
  dgemm_r = (blas_num_threads == 1) ? 1000 : 300;
#endif
}
#endif

View File

@ -1507,7 +1507,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#define DGEMM_DEFAULT_R 200
//#define DGEMM_DEFAULT_R 400
//#define DGEMM_DEFAULT_R 192
#define DGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R dgemm_r
//1000
//#define DGEMM_DEFAULT_R 160
//#define DGEMM_DEFAULT_R 270
#define CGEMM_DEFAULT_R 1000