Ref #79 Added GEMM_MULTITHREAD_THRESHOLD flag to use single thread in gemm function with small matrices.
This commit is contained in:
parent
3afedbf6f0
commit
31c836ac25
|
@ -88,6 +88,11 @@ VERSION = 0.1alpha2.5
|
||||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||||
# CONSISTENT_FPCSR = 1
|
# CONSISTENT_FPCSR = 1
|
||||||
|
|
||||||
|
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||||
|
# with a single thread. You can use this flag to avoid the overhead of multi-threading
|
||||||
|
# in small matrix sizes. The default value is 4.
|
||||||
|
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||||
|
|
||||||
# If you need sanity check by comparing reference BLAS. It'll be very
|
# If you need sanity check by comparing reference BLAS. It'll be very
|
||||||
# slow (Not implemented yet).
|
# slow (Not implemented yet).
|
||||||
# SANITY_CHECK = 1
|
# SANITY_CHECK = 1
|
||||||
|
|
|
@ -40,6 +40,11 @@ ifdef INTERFACE64
|
||||||
GETARCH_FLAGS += -DUSE64BITINT
|
GETARCH_FLAGS += -DUSE64BITINT
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||||
|
GEMM_MULTITHREAD_THRESHOLD=4
|
||||||
|
endif
|
||||||
|
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
|
||||||
|
|
||||||
# This operation is expensive, so execution should be once.
|
# This operation is expensive, so execution should be once.
|
||||||
ifndef GOTOBLAS_MAKEFILE
|
ifndef GOTOBLAS_MAKEFILE
|
||||||
export GOTOBLAS_MAKEFILE = 1
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
|
|
@ -34,6 +34,7 @@ int main(int argc, char **argv) {
|
||||||
#ifdef USE64BITINT
|
#ifdef USE64BITINT
|
||||||
printf("#define USE64BITINT\n");
|
printf("#define USE64BITINT\n");
|
||||||
#endif
|
#endif
|
||||||
|
printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", GEMM_MULTITHREAD_THRESHOLD);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -397,8 +397,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||||
mode |= (transb << BLAS_TRANSB_SHIFT);
|
mode |= (transb << BLAS_TRANSB_SHIFT);
|
||||||
|
|
||||||
args.common = NULL;
|
args.common = NULL;
|
||||||
args.nthreads = num_cpu_avail(3);
|
|
||||||
|
|
||||||
|
if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD
|
||||||
|
|| args.k <=GEMM_MULTITHREAD_THRESHOLD){
|
||||||
|
args.nthreads = 1;
|
||||||
|
}else{
|
||||||
|
args.nthreads = num_cpu_avail(3);
|
||||||
|
}
|
||||||
if (args.nthreads == 1) {
|
if (args.nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue