diff --git a/common_mips64.h b/common_mips64.h index acea79011..2aa325bfa 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -170,6 +170,13 @@ static inline int blas_quickdivide(blasint x, blasint y){ #define CMPEQ c.eq.s #define CMPLE c.le.s #define CMPLT c.lt.s +#define PLU plu.ps +#define PLL pll.ps +#define PUU puu.ps +#define PUL pul.ps +#define MADPS madd.ps +#define CVTU cvt.s.pu +#define CVTL cvt.s.pl #endif #if defined(__64BIT__) && defined(USE64BITINT) @@ -218,7 +225,7 @@ REALNAME: ;\ #define SEEK_ADDRESS -#define BUFFER_SIZE ( 8 << 20) +#define BUFFER_SIZE ( 32 << 20) #if defined(LOONGSON3A) #define PAGESIZE (16UL << 10) diff --git a/kernel/mips64/KERNEL.LOONGSON3A b/kernel/mips64/KERNEL.LOONGSON3A index ebab8e6ea..4a195f265 100644 --- a/kernel/mips64/KERNEL.LOONGSON3A +++ b/kernel/mips64/KERNEL.LOONGSON3A @@ -1,9 +1,13 @@ SAXPYKERNEL=axpy_loongson3a.S DAXPYKERNEL=daxpy_loongson3a_simd.S -SGEMMKERNEL = sgemm_kernel_loongson3a_4x4.S -SGEMMONCOPY = ../generic/gemm_ncopy_4.c -SGEMMOTCOPY = ../generic/gemm_tcopy_4.c +SGEMMKERNEL = sgemm_kernel_8x4_ps.S +SGEMMINCOPY = ../generic/gemm_ncopy_8.c +SGEMMITCOPY = ../generic/gemm_tcopy_8.c +SGEMMONCOPY = ../generic/gemm_ncopy_4.c +SGEMMOTCOPY = ../generic/gemm_tcopy_4.c +SGEMMINCOPYOBJ = sgemm_incopy.o +SGEMMITCOPYOBJ = sgemm_itcopy.o SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o diff --git a/param.h b/param.h index fd399a96f..2c3021710 100644 --- a/param.h +++ b/param.h @@ -1480,7 +1480,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_ALIGN 0x03fffUL -#define SGEMM_DEFAULT_UNROLL_M 4 +#define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_M 4 @@ -1497,16 +1497,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM_DEFAULT_P 64 #define ZGEMM_DEFAULT_P 32 -#define SGEMM_DEFAULT_Q 116 -#define DGEMM_DEFAULT_Q 116 +#define SGEMM_DEFAULT_Q 128 +#define DGEMM_DEFAULT_Q 112 #define CGEMM_DEFAULT_Q 100 #define ZGEMM_DEFAULT_Q 80 -#define SGEMM_DEFAULT_R 1000 +#define SGEMM_DEFAULT_R 1024 +//#define DGEMM_DEFAULT_R 300 +//#define DGEMM_DEFAULT_R 200 +//#define DGEMM_DEFAULT_R 400 +//#define DGEMM_DEFAULT_R 192 #define DGEMM_DEFAULT_R 1000 +//#define DGEMM_DEFAULT_R 160 +//#define DGEMM_DEFAULT_R 270 #define CGEMM_DEFAULT_R 1000 +//#define ZGEMM_DEFAULT_R 1000 #define ZGEMM_DEFAULT_R 1000 +#define GEMM_OFFSET_A1 (DGEMM_DEFAULT_P*DGEMM_DEFAULT_Q*SIZE) +//#define GEMM_OFFSET_B1 0x10 +#define GEMM_OFFSET_B1 (DGEMM_DEFAULT_Q*DGEMM_DEFAULT_R*SIZE) +#define GEMM_OFFSET 0x100000 +#define GEMM_OFFSET1 0x40000 + #define SYMV_P 16 #endif