From 65de6f5957f9940ed338c1fdef251dbad70eb908 Mon Sep 17 00:00:00 2001 From: Jin Bo Date: Sat, 5 Dec 2020 15:06:12 +0800 Subject: [PATCH 01/12] Fix test errors reported by cblas_cgemm & cblas_ctrmm The file cgemm_kernel_8x4_msa.c holds the MSA optimization codes of cblas_cgemm and cblas_ctrmm. It defines two macros: CGEMM_SCALE_1X2 and CGEMM_TRMM_SCALE_1X2. The pc1 array index in the two macros should be 0 and 1. --- kernel/mips/cgemm_kernel_8x4_msa.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/mips/cgemm_kernel_8x4_msa.c b/kernel/mips/cgemm_kernel_8x4_msa.c index 4b3637c7c..8b624be88 100644 --- a/kernel/mips/cgemm_kernel_8x4_msa.c +++ b/kernel/mips/cgemm_kernel_8x4_msa.c @@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pc0[1] += alphar * res1; \ pc0[1] += alphai * res0; \ \ - pc1[2] += alphar * res2; \ - pc1[2] -= alphai * res3; \ - pc1[3] += alphar * res3; \ - pc1[3] += alphai * res2; \ + pc1[0] += alphar * res2; \ + pc1[0] -= alphai * res3; \ + pc1[1] += alphar * res3; \ + pc1[1] += alphai * res2; \ } #define CGEMM_SCALE_1X1 \ @@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pc0[1] = alphar * res1; \ pc0[1] += alphai * res0; \ \ - pc1[2] = alphar * res2; \ - pc1[2] -= alphai * res3; \ - pc1[3] = alphar * res3; \ - pc1[3] += alphai * res2; \ + pc1[0] = alphar * res2; \ + pc1[0] -= alphai * res3; \ + pc1[1] = alphar * res3; \ + pc1[1] += alphai * res2; \ } #define CGEMM_TRMM_SCALE_1X1 \ From 04fa17322c09c497ad8f69ab12ec8684a0847c60 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:05:27 +0100 Subject: [PATCH 02/12] Fix build options for SolarisStudio compilers --- Makefile.sparc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Makefile.sparc b/Makefile.sparc index 8895b96dd..61c7aa36d 100644 --- a/Makefile.sparc +++ b/Makefile.sparc @@ -3,21 +3,29 @@ RANLIB = ranlib ifdef BINARY64 +ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -mcpu=v9 -m64 +else +CCOMMON_OPT += -m64 +endif ifeq ($(COMPILER_F77), g77) FCOMMON_OPT += -mcpu=v9 -m64 endif -ifeq ($(COMPILER_F77), f90) -FCOMMON_OPT += -xarch=v9 +ifeq ($(COMPILER_F77), f95) +FCOMMON_OPT += -m64 endif else +ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -mcpu=v9 +else +CCOMMON_OPT += -xarch=v9 +endif ifeq ($(COMPILER_F77), g77) FCOMMON_OPT += -mcpu=v9 endif -ifeq ($(COMPILER_F77), f90) +ifeq ($(COMPILER_F77), f95) FCOMMON_OPT += -xarch=v8plusb endif @@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \ else LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \ -Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath -endif \ No newline at end of file +endif From da6d5d675c3db0cfd4926704a9b72f89dc4963b8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:07:45 +0100 Subject: [PATCH 03/12] Fix hostarch detection for sparc --- c_check | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/c_check b/c_check index a841df153..fe9c53f0e 100644 --- a/c_check +++ b/c_check @@ -6,7 +6,8 @@ # Checking cross compile $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); -$hostarch = `uname -p` if ($hostos eq "AIX"); +$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS"); +chop($hostarch); $hostarch = "x86_64" if ($hostarch eq "amd64"); $hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); $hostarch = "arm64" if ($hostarch eq "aarch64"); From 3a1b1b7c8cc7081155a1f0d9411c9d68ab7559fa Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:08:43 +0100 Subject: [PATCH 04/12] Fix complex ABI for 32bit SolarisStudio builds --- common_sparc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/common_sparc.h b/common_sparc.h index 85e29fffa..90a24ebf1 100644 --- a/common_sparc.h +++ b/common_sparc.h @@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){ #define __BIG_ENDIAN__ #endif +#ifdef C_SUN +#ifndef __64BIT +#define RETURN_BY_STACK +#endif +#endif + #ifdef DOUBLE #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory") #else From b0b14f4e9ba13331ab484010b7150495dccb8e83 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:12:02 +0100 Subject: [PATCH 05/12] Change comments to C style for compatibility --- param.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/param.h b/param.h index ee5ad17fb..a0d45c573 100644 --- a/param.h +++ b/param.h @@ -1454,22 +1454,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_P 768 #define SGEMM_DEFAULT_R sgemm_r -//#define SGEMM_DEFAULT_R 1024 +/*#define SGEMM_DEFAULT_R 1024*/ #define DGEMM_DEFAULT_P 512 #define DGEMM_DEFAULT_R dgemm_r -//#define DGEMM_DEFAULT_R 1024 +/*#define DGEMM_DEFAULT_R 1024*/ #define QGEMM_DEFAULT_P 504 #define QGEMM_DEFAULT_R qgemm_r #define CGEMM_DEFAULT_P 768 #define CGEMM_DEFAULT_R cgemm_r -//#define CGEMM_DEFAULT_R 1024 +/*#define CGEMM_DEFAULT_R 1024*/ #define ZGEMM_DEFAULT_P 512 #define ZGEMM_DEFAULT_R zgemm_r -//#define ZGEMM_DEFAULT_R 1024 +/*#define ZGEMM_DEFAULT_R 1024*/ #define XGEMM_DEFAULT_P 252 #define XGEMM_DEFAULT_R xgemm_r @@ -2571,7 +2571,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef LOONGSON3A -////Copy from SICORTEX +/*Copy from SICORTEX*/ #define SNUMOPT 2 #define DNUMOPT 2 @@ -2863,7 +2863,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif -// Common ARMv8 parameters +/* Common ARMv8 parameters */ #if defined(ARMV8) #define SNUMOPT 2 @@ -3066,7 +3066,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 -#else // Other/undetected ARMv8 cores +#else /* Other/undetected ARMv8 cores */ #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -3095,9 +3095,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 -#endif // Cores +#endif /* Cores */ -#endif // ARMv8 +#endif /* ARMv8 */ #if defined(ARMV5) #define SNUMOPT 2 From 93473174d6f59b989f36ae0ce6994d347d9c33bb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:12:56 +0100 Subject: [PATCH 06/12] Fix utest build with SolarisStudio compilers --- utest/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utest/Makefile b/utest/Makefile index 1fc30d088..fad3607de 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -35,6 +35,9 @@ endif ifeq ($(C_COMPILER), PGI) OBJS = utest_main2.o endif +ifeq ($(C_COMPILER), SUN) +OBJS = utest_main2.o +endif ifeq ($(OSNAME), AIX) OBJS = utest_main2.o endif From f8346603cf1794826cc2b04cd4708bb890f805b0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:14:16 +0100 Subject: [PATCH 07/12] Fix compilation with SolarisStudio --- kernel/arm/zdot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c index 73ae3acd7..9249b54f8 100644 --- a/kernel/arm/zdot.c +++ b/kernel/arm/zdot.c @@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA dot[0]=0.0; dot[1]=0.0; -#if !defined(__PPC__) +#if !defined(__PPC__) && !defined(__SunOS) CREAL(result) = 0.0 ; CIMAG(result) = 0.0 ; #else @@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA i++ ; } -#if !defined(__PPC__) +#if !defined(__PPC__) && !defined(__SunOS) CREAL(result) = dot[0]; CIMAG(result) = dot[1]; #else From b660008c7ef479d83f329e1aefbcf3dbed1653a6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:15:37 +0100 Subject: [PATCH 08/12] Work around DOT and SWAP test failures --- kernel/sparc/KERNEL.sparc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/sparc/KERNEL.sparc b/kernel/sparc/KERNEL.sparc index 2e8319ce5..1a2e9671a 100644 --- a/kernel/sparc/KERNEL.sparc +++ b/kernel/sparc/KERNEL.sparc @@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S ZTRSMKERNEL_LT = ztrsm_kernel_LT.S ZTRSMKERNEL_RN = ztrsm_kernel_LT.S ZTRSMKERNEL_RT = ztrsm_kernel_RT.S + + +SDOTKERNEL = ../generic/dot.c +SDSDOTKERNEL = ../generic/dot.c +DSDOTKERNEL = ../generic/dot.c +DDOTKERNEL = ../generic/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c From 6c7d557a166aaad44be389acb0ef6bf73935cdc3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 6 Dec 2020 19:20:50 +0100 Subject: [PATCH 09/12] Fix compiler options for 32 and 64bit SPARC builds with SolarisStudio --- Makefile.system | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Makefile.system b/Makefile.system index b5974f872..c17cd3bd1 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w ifeq ($(ARCH), x86) CCOMMON_OPT += -m32 else -FCOMMON_OPT += -m64 +ifdef BINARY64 +CCOMMON_OPT += -m64 +else +CCOMMON_OPT += -m32 +endif endif endif ifeq ($(F_COMPILER), SUN) CCOMMON_OPT += -DF_INTERFACE_SUN +FCOMMON_OPT += -ftrap=%none -xrecursive ifeq ($(ARCH), x86) FCOMMON_OPT += -m32 else +ifdef BINARY64 FCOMMON_OPT += -m64 +else +FCOMMON_OPT += -m32 +endif endif ifeq ($(USE_OPENMP), 1) FCOMMON_OPT += -xopenmp=parallel @@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH) include $(TOPDIR)/Makefile.$(ARCH) ifneq ($(C_COMPILER), PGI) +ifneq ($(C_COMPILER), SUN) CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME endif +endif CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" ifeq ($(CORE), PPC440) From 47b639cc9b4ff900f7b83751af9d1c4ff9dea3c1 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 7 Dec 2020 10:04:00 +0800 Subject: [PATCH 10/12] Fix failed sswap and dswap case by using msa optimization The swap test case will call sswap_msa.c and dswap_msa.c files in MIPS environmnet. When inc_x or inc_y is equal to zero, the calculation result of the two functions will be wrong. This patch adds the processing of inc_x or inc_y equal to zero, and the swap test case has passed. --- kernel/mips/dswap_msa.c | 30 ++++++++++++++++++++++++++++-- kernel/mips/sswap_msa.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/kernel/mips/dswap_msa.c b/kernel/mips/dswap_msa.c index 7b1f02477..67e97f710 100644 --- a/kernel/mips/dswap_msa.c +++ b/kernel/mips/dswap_msa.c @@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - else + else if ((inc_x != 0) && (inc_y != 0)) { for (i = (n >> 3); i--;) { @@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - + else + { + if (inc_x == inc_y) + { + if (n & 1) + { + x0 = *srcx; + *srcx = *srcy; + *srcy = x0; + } + else + return (0); + } + else + { + BLASLONG ix = 0, iy = 0; + while (i < n) + { + x0 = srcx[ix]; + srcx[ix] = srcy[iy]; + srcy[iy] = x0; + ix += inc_x; + iy += inc_y; + i++; + } + } + } return (0); } diff --git a/kernel/mips/sswap_msa.c b/kernel/mips/sswap_msa.c index 46fa8aa87..d412285b0 100644 --- a/kernel/mips/sswap_msa.c +++ b/kernel/mips/sswap_msa.c @@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - else + else if ((inc_x != 0) && (inc_y != 0)) { for (i = (n >> 3); i--;) { @@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } + else + { + if (inc_x == inc_y) + { + if (n & 1) + { + x0 = *srcx; + *srcx = *srcy; + *srcy = x0; + } + else + return (0); + } + else + { + BLASLONG ix = 0, iy = 0; + while (i < n) + { + x0 = srcx[ix]; + srcx[ix] = srcy[iy]; + srcy[iy] = x0; + ix += inc_x; + iy += inc_y; + i++; + } + } + } return (0); } From ad38bd0e89c4507476f1ad4ba566d27bb0dd6f9d Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 7 Dec 2020 10:18:51 +0800 Subject: [PATCH 11/12] Fix failed cgemv and zgemv test case after using msa optimization The cgemv and zgemv test case will call cgemv_n/t_msa.c zgemv_n/t_msa.c files in MIPS environment. When the macro CONJ is defined, the calculation result will be wrong due to the wrong definition of OP2. This patch updates the value of OP2 and passes the corresponding test. --- kernel/mips/cgemv_n_msa.c | 4 ++-- kernel/mips/cgemv_t_msa.c | 26 +++++++++++++++++++------- kernel/mips/zgemv_n_msa.c | 4 ++-- kernel/mips/zgemv_t_msa.c | 26 +++++++++++++++++++------- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/kernel/mips/cgemv_n_msa.c b/kernel/mips/cgemv_n_msa.c index 12fa7ca02..c1eb9bbfd 100644 --- a/kernel/mips/cgemv_n_msa.c +++ b/kernel/mips/cgemv_n_msa.c @@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if !defined(XCONJ) #define OP0 += #define OP1 -= - #define OP2 -= + #define OP2 += #else #define OP0 -= #define OP1 -= - #define OP2 += + #define OP2 -= #endif #endif diff --git a/kernel/mips/cgemv_t_msa.c b/kernel/mips/cgemv_t_msa.c index 584e3de75..800667b6e 100644 --- a/kernel/mips/cgemv_t_msa.c +++ b/kernel/mips/cgemv_t_msa.c @@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef OP1 #undef OP2 -#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) - #define OP0 -= - #define OP1 += - #define OP2 += +#if !defined(CONJ) + #if !defined(XCONJ) + #define OP0 -= + #define OP1 += + #define OP2 += + #else + #define OP0 += + #define OP1 += + #define OP2 -= + #endif #else - #define OP0 += - #define OP1 += - #define OP2 -= + #if !defined(XCONJ) + #define OP0 += + #define OP1 -= + #define OP2 += + #else + #define OP0 -= + #define OP1 -= + #define OP2 -= + #endif #endif #define CGEMV_T_8x4() \ diff --git a/kernel/mips/zgemv_n_msa.c b/kernel/mips/zgemv_n_msa.c index 669c25758..97a80b4ba 100644 --- a/kernel/mips/zgemv_n_msa.c +++ b/kernel/mips/zgemv_n_msa.c @@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if !defined(XCONJ) #define OP0 += #define OP1 -= - #define OP2 -= + #define OP2 += #else #define OP0 -= #define OP1 -= - #define OP2 += + #define OP2 -= #endif #endif diff --git a/kernel/mips/zgemv_t_msa.c b/kernel/mips/zgemv_t_msa.c index e6febb577..6492f90be 100644 --- a/kernel/mips/zgemv_t_msa.c +++ b/kernel/mips/zgemv_t_msa.c @@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef OP3 #undef OP4 -#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) - #define OP0 -= - #define OP1 += - #define OP2 += +#if !defined(CONJ) + #if !defined(XCONJ) + #define OP0 -= + #define OP1 += + #define OP2 += + #else + #define OP0 += + #define OP1 += + #define OP2 -= + #endif #else - #define OP0 += - #define OP1 += - #define OP2 -= + #if !defined(XCONJ) + #define OP0 += + #define OP1 -= + #define OP2 += + #else + #define OP0 -= + #define OP1 -= + #define OP2 -= + #endif #endif #define ZGEMV_T_8x1() \ From 7834c10e2f6288d0c7fe339375540ebe765f7efc Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Mon, 7 Dec 2020 16:55:05 +0800 Subject: [PATCH 12/12] Add PingTouGe contribution credit. --- CONTRIBUTORS.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 7b994885a..be9a32a7c 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -190,4 +190,7 @@ In chronological order: * [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support * Danfeng Zhang - * [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 \ No newline at end of file + * [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 + +* PingTouGe Semiconductor Co., Ltd. + * [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910