diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 7b994885a..be9a32a7c 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -190,4 +190,7 @@ In chronological order: * [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support * Danfeng Zhang - * [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 \ No newline at end of file + * [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 + +* PingTouGe Semiconductor Co., Ltd. + * [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910 diff --git a/Makefile.sparc b/Makefile.sparc index 8895b96dd..61c7aa36d 100644 --- a/Makefile.sparc +++ b/Makefile.sparc @@ -3,21 +3,29 @@ RANLIB = ranlib ifdef BINARY64 +ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -mcpu=v9 -m64 +else +CCOMMON_OPT += -m64 +endif ifeq ($(COMPILER_F77), g77) FCOMMON_OPT += -mcpu=v9 -m64 endif -ifeq ($(COMPILER_F77), f90) -FCOMMON_OPT += -xarch=v9 +ifeq ($(COMPILER_F77), f95) +FCOMMON_OPT += -m64 endif else +ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -mcpu=v9 +else +CCOMMON_OPT += -xarch=v9 +endif ifeq ($(COMPILER_F77), g77) FCOMMON_OPT += -mcpu=v9 endif -ifeq ($(COMPILER_F77), f90) +ifeq ($(COMPILER_F77), f95) FCOMMON_OPT += -xarch=v8plusb endif @@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \ else LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \ -Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath -endif \ No newline at end of file +endif diff --git a/Makefile.system b/Makefile.system index b5974f872..c17cd3bd1 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w ifeq ($(ARCH), x86) CCOMMON_OPT += -m32 else -FCOMMON_OPT += -m64 +ifdef BINARY64 +CCOMMON_OPT += -m64 +else +CCOMMON_OPT += -m32 +endif endif endif ifeq ($(F_COMPILER), SUN) CCOMMON_OPT += -DF_INTERFACE_SUN +FCOMMON_OPT += -ftrap=%none -xrecursive ifeq ($(ARCH), x86) FCOMMON_OPT += -m32 else +ifdef BINARY64 FCOMMON_OPT += -m64 +else +FCOMMON_OPT += -m32 +endif endif ifeq ($(USE_OPENMP), 1) FCOMMON_OPT += -xopenmp=parallel @@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH) include $(TOPDIR)/Makefile.$(ARCH) ifneq ($(C_COMPILER), PGI) +ifneq ($(C_COMPILER), SUN) CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME endif +endif CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" ifeq ($(CORE), PPC440) diff --git a/c_check b/c_check index a841df153..fe9c53f0e 100644 --- a/c_check +++ b/c_check @@ -6,7 +6,8 @@ # Checking cross compile $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); -$hostarch = `uname -p` if ($hostos eq "AIX"); +$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS"); +chop($hostarch); $hostarch = "x86_64" if ($hostarch eq "amd64"); $hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); $hostarch = "arm64" if ($hostarch eq "aarch64"); diff --git a/common_sparc.h b/common_sparc.h index 85e29fffa..90a24ebf1 100644 --- a/common_sparc.h +++ b/common_sparc.h @@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){ #define __BIG_ENDIAN__ #endif +#ifdef C_SUN +#ifndef __64BIT +#define RETURN_BY_STACK +#endif +#endif + #ifdef DOUBLE #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory") #else diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c index 73ae3acd7..9249b54f8 100644 --- a/kernel/arm/zdot.c +++ b/kernel/arm/zdot.c @@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA dot[0]=0.0; dot[1]=0.0; -#if !defined(__PPC__) +#if !defined(__PPC__) && !defined(__SunOS) CREAL(result) = 0.0 ; CIMAG(result) = 0.0 ; #else @@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA i++ ; } -#if !defined(__PPC__) +#if !defined(__PPC__) && !defined(__SunOS) CREAL(result) = dot[0]; CIMAG(result) = dot[1]; #else diff --git a/kernel/mips/cgemm_kernel_8x4_msa.c b/kernel/mips/cgemm_kernel_8x4_msa.c index 4b3637c7c..8b624be88 100644 --- a/kernel/mips/cgemm_kernel_8x4_msa.c +++ b/kernel/mips/cgemm_kernel_8x4_msa.c @@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pc0[1] += alphar * res1; \ pc0[1] += alphai * res0; \ \ - pc1[2] += alphar * res2; \ - pc1[2] -= alphai * res3; \ - pc1[3] += alphar * res3; \ - pc1[3] += alphai * res2; \ + pc1[0] += alphar * res2; \ + pc1[0] -= alphai * res3; \ + pc1[1] += alphar * res3; \ + pc1[1] += alphai * res2; \ } #define CGEMM_SCALE_1X1 \ @@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pc0[1] = alphar * res1; \ pc0[1] += alphai * res0; \ \ - pc1[2] = alphar * res2; \ - pc1[2] -= alphai * res3; \ - pc1[3] = alphar * res3; \ - pc1[3] += alphai * res2; \ + pc1[0] = alphar * res2; \ + pc1[0] -= alphai * res3; \ + pc1[1] = alphar * res3; \ + pc1[1] += alphai * res2; \ } #define CGEMM_TRMM_SCALE_1X1 \ diff --git a/kernel/mips/cgemv_n_msa.c b/kernel/mips/cgemv_n_msa.c index 12fa7ca02..c1eb9bbfd 100644 --- a/kernel/mips/cgemv_n_msa.c +++ b/kernel/mips/cgemv_n_msa.c @@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if !defined(XCONJ) #define OP0 += #define OP1 -= - #define OP2 -= + #define OP2 += #else #define OP0 -= #define OP1 -= - #define OP2 += + #define OP2 -= #endif #endif diff --git a/kernel/mips/cgemv_t_msa.c b/kernel/mips/cgemv_t_msa.c index 584e3de75..800667b6e 100644 --- a/kernel/mips/cgemv_t_msa.c +++ b/kernel/mips/cgemv_t_msa.c @@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef OP1 #undef OP2 -#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) - #define OP0 -= - #define OP1 += - #define OP2 += +#if !defined(CONJ) + #if !defined(XCONJ) + #define OP0 -= + #define OP1 += + #define OP2 += + #else + #define OP0 += + #define OP1 += + #define OP2 -= + #endif #else - #define OP0 += - #define OP1 += - #define OP2 -= + #if !defined(XCONJ) + #define OP0 += + #define OP1 -= + #define OP2 += + #else + #define OP0 -= + #define OP1 -= + #define OP2 -= + #endif #endif #define CGEMV_T_8x4() \ diff --git a/kernel/mips/dswap_msa.c b/kernel/mips/dswap_msa.c index 7b1f02477..67e97f710 100644 --- a/kernel/mips/dswap_msa.c +++ b/kernel/mips/dswap_msa.c @@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - else + else if ((inc_x != 0) && (inc_y != 0)) { for (i = (n >> 3); i--;) { @@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - + else + { + if (inc_x == inc_y) + { + if (n & 1) + { + x0 = *srcx; + *srcx = *srcy; + *srcy = x0; + } + else + return (0); + } + else + { + BLASLONG ix = 0, iy = 0; + while (i < n) + { + x0 = srcx[ix]; + srcx[ix] = srcy[iy]; + srcy[iy] = x0; + ix += inc_x; + iy += inc_y; + i++; + } + } + } return (0); } diff --git a/kernel/mips/sswap_msa.c b/kernel/mips/sswap_msa.c index 46fa8aa87..d412285b0 100644 --- a/kernel/mips/sswap_msa.c +++ b/kernel/mips/sswap_msa.c @@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } - else + else if ((inc_x != 0) && (inc_y != 0)) { for (i = (n >> 3); i--;) { @@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, } } } + else + { + if (inc_x == inc_y) + { + if (n & 1) + { + x0 = *srcx; + *srcx = *srcy; + *srcy = x0; + } + else + return (0); + } + else + { + BLASLONG ix = 0, iy = 0; + while (i < n) + { + x0 = srcx[ix]; + srcx[ix] = srcy[iy]; + srcy[iy] = x0; + ix += inc_x; + iy += inc_y; + i++; + } + } + } return (0); } diff --git a/kernel/mips/zgemv_n_msa.c b/kernel/mips/zgemv_n_msa.c index 669c25758..97a80b4ba 100644 --- a/kernel/mips/zgemv_n_msa.c +++ b/kernel/mips/zgemv_n_msa.c @@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if !defined(XCONJ) #define OP0 += #define OP1 -= - #define OP2 -= + #define OP2 += #else #define OP0 -= #define OP1 -= - #define OP2 += + #define OP2 -= #endif #endif diff --git a/kernel/mips/zgemv_t_msa.c b/kernel/mips/zgemv_t_msa.c index e6febb577..6492f90be 100644 --- a/kernel/mips/zgemv_t_msa.c +++ b/kernel/mips/zgemv_t_msa.c @@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef OP3 #undef OP4 -#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) - #define OP0 -= - #define OP1 += - #define OP2 += +#if !defined(CONJ) + #if !defined(XCONJ) + #define OP0 -= + #define OP1 += + #define OP2 += + #else + #define OP0 += + #define OP1 += + #define OP2 -= + #endif #else - #define OP0 += - #define OP1 += - #define OP2 -= + #if !defined(XCONJ) + #define OP0 += + #define OP1 -= + #define OP2 += + #else + #define OP0 -= + #define OP1 -= + #define OP2 -= + #endif #endif #define ZGEMV_T_8x1() \ diff --git a/kernel/sparc/KERNEL.sparc b/kernel/sparc/KERNEL.sparc index 2e8319ce5..1a2e9671a 100644 --- a/kernel/sparc/KERNEL.sparc +++ b/kernel/sparc/KERNEL.sparc @@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S ZTRSMKERNEL_LT = ztrsm_kernel_LT.S ZTRSMKERNEL_RN = ztrsm_kernel_LT.S ZTRSMKERNEL_RT = ztrsm_kernel_RT.S + + +SDOTKERNEL = ../generic/dot.c +SDSDOTKERNEL = ../generic/dot.c +DSDOTKERNEL = ../generic/dot.c +DDOTKERNEL = ../generic/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c diff --git a/param.h b/param.h index ee5ad17fb..a0d45c573 100644 --- a/param.h +++ b/param.h @@ -1454,22 +1454,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_P 768 #define SGEMM_DEFAULT_R sgemm_r -//#define SGEMM_DEFAULT_R 1024 +/*#define SGEMM_DEFAULT_R 1024*/ #define DGEMM_DEFAULT_P 512 #define DGEMM_DEFAULT_R dgemm_r -//#define DGEMM_DEFAULT_R 1024 +/*#define DGEMM_DEFAULT_R 1024*/ #define QGEMM_DEFAULT_P 504 #define QGEMM_DEFAULT_R qgemm_r #define CGEMM_DEFAULT_P 768 #define CGEMM_DEFAULT_R cgemm_r -//#define CGEMM_DEFAULT_R 1024 +/*#define CGEMM_DEFAULT_R 1024*/ #define ZGEMM_DEFAULT_P 512 #define ZGEMM_DEFAULT_R zgemm_r -//#define ZGEMM_DEFAULT_R 1024 +/*#define ZGEMM_DEFAULT_R 1024*/ #define XGEMM_DEFAULT_P 252 #define XGEMM_DEFAULT_R xgemm_r @@ -2571,7 +2571,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef LOONGSON3A -////Copy from SICORTEX +/*Copy from SICORTEX*/ #define SNUMOPT 2 #define DNUMOPT 2 @@ -2863,7 +2863,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif -// Common ARMv8 parameters +/* Common ARMv8 parameters */ #if defined(ARMV8) #define SNUMOPT 2 @@ -3066,7 +3066,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 -#else // Other/undetected ARMv8 cores +#else /* Other/undetected ARMv8 cores */ #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -3095,9 +3095,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 -#endif // Cores +#endif /* Cores */ -#endif // ARMv8 +#endif /* ARMv8 */ #if defined(ARMV5) #define SNUMOPT 2 diff --git a/utest/Makefile b/utest/Makefile index 1fc30d088..fad3607de 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -35,6 +35,9 @@ endif ifeq ($(C_COMPILER), PGI) OBJS = utest_main2.o endif +ifeq ($(C_COMPILER), SUN) +OBJS = utest_main2.o +endif ifeq ($(OSNAME), AIX) OBJS = utest_main2.o endif