Merge pull request #2 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-12-08 20:53:35 +01:00 committed by GitHub
commit 980ab349bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 166 additions and 47 deletions

View File

@ -190,4 +190,7 @@ In chronological order:
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support * [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
* Danfeng Zhang <https://github.com/craft-zhang> * Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 * [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
* PingTouGe Semiconductor Co., Ltd.
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910

View File

@ -3,21 +3,29 @@ RANLIB = ranlib
ifdef BINARY64 ifdef BINARY64
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 -m64 CCOMMON_OPT += -mcpu=v9 -m64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77) ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 -m64 FCOMMON_OPT += -mcpu=v9 -m64
endif endif
ifeq ($(COMPILER_F77), f90) ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -xarch=v9 FCOMMON_OPT += -m64
endif endif
else else
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 CCOMMON_OPT += -mcpu=v9
else
CCOMMON_OPT += -xarch=v9
endif
ifeq ($(COMPILER_F77), g77) ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 FCOMMON_OPT += -mcpu=v9
endif endif
ifeq ($(COMPILER_F77), f90) ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -xarch=v8plusb FCOMMON_OPT += -xarch=v8plusb
endif endif
@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
else else
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \ LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath -Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
endif endif

View File

@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w
ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
CCOMMON_OPT += -m32 CCOMMON_OPT += -m32
else else
FCOMMON_OPT += -m64 ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif endif
endif endif
ifeq ($(F_COMPILER), SUN) ifeq ($(F_COMPILER), SUN)
CCOMMON_OPT += -DF_INTERFACE_SUN CCOMMON_OPT += -DF_INTERFACE_SUN
FCOMMON_OPT += -ftrap=%none -xrecursive
ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
FCOMMON_OPT += -m32 FCOMMON_OPT += -m32
else else
ifdef BINARY64
FCOMMON_OPT += -m64 FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
endif endif
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -xopenmp=parallel FCOMMON_OPT += -xopenmp=parallel
@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH) include $(TOPDIR)/Makefile.$(ARCH)
ifneq ($(C_COMPILER), PGI) ifneq ($(C_COMPILER), PGI)
ifneq ($(C_COMPILER), SUN)
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
endif endif
endif
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
ifeq ($(CORE), PPC440) ifeq ($(CORE), PPC440)

View File

@ -6,7 +6,8 @@
# Checking cross compile # Checking cross compile
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
$hostarch = `uname -p` if ($hostos eq "AIX"); $hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
chop($hostarch);
$hostarch = "x86_64" if ($hostarch eq "amd64"); $hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); $hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "arm64" if ($hostarch eq "aarch64");

View File

@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
#define __BIG_ENDIAN__ #define __BIG_ENDIAN__
#endif #endif
#ifdef C_SUN
#ifndef __64BIT
#define RETURN_BY_STACK
#endif
#endif
#ifdef DOUBLE #ifdef DOUBLE
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory") #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
#else #else

View File

@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
dot[0]=0.0; dot[0]=0.0;
dot[1]=0.0; dot[1]=0.0;
#if !defined(__PPC__) #if !defined(__PPC__) && !defined(__SunOS)
CREAL(result) = 0.0 ; CREAL(result) = 0.0 ;
CIMAG(result) = 0.0 ; CIMAG(result) = 0.0 ;
#else #else
@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
i++ ; i++ ;
} }
#if !defined(__PPC__) #if !defined(__PPC__) && !defined(__SunOS)
CREAL(result) = dot[0]; CREAL(result) = dot[0];
CIMAG(result) = dot[1]; CIMAG(result) = dot[1];
#else #else

View File

@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pc0[1] += alphar * res1; \ pc0[1] += alphar * res1; \
pc0[1] += alphai * res0; \ pc0[1] += alphai * res0; \
\ \
pc1[2] += alphar * res2; \ pc1[0] += alphar * res2; \
pc1[2] -= alphai * res3; \ pc1[0] -= alphai * res3; \
pc1[3] += alphar * res3; \ pc1[1] += alphar * res3; \
pc1[3] += alphai * res2; \ pc1[1] += alphai * res2; \
} }
#define CGEMM_SCALE_1X1 \ #define CGEMM_SCALE_1X1 \
@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pc0[1] = alphar * res1; \ pc0[1] = alphar * res1; \
pc0[1] += alphai * res0; \ pc0[1] += alphai * res0; \
\ \
pc1[2] = alphar * res2; \ pc1[0] = alphar * res2; \
pc1[2] -= alphai * res3; \ pc1[0] -= alphai * res3; \
pc1[3] = alphar * res3; \ pc1[1] = alphar * res3; \
pc1[3] += alphai * res2; \ pc1[1] += alphai * res2; \
} }
#define CGEMM_TRMM_SCALE_1X1 \ #define CGEMM_TRMM_SCALE_1X1 \

View File

@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(XCONJ) #if !defined(XCONJ)
#define OP0 += #define OP0 +=
#define OP1 -= #define OP1 -=
#define OP2 -= #define OP2 +=
#else #else
#define OP0 -= #define OP0 -=
#define OP1 -= #define OP1 -=
#define OP2 += #define OP2 -=
#endif #endif
#endif #endif

View File

@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef OP1 #undef OP1
#undef OP2 #undef OP2
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) #if !defined(CONJ)
#define OP0 -= #if !defined(XCONJ)
#define OP1 += #define OP0 -=
#define OP2 += #define OP1 +=
#define OP2 +=
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#endif
#else #else
#define OP0 += #if !defined(XCONJ)
#define OP1 += #define OP0 +=
#define OP2 -= #define OP1 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 -=
#endif
#endif #endif
#define CGEMV_T_8x4() \ #define CGEMV_T_8x4() \

View File

@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
} }
} }
} }
else else if ((inc_x != 0) && (inc_y != 0))
{ {
for (i = (n >> 3); i--;) for (i = (n >> 3); i--;)
{ {
@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
} }
} }
} }
else
{
if (inc_x == inc_y)
{
if (n & 1)
{
x0 = *srcx;
*srcx = *srcy;
*srcy = x0;
}
else
return (0);
}
else
{
BLASLONG ix = 0, iy = 0;
while (i < n)
{
x0 = srcx[ix];
srcx[ix] = srcy[iy];
srcy[iy] = x0;
ix += inc_x;
iy += inc_y;
i++;
}
}
}
return (0); return (0);
} }

View File

@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
} }
} }
} }
else else if ((inc_x != 0) && (inc_y != 0))
{ {
for (i = (n >> 3); i--;) for (i = (n >> 3); i--;)
{ {
@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
} }
} }
} }
else
{
if (inc_x == inc_y)
{
if (n & 1)
{
x0 = *srcx;
*srcx = *srcy;
*srcy = x0;
}
else
return (0);
}
else
{
BLASLONG ix = 0, iy = 0;
while (i < n)
{
x0 = srcx[ix];
srcx[ix] = srcy[iy];
srcy[iy] = x0;
ix += inc_x;
iy += inc_y;
i++;
}
}
}
return (0); return (0);
} }

View File

@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(XCONJ) #if !defined(XCONJ)
#define OP0 += #define OP0 +=
#define OP1 -= #define OP1 -=
#define OP2 -= #define OP2 +=
#else #else
#define OP0 -= #define OP0 -=
#define OP1 -= #define OP1 -=
#define OP2 += #define OP2 -=
#endif #endif
#endif #endif

View File

@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef OP3 #undef OP3
#undef OP4 #undef OP4
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) #if !defined(CONJ)
#define OP0 -= #if !defined(XCONJ)
#define OP1 += #define OP0 -=
#define OP2 += #define OP1 +=
#define OP2 +=
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#endif
#else #else
#define OP0 += #if !defined(XCONJ)
#define OP1 += #define OP0 +=
#define OP2 -= #define OP1 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 -=
#endif
#endif #endif
#define ZGEMV_T_8x1() \ #define ZGEMV_T_8x1() \

View File

@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
SDOTKERNEL = ../generic/dot.c
SDSDOTKERNEL = ../generic/dot.c
DSDOTKERNEL = ../generic/dot.c
DDOTKERNEL = ../generic/dot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
CSWAPKERNEL = ../arm/zswap.c
ZSWAPKERNEL = ../arm/zswap.c

18
param.h
View File

@ -1454,22 +1454,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SGEMM_DEFAULT_P 768 #define SGEMM_DEFAULT_P 768
#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R sgemm_r
//#define SGEMM_DEFAULT_R 1024 /*#define SGEMM_DEFAULT_R 1024*/
#define DGEMM_DEFAULT_P 512 #define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r #define DGEMM_DEFAULT_R dgemm_r
//#define DGEMM_DEFAULT_R 1024 /*#define DGEMM_DEFAULT_R 1024*/
#define QGEMM_DEFAULT_P 504 #define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r #define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_P 768 #define CGEMM_DEFAULT_P 768
#define CGEMM_DEFAULT_R cgemm_r #define CGEMM_DEFAULT_R cgemm_r
//#define CGEMM_DEFAULT_R 1024 /*#define CGEMM_DEFAULT_R 1024*/
#define ZGEMM_DEFAULT_P 512 #define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r #define ZGEMM_DEFAULT_R zgemm_r
//#define ZGEMM_DEFAULT_R 1024 /*#define ZGEMM_DEFAULT_R 1024*/
#define XGEMM_DEFAULT_P 252 #define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r #define XGEMM_DEFAULT_R xgemm_r
@ -2571,7 +2571,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#ifdef LOONGSON3A #ifdef LOONGSON3A
////Copy from SICORTEX /*Copy from SICORTEX*/
#define SNUMOPT 2 #define SNUMOPT 2
#define DNUMOPT 2 #define DNUMOPT 2
@ -2863,7 +2863,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16 #define SYMV_P 16
#endif #endif
// Common ARMv8 parameters /* Common ARMv8 parameters */
#if defined(ARMV8) #if defined(ARMV8)
#define SNUMOPT 2 #define SNUMOPT 2
@ -3066,7 +3066,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096
#else // Other/undetected ARMv8 cores #else /* Other/undetected ARMv8 cores */
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4
@ -3095,9 +3095,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096
#endif // Cores #endif /* Cores */
#endif // ARMv8 #endif /* ARMv8 */
#if defined(ARMV5) #if defined(ARMV5)
#define SNUMOPT 2 #define SNUMOPT 2

View File

@ -35,6 +35,9 @@ endif
ifeq ($(C_COMPILER), PGI) ifeq ($(C_COMPILER), PGI)
OBJS = utest_main2.o OBJS = utest_main2.o
endif endif
ifeq ($(C_COMPILER), SUN)
OBJS = utest_main2.o
endif
ifeq ($(OSNAME), AIX) ifeq ($(OSNAME), AIX)
OBJS = utest_main2.o OBJS = utest_main2.o
endif endif