Merge pull request #2 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-12-08 20:53:35 +01:00 committed by GitHub
commit 980ab349bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 166 additions and 47 deletions

View File

@ -191,3 +191,6 @@ In chronological order:
* Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
* PingTouGe Semiconductor Co., Ltd.
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910

View File

@ -3,21 +3,29 @@ RANLIB = ranlib
ifdef BINARY64
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 -m64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 -m64
endif
ifeq ($(COMPILER_F77), f90)
FCOMMON_OPT += -xarch=v9
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -m64
endif
else
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9
else
CCOMMON_OPT += -xarch=v9
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9
endif
ifeq ($(COMPILER_F77), f90)
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -xarch=v8plusb
endif

View File

@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w
ifeq ($(ARCH), x86)
CCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif
endif
ifeq ($(F_COMPILER), SUN)
CCOMMON_OPT += -DF_INTERFACE_SUN
FCOMMON_OPT += -ftrap=%none -xrecursive
ifeq ($(ARCH), x86)
FCOMMON_OPT += -m32
else
ifdef BINARY64
FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -xopenmp=parallel
@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH)
ifneq ($(C_COMPILER), PGI)
ifneq ($(C_COMPILER), SUN)
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
endif
endif
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
ifeq ($(CORE), PPC440)

View File

@ -6,7 +6,8 @@
# Checking cross compile
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
$hostarch = `uname -p` if ($hostos eq "AIX");
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
chop($hostarch);
$hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64");

View File

@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
#define __BIG_ENDIAN__
#endif
#ifdef C_SUN
#ifndef __64BIT
#define RETURN_BY_STACK
#endif
#endif
#ifdef DOUBLE
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
#else

View File

@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
dot[0]=0.0;
dot[1]=0.0;
#if !defined(__PPC__)
#if !defined(__PPC__) && !defined(__SunOS)
CREAL(result) = 0.0 ;
CIMAG(result) = 0.0 ;
#else
@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
i++ ;
}
#if !defined(__PPC__)
#if !defined(__PPC__) && !defined(__SunOS)
CREAL(result) = dot[0];
CIMAG(result) = dot[1];
#else

View File

@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pc0[1] += alphar * res1; \
pc0[1] += alphai * res0; \
\
pc1[2] += alphar * res2; \
pc1[2] -= alphai * res3; \
pc1[3] += alphar * res3; \
pc1[3] += alphai * res2; \
pc1[0] += alphar * res2; \
pc1[0] -= alphai * res3; \
pc1[1] += alphar * res3; \
pc1[1] += alphai * res2; \
}
#define CGEMM_SCALE_1X1 \
@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pc0[1] = alphar * res1; \
pc0[1] += alphai * res0; \
\
pc1[2] = alphar * res2; \
pc1[2] -= alphai * res3; \
pc1[3] = alphar * res3; \
pc1[3] += alphai * res2; \
pc1[0] = alphar * res2; \
pc1[0] -= alphai * res3; \
pc1[1] = alphar * res3; \
pc1[1] += alphai * res2; \
}
#define CGEMM_TRMM_SCALE_1X1 \

View File

@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 +=
#define OP2 -=
#endif
#endif

View File

@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef OP1
#undef OP2
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
#define OP0 -=
#define OP1 +=
#define OP2 +=
#if !defined(CONJ)
#if !defined(XCONJ)
#define OP0 -=
#define OP1 +=
#define OP2 +=
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#endif
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 -=
#endif
#endif
#define CGEMV_T_8x4() \

View File

@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}
else
else if ((inc_x != 0) && (inc_y != 0))
{
for (i = (n >> 3); i--;)
{
@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}
else
{
if (inc_x == inc_y)
{
if (n & 1)
{
x0 = *srcx;
*srcx = *srcy;
*srcy = x0;
}
else
return (0);
}
else
{
BLASLONG ix = 0, iy = 0;
while (i < n)
{
x0 = srcx[ix];
srcx[ix] = srcy[iy];
srcy[iy] = x0;
ix += inc_x;
iy += inc_y;
i++;
}
}
}
return (0);
}

View File

@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}
else
else if ((inc_x != 0) && (inc_y != 0))
{
for (i = (n >> 3); i--;)
{
@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}
else
{
if (inc_x == inc_y)
{
if (n & 1)
{
x0 = *srcx;
*srcx = *srcy;
*srcy = x0;
}
else
return (0);
}
else
{
BLASLONG ix = 0, iy = 0;
while (i < n)
{
x0 = srcx[ix];
srcx[ix] = srcy[iy];
srcy[iy] = x0;
ix += inc_x;
iy += inc_y;
i++;
}
}
}
return (0);
}

View File

@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 +=
#define OP2 -=
#endif
#endif

View File

@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef OP3
#undef OP4
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
#define OP0 -=
#define OP1 +=
#define OP2 +=
#if !defined(CONJ)
#if !defined(XCONJ)
#define OP0 -=
#define OP1 +=
#define OP2 +=
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#endif
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 -=
#endif
#endif
#define ZGEMV_T_8x1() \

View File

@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
SDOTKERNEL = ../generic/dot.c
SDSDOTKERNEL = ../generic/dot.c
DSDOTKERNEL = ../generic/dot.c
DDOTKERNEL = ../generic/dot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
CSWAPKERNEL = ../arm/zswap.c
ZSWAPKERNEL = ../arm/zswap.c

18
param.h
View File

@ -1454,22 +1454,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SGEMM_DEFAULT_P 768
#define SGEMM_DEFAULT_R sgemm_r
//#define SGEMM_DEFAULT_R 1024
/*#define SGEMM_DEFAULT_R 1024*/
#define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r
//#define DGEMM_DEFAULT_R 1024
/*#define DGEMM_DEFAULT_R 1024*/
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_P 768
#define CGEMM_DEFAULT_R cgemm_r
//#define CGEMM_DEFAULT_R 1024
/*#define CGEMM_DEFAULT_R 1024*/
#define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r
//#define ZGEMM_DEFAULT_R 1024
/*#define ZGEMM_DEFAULT_R 1024*/
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
@ -2571,7 +2571,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#ifdef LOONGSON3A
////Copy from SICORTEX
/*Copy from SICORTEX*/
#define SNUMOPT 2
#define DNUMOPT 2
@ -2863,7 +2863,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16
#endif
// Common ARMv8 parameters
/* Common ARMv8 parameters */
#if defined(ARMV8)
#define SNUMOPT 2
@ -3066,7 +3066,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#else // Other/undetected ARMv8 cores
#else /* Other/undetected ARMv8 cores */
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
@ -3095,9 +3095,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#endif // Cores
#endif /* Cores */
#endif // ARMv8
#endif /* ARMv8 */
#if defined(ARMV5)
#define SNUMOPT 2

View File

@ -35,6 +35,9 @@ endif
ifeq ($(C_COMPILER), PGI)
OBJS = utest_main2.o
endif
ifeq ($(C_COMPILER), SUN)
OBJS = utest_main2.o
endif
ifeq ($(OSNAME), AIX)
OBJS = utest_main2.o
endif