commit
980ab349bc
|
@ -190,4 +190,7 @@ In chronological order:
|
|||
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
|
||||
|
||||
* Danfeng Zhang <https://github.com/craft-zhang>
|
||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
||||
|
||||
* PingTouGe Semiconductor Co., Ltd.
|
||||
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910
|
||||
|
|
|
@ -3,21 +3,29 @@ RANLIB = ranlib
|
|||
|
||||
ifdef BINARY64
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mcpu=v9 -m64
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mcpu=v9 -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v9
|
||||
ifeq ($(COMPILER_F77), f95)
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
else
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mcpu=v9
|
||||
else
|
||||
CCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mcpu=v9
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
ifeq ($(COMPILER_F77), f95)
|
||||
FCOMMON_OPT += -xarch=v8plusb
|
||||
endif
|
||||
|
||||
|
@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
|
|||
else
|
||||
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
|
||||
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w
|
|||
ifeq ($(ARCH), x86)
|
||||
CCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
ifdef BINARY64
|
||||
CCOMMON_OPT += -m64
|
||||
else
|
||||
CCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), SUN)
|
||||
CCOMMON_OPT += -DF_INTERFACE_SUN
|
||||
FCOMMON_OPT += -ftrap=%none -xrecursive
|
||||
ifeq ($(ARCH), x86)
|
||||
FCOMMON_OPT += -m32
|
||||
else
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -xopenmp=parallel
|
||||
|
@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
|||
include $(TOPDIR)/Makefile.$(ARCH)
|
||||
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
ifneq ($(C_COMPILER), SUN)
|
||||
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
|
||||
endif
|
||||
endif
|
||||
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
|
||||
|
||||
ifeq ($(CORE), PPC440)
|
||||
|
|
3
c_check
3
c_check
|
@ -6,7 +6,8 @@
|
|||
# Checking cross compile
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
$hostarch = `uname -p` if ($hostos eq "AIX");
|
||||
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
|
||||
chop($hostarch);
|
||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
|
|
|
@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
|
|||
#define __BIG_ENDIAN__
|
||||
#endif
|
||||
|
||||
#ifdef C_SUN
|
||||
#ifndef __64BIT
|
||||
#define RETURN_BY_STACK
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
|
||||
#else
|
||||
|
|
|
@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
|||
|
||||
dot[0]=0.0;
|
||||
dot[1]=0.0;
|
||||
#if !defined(__PPC__)
|
||||
#if !defined(__PPC__) && !defined(__SunOS)
|
||||
CREAL(result) = 0.0 ;
|
||||
CIMAG(result) = 0.0 ;
|
||||
#else
|
||||
|
@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
|||
i++ ;
|
||||
|
||||
}
|
||||
#if !defined(__PPC__)
|
||||
#if !defined(__PPC__) && !defined(__SunOS)
|
||||
CREAL(result) = dot[0];
|
||||
CIMAG(result) = dot[1];
|
||||
#else
|
||||
|
|
|
@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
pc0[1] += alphar * res1; \
|
||||
pc0[1] += alphai * res0; \
|
||||
\
|
||||
pc1[2] += alphar * res2; \
|
||||
pc1[2] -= alphai * res3; \
|
||||
pc1[3] += alphar * res3; \
|
||||
pc1[3] += alphai * res2; \
|
||||
pc1[0] += alphar * res2; \
|
||||
pc1[0] -= alphai * res3; \
|
||||
pc1[1] += alphar * res3; \
|
||||
pc1[1] += alphai * res2; \
|
||||
}
|
||||
|
||||
#define CGEMM_SCALE_1X1 \
|
||||
|
@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
pc0[1] = alphar * res1; \
|
||||
pc0[1] += alphai * res0; \
|
||||
\
|
||||
pc1[2] = alphar * res2; \
|
||||
pc1[2] -= alphai * res3; \
|
||||
pc1[3] = alphar * res3; \
|
||||
pc1[3] += alphai * res2; \
|
||||
pc1[0] = alphar * res2; \
|
||||
pc1[0] -= alphai * res3; \
|
||||
pc1[1] = alphar * res3; \
|
||||
pc1[1] += alphai * res2; \
|
||||
}
|
||||
|
||||
#define CGEMM_TRMM_SCALE_1X1 \
|
||||
|
|
|
@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if !defined(XCONJ)
|
||||
#define OP0 +=
|
||||
#define OP1 -=
|
||||
#define OP2 -=
|
||||
#define OP2 +=
|
||||
#else
|
||||
#define OP0 -=
|
||||
#define OP1 -=
|
||||
#define OP2 +=
|
||||
#define OP2 -=
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#undef OP1
|
||||
#undef OP2
|
||||
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
#define OP0 -=
|
||||
#define OP1 +=
|
||||
#define OP2 +=
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
#define OP0 -=
|
||||
#define OP1 +=
|
||||
#define OP2 +=
|
||||
#else
|
||||
#define OP0 +=
|
||||
#define OP1 +=
|
||||
#define OP2 -=
|
||||
#endif
|
||||
#else
|
||||
#define OP0 +=
|
||||
#define OP1 +=
|
||||
#define OP2 -=
|
||||
#if !defined(XCONJ)
|
||||
#define OP0 +=
|
||||
#define OP1 -=
|
||||
#define OP2 +=
|
||||
#else
|
||||
#define OP0 -=
|
||||
#define OP1 -=
|
||||
#define OP2 -=
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CGEMV_T_8x4() \
|
||||
|
|
|
@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
|||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
else if ((inc_x != 0) && (inc_y != 0))
|
||||
{
|
||||
for (i = (n >> 3); i--;)
|
||||
{
|
||||
|
@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (inc_x == inc_y)
|
||||
{
|
||||
if (n & 1)
|
||||
{
|
||||
x0 = *srcx;
|
||||
*srcx = *srcy;
|
||||
*srcy = x0;
|
||||
}
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
else
|
||||
{
|
||||
BLASLONG ix = 0, iy = 0;
|
||||
while (i < n)
|
||||
{
|
||||
x0 = srcx[ix];
|
||||
srcx[ix] = srcy[iy];
|
||||
srcy[iy] = x0;
|
||||
ix += inc_x;
|
||||
iy += inc_y;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
|
|
@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
|||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
else if ((inc_x != 0) && (inc_y != 0))
|
||||
{
|
||||
for (i = (n >> 3); i--;)
|
||||
{
|
||||
|
@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
|||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inc_x == inc_y)
|
||||
{
|
||||
if (n & 1)
|
||||
{
|
||||
x0 = *srcx;
|
||||
*srcx = *srcy;
|
||||
*srcy = x0;
|
||||
}
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
else
|
||||
{
|
||||
BLASLONG ix = 0, iy = 0;
|
||||
while (i < n)
|
||||
{
|
||||
x0 = srcx[ix];
|
||||
srcx[ix] = srcy[iy];
|
||||
srcy[iy] = x0;
|
||||
ix += inc_x;
|
||||
iy += inc_y;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
|
|
@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if !defined(XCONJ)
|
||||
#define OP0 +=
|
||||
#define OP1 -=
|
||||
#define OP2 -=
|
||||
#define OP2 +=
|
||||
#else
|
||||
#define OP0 -=
|
||||
#define OP1 -=
|
||||
#define OP2 +=
|
||||
#define OP2 -=
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#undef OP3
|
||||
#undef OP4
|
||||
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
#define OP0 -=
|
||||
#define OP1 +=
|
||||
#define OP2 +=
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
#define OP0 -=
|
||||
#define OP1 +=
|
||||
#define OP2 +=
|
||||
#else
|
||||
#define OP0 +=
|
||||
#define OP1 +=
|
||||
#define OP2 -=
|
||||
#endif
|
||||
#else
|
||||
#define OP0 +=
|
||||
#define OP1 +=
|
||||
#define OP2 -=
|
||||
#if !defined(XCONJ)
|
||||
#define OP0 +=
|
||||
#define OP1 -=
|
||||
#define OP2 +=
|
||||
#else
|
||||
#define OP0 -=
|
||||
#define OP1 -=
|
||||
#define OP2 -=
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define ZGEMV_T_8x1() \
|
||||
|
|
|
@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
|
|||
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
|
||||
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
|
||||
|
||||
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
SDSDOTKERNEL = ../generic/dot.c
|
||||
DSDOTKERNEL = ../generic/dot.c
|
||||
DDOTKERNEL = ../generic/dot.c
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
CSWAPKERNEL = ../arm/zswap.c
|
||||
ZSWAPKERNEL = ../arm/zswap.c
|
||||
|
|
18
param.h
18
param.h
|
@ -1454,22 +1454,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define SGEMM_DEFAULT_P 768
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
//#define SGEMM_DEFAULT_R 1024
|
||||
/*#define SGEMM_DEFAULT_R 1024*/
|
||||
|
||||
#define DGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
//#define DGEMM_DEFAULT_R 1024
|
||||
/*#define DGEMM_DEFAULT_R 1024*/
|
||||
|
||||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
|
||||
#define CGEMM_DEFAULT_P 768
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
//#define CGEMM_DEFAULT_R 1024
|
||||
/*#define CGEMM_DEFAULT_R 1024*/
|
||||
|
||||
#define ZGEMM_DEFAULT_P 512
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
//#define ZGEMM_DEFAULT_R 1024
|
||||
/*#define ZGEMM_DEFAULT_R 1024*/
|
||||
|
||||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
|
@ -2571,7 +2571,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#ifdef LOONGSON3A
|
||||
////Copy from SICORTEX
|
||||
/*Copy from SICORTEX*/
|
||||
#define SNUMOPT 2
|
||||
#define DNUMOPT 2
|
||||
|
||||
|
@ -2863,7 +2863,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
// Common ARMv8 parameters
|
||||
/* Common ARMv8 parameters */
|
||||
#if defined(ARMV8)
|
||||
|
||||
#define SNUMOPT 2
|
||||
|
@ -3066,7 +3066,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#else // Other/undetected ARMv8 cores
|
||||
#else /* Other/undetected ARMv8 cores */
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -3095,9 +3095,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#endif // Cores
|
||||
#endif /* Cores */
|
||||
|
||||
#endif // ARMv8
|
||||
#endif /* ARMv8 */
|
||||
|
||||
#if defined(ARMV5)
|
||||
#define SNUMOPT 2
|
||||
|
|
|
@ -35,6 +35,9 @@ endif
|
|||
ifeq ($(C_COMPILER), PGI)
|
||||
OBJS = utest_main2.o
|
||||
endif
|
||||
ifeq ($(C_COMPILER), SUN)
|
||||
OBJS = utest_main2.o
|
||||
endif
|
||||
ifeq ($(OSNAME), AIX)
|
||||
OBJS = utest_main2.o
|
||||
endif
|
||||
|
|
Loading…
Reference in New Issue