commit
980ab349bc
|
@ -190,4 +190,7 @@ In chronological order:
|
||||||
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
|
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
|
||||||
|
|
||||||
* Danfeng Zhang <https://github.com/craft-zhang>
|
* Danfeng Zhang <https://github.com/craft-zhang>
|
||||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
||||||
|
|
||||||
|
* PingTouGe Semiconductor Co., Ltd.
|
||||||
|
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910
|
||||||
|
|
|
@ -3,21 +3,29 @@ RANLIB = ranlib
|
||||||
|
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
|
|
||||||
|
ifeq ($(C_COMPILER), GCC)
|
||||||
CCOMMON_OPT += -mcpu=v9 -m64
|
CCOMMON_OPT += -mcpu=v9 -m64
|
||||||
|
else
|
||||||
|
CCOMMON_OPT += -m64
|
||||||
|
endif
|
||||||
ifeq ($(COMPILER_F77), g77)
|
ifeq ($(COMPILER_F77), g77)
|
||||||
FCOMMON_OPT += -mcpu=v9 -m64
|
FCOMMON_OPT += -mcpu=v9 -m64
|
||||||
endif
|
endif
|
||||||
ifeq ($(COMPILER_F77), f90)
|
ifeq ($(COMPILER_F77), f95)
|
||||||
FCOMMON_OPT += -xarch=v9
|
FCOMMON_OPT += -m64
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
|
|
||||||
|
ifeq ($(C_COMPILER), GCC)
|
||||||
CCOMMON_OPT += -mcpu=v9
|
CCOMMON_OPT += -mcpu=v9
|
||||||
|
else
|
||||||
|
CCOMMON_OPT += -xarch=v9
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(COMPILER_F77), g77)
|
ifeq ($(COMPILER_F77), g77)
|
||||||
FCOMMON_OPT += -mcpu=v9
|
FCOMMON_OPT += -mcpu=v9
|
||||||
endif
|
endif
|
||||||
ifeq ($(COMPILER_F77), f90)
|
ifeq ($(COMPILER_F77), f95)
|
||||||
FCOMMON_OPT += -xarch=v8plusb
|
FCOMMON_OPT += -xarch=v8plusb
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
|
||||||
else
|
else
|
||||||
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
|
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
|
||||||
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
|
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
CCOMMON_OPT += -m32
|
CCOMMON_OPT += -m32
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -m64
|
ifdef BINARY64
|
||||||
|
CCOMMON_OPT += -m64
|
||||||
|
else
|
||||||
|
CCOMMON_OPT += -m32
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(F_COMPILER), SUN)
|
ifeq ($(F_COMPILER), SUN)
|
||||||
CCOMMON_OPT += -DF_INTERFACE_SUN
|
CCOMMON_OPT += -DF_INTERFACE_SUN
|
||||||
|
FCOMMON_OPT += -ftrap=%none -xrecursive
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
FCOMMON_OPT += -m32
|
FCOMMON_OPT += -m32
|
||||||
else
|
else
|
||||||
|
ifdef BINARY64
|
||||||
FCOMMON_OPT += -m64
|
FCOMMON_OPT += -m64
|
||||||
|
else
|
||||||
|
FCOMMON_OPT += -m32
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
FCOMMON_OPT += -xopenmp=parallel
|
FCOMMON_OPT += -xopenmp=parallel
|
||||||
|
@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||||
include $(TOPDIR)/Makefile.$(ARCH)
|
include $(TOPDIR)/Makefile.$(ARCH)
|
||||||
|
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
ifneq ($(C_COMPILER), SUN)
|
||||||
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
|
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
|
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
|
||||||
|
|
||||||
ifeq ($(CORE), PPC440)
|
ifeq ($(CORE), PPC440)
|
||||||
|
|
3
c_check
3
c_check
|
@ -6,7 +6,8 @@
|
||||||
# Checking cross compile
|
# Checking cross compile
|
||||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||||
$hostarch = `uname -p` if ($hostos eq "AIX");
|
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
|
||||||
|
chop($hostarch);
|
||||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||||
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
|
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
|
||||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||||
|
|
|
@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
|
||||||
#define __BIG_ENDIAN__
|
#define __BIG_ENDIAN__
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef C_SUN
|
||||||
|
#ifndef __64BIT
|
||||||
|
#define RETURN_BY_STACK
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
|
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
||||||
|
|
||||||
dot[0]=0.0;
|
dot[0]=0.0;
|
||||||
dot[1]=0.0;
|
dot[1]=0.0;
|
||||||
#if !defined(__PPC__)
|
#if !defined(__PPC__) && !defined(__SunOS)
|
||||||
CREAL(result) = 0.0 ;
|
CREAL(result) = 0.0 ;
|
||||||
CIMAG(result) = 0.0 ;
|
CIMAG(result) = 0.0 ;
|
||||||
#else
|
#else
|
||||||
|
@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
||||||
i++ ;
|
i++ ;
|
||||||
|
|
||||||
}
|
}
|
||||||
#if !defined(__PPC__)
|
#if !defined(__PPC__) && !defined(__SunOS)
|
||||||
CREAL(result) = dot[0];
|
CREAL(result) = dot[0];
|
||||||
CIMAG(result) = dot[1];
|
CIMAG(result) = dot[1];
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
pc0[1] += alphar * res1; \
|
pc0[1] += alphar * res1; \
|
||||||
pc0[1] += alphai * res0; \
|
pc0[1] += alphai * res0; \
|
||||||
\
|
\
|
||||||
pc1[2] += alphar * res2; \
|
pc1[0] += alphar * res2; \
|
||||||
pc1[2] -= alphai * res3; \
|
pc1[0] -= alphai * res3; \
|
||||||
pc1[3] += alphar * res3; \
|
pc1[1] += alphar * res3; \
|
||||||
pc1[3] += alphai * res2; \
|
pc1[1] += alphai * res2; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CGEMM_SCALE_1X1 \
|
#define CGEMM_SCALE_1X1 \
|
||||||
|
@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
pc0[1] = alphar * res1; \
|
pc0[1] = alphar * res1; \
|
||||||
pc0[1] += alphai * res0; \
|
pc0[1] += alphai * res0; \
|
||||||
\
|
\
|
||||||
pc1[2] = alphar * res2; \
|
pc1[0] = alphar * res2; \
|
||||||
pc1[2] -= alphai * res3; \
|
pc1[0] -= alphai * res3; \
|
||||||
pc1[3] = alphar * res3; \
|
pc1[1] = alphar * res3; \
|
||||||
pc1[3] += alphai * res2; \
|
pc1[1] += alphai * res2; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CGEMM_TRMM_SCALE_1X1 \
|
#define CGEMM_TRMM_SCALE_1X1 \
|
||||||
|
|
|
@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
#define OP0 +=
|
#define OP0 +=
|
||||||
#define OP1 -=
|
#define OP1 -=
|
||||||
#define OP2 -=
|
#define OP2 +=
|
||||||
#else
|
#else
|
||||||
#define OP0 -=
|
#define OP0 -=
|
||||||
#define OP1 -=
|
#define OP1 -=
|
||||||
#define OP2 +=
|
#define OP2 -=
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#undef OP1
|
#undef OP1
|
||||||
#undef OP2
|
#undef OP2
|
||||||
|
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if !defined(CONJ)
|
||||||
#define OP0 -=
|
#if !defined(XCONJ)
|
||||||
#define OP1 +=
|
#define OP0 -=
|
||||||
#define OP2 +=
|
#define OP1 +=
|
||||||
|
#define OP2 +=
|
||||||
|
#else
|
||||||
|
#define OP0 +=
|
||||||
|
#define OP1 +=
|
||||||
|
#define OP2 -=
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
#define OP0 +=
|
#if !defined(XCONJ)
|
||||||
#define OP1 +=
|
#define OP0 +=
|
||||||
#define OP2 -=
|
#define OP1 -=
|
||||||
|
#define OP2 +=
|
||||||
|
#else
|
||||||
|
#define OP0 -=
|
||||||
|
#define OP1 -=
|
||||||
|
#define OP2 -=
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CGEMV_T_8x4() \
|
#define CGEMV_T_8x4() \
|
||||||
|
|
|
@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if ((inc_x != 0) && (inc_y != 0))
|
||||||
{
|
{
|
||||||
for (i = (n >> 3); i--;)
|
for (i = (n >> 3); i--;)
|
||||||
{
|
{
|
||||||
|
@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (inc_x == inc_y)
|
||||||
|
{
|
||||||
|
if (n & 1)
|
||||||
|
{
|
||||||
|
x0 = *srcx;
|
||||||
|
*srcx = *srcy;
|
||||||
|
*srcy = x0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
BLASLONG ix = 0, iy = 0;
|
||||||
|
while (i < n)
|
||||||
|
{
|
||||||
|
x0 = srcx[ix];
|
||||||
|
srcx[ix] = srcy[iy];
|
||||||
|
srcy[iy] = x0;
|
||||||
|
ix += inc_x;
|
||||||
|
iy += inc_y;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if ((inc_x != 0) && (inc_y != 0))
|
||||||
{
|
{
|
||||||
for (i = (n >> 3); i--;)
|
for (i = (n >> 3); i--;)
|
||||||
{
|
{
|
||||||
|
@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (inc_x == inc_y)
|
||||||
|
{
|
||||||
|
if (n & 1)
|
||||||
|
{
|
||||||
|
x0 = *srcx;
|
||||||
|
*srcx = *srcy;
|
||||||
|
*srcy = x0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
BLASLONG ix = 0, iy = 0;
|
||||||
|
while (i < n)
|
||||||
|
{
|
||||||
|
x0 = srcx[ix];
|
||||||
|
srcx[ix] = srcy[iy];
|
||||||
|
srcy[iy] = x0;
|
||||||
|
ix += inc_x;
|
||||||
|
iy += inc_y;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
#define OP0 +=
|
#define OP0 +=
|
||||||
#define OP1 -=
|
#define OP1 -=
|
||||||
#define OP2 -=
|
#define OP2 +=
|
||||||
#else
|
#else
|
||||||
#define OP0 -=
|
#define OP0 -=
|
||||||
#define OP1 -=
|
#define OP1 -=
|
||||||
#define OP2 +=
|
#define OP2 -=
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#undef OP3
|
#undef OP3
|
||||||
#undef OP4
|
#undef OP4
|
||||||
|
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if !defined(CONJ)
|
||||||
#define OP0 -=
|
#if !defined(XCONJ)
|
||||||
#define OP1 +=
|
#define OP0 -=
|
||||||
#define OP2 +=
|
#define OP1 +=
|
||||||
|
#define OP2 +=
|
||||||
|
#else
|
||||||
|
#define OP0 +=
|
||||||
|
#define OP1 +=
|
||||||
|
#define OP2 -=
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
#define OP0 +=
|
#if !defined(XCONJ)
|
||||||
#define OP1 +=
|
#define OP0 +=
|
||||||
#define OP2 -=
|
#define OP1 -=
|
||||||
|
#define OP2 +=
|
||||||
|
#else
|
||||||
|
#define OP0 -=
|
||||||
|
#define OP1 -=
|
||||||
|
#define OP2 -=
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ZGEMV_T_8x1() \
|
#define ZGEMV_T_8x1() \
|
||||||
|
|
|
@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
|
||||||
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||||
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
|
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
|
||||||
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
|
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
|
||||||
|
|
||||||
|
|
||||||
|
SDOTKERNEL = ../generic/dot.c
|
||||||
|
SDSDOTKERNEL = ../generic/dot.c
|
||||||
|
DSDOTKERNEL = ../generic/dot.c
|
||||||
|
DDOTKERNEL = ../generic/dot.c
|
||||||
|
CDOTKERNEL = ../arm/zdot.c
|
||||||
|
ZDOTKERNEL = ../arm/zdot.c
|
||||||
|
CSWAPKERNEL = ../arm/zswap.c
|
||||||
|
ZSWAPKERNEL = ../arm/zswap.c
|
||||||
|
|
18
param.h
18
param.h
|
@ -1454,22 +1454,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 768
|
#define SGEMM_DEFAULT_P 768
|
||||||
#define SGEMM_DEFAULT_R sgemm_r
|
#define SGEMM_DEFAULT_R sgemm_r
|
||||||
//#define SGEMM_DEFAULT_R 1024
|
/*#define SGEMM_DEFAULT_R 1024*/
|
||||||
|
|
||||||
#define DGEMM_DEFAULT_P 512
|
#define DGEMM_DEFAULT_P 512
|
||||||
#define DGEMM_DEFAULT_R dgemm_r
|
#define DGEMM_DEFAULT_R dgemm_r
|
||||||
//#define DGEMM_DEFAULT_R 1024
|
/*#define DGEMM_DEFAULT_R 1024*/
|
||||||
|
|
||||||
#define QGEMM_DEFAULT_P 504
|
#define QGEMM_DEFAULT_P 504
|
||||||
#define QGEMM_DEFAULT_R qgemm_r
|
#define QGEMM_DEFAULT_R qgemm_r
|
||||||
|
|
||||||
#define CGEMM_DEFAULT_P 768
|
#define CGEMM_DEFAULT_P 768
|
||||||
#define CGEMM_DEFAULT_R cgemm_r
|
#define CGEMM_DEFAULT_R cgemm_r
|
||||||
//#define CGEMM_DEFAULT_R 1024
|
/*#define CGEMM_DEFAULT_R 1024*/
|
||||||
|
|
||||||
#define ZGEMM_DEFAULT_P 512
|
#define ZGEMM_DEFAULT_P 512
|
||||||
#define ZGEMM_DEFAULT_R zgemm_r
|
#define ZGEMM_DEFAULT_R zgemm_r
|
||||||
//#define ZGEMM_DEFAULT_R 1024
|
/*#define ZGEMM_DEFAULT_R 1024*/
|
||||||
|
|
||||||
#define XGEMM_DEFAULT_P 252
|
#define XGEMM_DEFAULT_P 252
|
||||||
#define XGEMM_DEFAULT_R xgemm_r
|
#define XGEMM_DEFAULT_R xgemm_r
|
||||||
|
@ -2571,7 +2571,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef LOONGSON3A
|
#ifdef LOONGSON3A
|
||||||
////Copy from SICORTEX
|
/*Copy from SICORTEX*/
|
||||||
#define SNUMOPT 2
|
#define SNUMOPT 2
|
||||||
#define DNUMOPT 2
|
#define DNUMOPT 2
|
||||||
|
|
||||||
|
@ -2863,7 +2863,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Common ARMv8 parameters
|
/* Common ARMv8 parameters */
|
||||||
#if defined(ARMV8)
|
#if defined(ARMV8)
|
||||||
|
|
||||||
#define SNUMOPT 2
|
#define SNUMOPT 2
|
||||||
|
@ -3066,7 +3066,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
#else // Other/undetected ARMv8 cores
|
#else /* Other/undetected ARMv8 cores */
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -3095,9 +3095,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
#endif // Cores
|
#endif /* Cores */
|
||||||
|
|
||||||
#endif // ARMv8
|
#endif /* ARMv8 */
|
||||||
|
|
||||||
#if defined(ARMV5)
|
#if defined(ARMV5)
|
||||||
#define SNUMOPT 2
|
#define SNUMOPT 2
|
||||||
|
|
|
@ -35,6 +35,9 @@ endif
|
||||||
ifeq ($(C_COMPILER), PGI)
|
ifeq ($(C_COMPILER), PGI)
|
||||||
OBJS = utest_main2.o
|
OBJS = utest_main2.o
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(C_COMPILER), SUN)
|
||||||
|
OBJS = utest_main2.o
|
||||||
|
endif
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
OBJS = utest_main2.o
|
OBJS = utest_main2.o
|
||||||
endif
|
endif
|
||||||
|
|
Loading…
Reference in New Issue