Merge pull request #105 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-10-20 23:48:53 +02:00 committed by GitHub
commit 336e35469a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 74 additions and 50 deletions

View File

@ -641,6 +641,7 @@ DYNAMIC_CORE += POWER8
ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9
DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
endif
ifeq ($(C_COMPILER), GCC)
ifeq ($(GCCVERSIONGT5), 1)
@ -648,11 +649,14 @@ DYNAMIC_CORE += POWER9
else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif
ifeq ($(GCCVERSIONGTEQ11), 1)
LDVERSIONGTEQ35 := $(shell expr `ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-"` >= 35)
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
else ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
ifeq ($(GCCMINORVERSIONGTEQ2)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
endif
else
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)

View File

@ -49,6 +49,7 @@ if (DYNAMIC_ARCH)
if (POWER)
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
set(CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_P10_SUPPORT")
endif ()
if (X86)

View File

@ -6,10 +6,10 @@ extern gotoblas_t gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
extern gotoblas_t gotoblas_POWER9;
#endif
#if (!defined __GNUC__) || ( __GNUC__ >= 11) \
|| (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
#define HAVE_P10_SUPPORT 1
#endif
//#if (!defined __GNUC__) || ( __GNUC__ >= 11) \
// || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
//#define HAVE_P10_SUPPORT 1
//#endif
#ifdef HAVE_P10_SUPPORT
extern gotoblas_t gotoblas_POWER10;
#endif

View File

@ -122,7 +122,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
@ -189,9 +189,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -120,7 +120,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",

View File

@ -104,7 +104,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",

View File

@ -122,7 +122,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
@ -189,9 +189,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -67,8 +67,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (y), // 3
"r" (alpha) // 4
: "cc",
"%xmm0",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -84,8 +84,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"r" (y), // 3
"r" (dot) // 4
: "cc",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -91,6 +91,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
: "cc",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
@ -155,6 +156,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
: "cc",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -89,8 +89,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"r" (y), // 3
"r" (dot) // 4
: "cc",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -88,6 +88,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
: "cc",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -105,9 +105,8 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"r" (alpha) // 8
: "cc",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm8", "%xmm9",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
@ -182,11 +181,10 @@ static void dgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"r" (ap[1]), // 5
"r" (alpha) // 6
: "cc",
"%xmm0", "%xmm1",
"%xmm4", "%xmm5",
"%xmm6",
"%xmm8",
"%xmm12", "%xmm13",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
}

View File

@ -140,7 +140,7 @@ static void dgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"%xmm2", "%xmm3",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm8", "%xmm9",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
@ -235,9 +235,11 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"r" (ap[3]), // 7
"r" (alpha) // 8
: "cc",
"%xmm0", "%xmm1",
"%xmm2", "%xmm3",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm8", "%xmm9",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -117,7 +117,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"r" (ap[2]), // 6
"r" (ap[3]) // 7
: "cc",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -67,7 +67,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (y), // 3
"r" (alpha) // 4
: "cc",
"%xmm0",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"

View File

@ -86,7 +86,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (y), // 3
"r" (alpha) // 4
: "cc",
"%xmm0",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
@ -147,7 +148,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (y), // 3
"r" (alpha) // 4
: "cc",
"%xmm0",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"

View File

@ -87,8 +87,9 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"r" (y), // 3
"r" (dot) // 4
: "cc",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -90,8 +90,9 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"r" (y), // 3
"r" (dot) // 4
: "cc",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -164,11 +164,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"r" (ap[3]), // 8
"r" (alpha) // 9
: "cc",
"%xmm0", "%xmm1",
"%xmm2", "%xmm3",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm8", "%xmm9",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
@ -286,9 +284,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"r" (ap[3]), // 7
"r" (alpha) // 8
: "cc",
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm8", "%xmm9",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -138,7 +138,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"r" (ap[2]), // 6
"r" (ap[3]) // 7
: "cc",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -122,7 +122,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
@ -189,9 +189,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -120,7 +120,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",

View File

@ -108,9 +108,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
return;
@ -185,9 +186,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);

View File

@ -122,7 +122,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
@ -189,9 +189,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"r" (alpha), // 4
"r" (mvec) // 5
: "cc",
"%xmm0", "%xmm1",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);