Merge pull request #2309 from martin-frbg/ppc970-be

Fix PPC970 big-endian support
This commit is contained in:
Martin Kroeker 2019-11-17 18:22:24 +01:00 committed by GitHub
commit 3e67017ac8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 10 deletions

View File

@ -1,3 +1,14 @@
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
SGEMMKERNEL = gemm_kernel.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
else
SGEMMKERNEL = gemm_kernel_altivec.S SGEMMKERNEL = gemm_kernel_altivec.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c SGEMMITCOPY = ../generic/gemm_tcopy_16.c
@ -7,6 +18,8 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
DGEMMKERNEL = gemm_kernel.S DGEMMKERNEL = gemm_kernel.S
DGEMMINCOPY = DGEMMINCOPY =
DGEMMITCOPY = DGEMMITCOPY =
@ -16,6 +29,18 @@ DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ = DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
CGEMMKERNEL = zgemm_kernel.S
CGEMMINCOPY =
CGEMMITCOPY =
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ =
CGEMMITCOPYOBJ =
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
else
CGEMMKERNEL = zgemm_kernel_altivec.S CGEMMKERNEL = zgemm_kernel_altivec.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
@ -25,6 +50,8 @@ CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
ZGEMMKERNEL = zgemm_kernel.S ZGEMMKERNEL = zgemm_kernel.S
ZGEMMINCOPY = ZGEMMINCOPY =
ZGEMMITCOPY = ZGEMMITCOPY =
@ -35,22 +62,30 @@ ZGEMMITCOPYOBJ =
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
#STRSMKERNEL_LN = trsm_kernel_LN.S
#STRSMKERNEL_LT = trsm_kernel_LT.S
#STRSMKERNEL_RN = trsm_kernel_LT.S
#STRSMKERNEL_RT = trsm_kernel_RT.S
DTRSMKERNEL_LN = trsm_kernel_LN.S DTRSMKERNEL_LN = trsm_kernel_LN.S
DTRSMKERNEL_LT = trsm_kernel_LT.S DTRSMKERNEL_LT = trsm_kernel_LT.S
DTRSMKERNEL_RN = trsm_kernel_LT.S DTRSMKERNEL_RN = trsm_kernel_LT.S
DTRSMKERNEL_RT = trsm_kernel_RT.S DTRSMKERNEL_RT = trsm_kernel_RT.S
#CTRSMKERNEL_LN = ztrsm_kernel_LN.S
#CTRSMKERNEL_LT = ztrsm_kernel_LT.S
#CTRSMKERNEL_RN = ztrsm_kernel_LT.S
#CTRSMKERNEL_RT = ztrsm_kernel_RT.S
ZTRSMKERNEL_LN = ztrsm_kernel_LN.S ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
# Additional kernel selections made only when the byte-order test below holds:
# the single-precision real (S) and single-precision complex (C) TRSM kernels
# are pointed at the same assembly sources that the DTRSM/ZTRSM assignments
# above use, and all four ROT kernels are taken from C sources under ../arm/.
# NOTE(review): both operands of the ifeq are make-variable expansions. If
# neither __BYTE_ORDER__ nor __ORDER_BIG_ENDIAN__ is defined as a make variable,
# both expand to the empty string and the test is true -- confirm where the
# build defines them and with what values.
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
# Real single-precision TRSM; RN uses the LT source, matching DTRSMKERNEL_RN.
STRSMKERNEL_LN = trsm_kernel_LN.S
STRSMKERNEL_LT = trsm_kernel_LT.S
STRSMKERNEL_RN = trsm_kernel_LT.S
STRSMKERNEL_RT = trsm_kernel_RT.S
# Complex single-precision TRSM; RN uses the LT source, matching ZTRSMKERNEL_RN.
CTRSMKERNEL_LN = ztrsm_kernel_LN.S
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
CTRSMKERNEL_RT = ztrsm_kernel_RT.S
# ROT kernels: real precisions share rot.c, complex precisions share zrot.c.
SROTKERNEL = ../arm/rot.c
DROTKERNEL = ../arm/rot.c
CROTKERNEL = ../arm/zrot.c
ZROTKERNEL = ../arm/zrot.c
endif

View File

@ -1990,11 +1990,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 3072 #define GEMM_DEFAULT_OFFSET_B 3072
#define GEMM_DEFAULT_ALIGN 0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Select the SGEMM M-direction unroll by the compiler-reported byte order:
 * 4 when __BYTE_ORDER__ is defined and equals __ORDER_BIG_ENDIAN__, otherwise
 * 16 (this also covers a compiler that does not define __BYTE_ORDER__). */
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define SGEMM_DEFAULT_UNROLL_M 4
#else
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#endif
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4 #define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4
/* Select the CGEMM M-direction unroll by the compiler-reported byte order:
 * 2 when __BYTE_ORDER__ is defined and equals __ORDER_BIG_ENDIAN__, otherwise
 * 8 (this also covers a compiler that does not define __BYTE_ORDER__). */
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define CGEMM_DEFAULT_UNROLL_M 2
#else
#define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_M 8
#endif
#define CGEMM_DEFAULT_UNROLL_N 2 #define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2 #define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2