Merge pull request #29 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-02-16 17:28:10 +01:00 committed by GitHub
commit 01834aee33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 66 additions and 20 deletions

View File

@ -92,7 +92,7 @@ steps:
- mkdir build && cd build - mkdir build && cd build
- cmake $CMAKE_FLAGS .. - cmake $CMAKE_FLAGS ..
- make -j - make -j
- ctest - ctest -V
--- ---
kind: pipeline kind: pipeline
@ -116,7 +116,7 @@ steps:
- mkdir build && cd build - mkdir build && cd build
- cmake $CMAKE_FLAGS .. - cmake $CMAKE_FLAGS ..
- make -j - make -j
- ctest - ctest -V
--- ---
kind: pipeline kind: pipeline
@ -140,4 +140,4 @@ steps:
- mkdir build && cd build - mkdir build && cd build
- cmake $CMAKE_FLAGS .. - cmake $CMAKE_FLAGS ..
- make -j - make -j
- ctest - ctest -V

3
.gitignore vendored
View File

@ -87,4 +87,5 @@ build.*
*.swp *.swp
benchmark/*.goto benchmark/*.goto
benchmark/smallscaling benchmark/smallscaling
CMakeCache.txt
CMakeFiles/*

View File

@ -89,30 +89,52 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#SMINKERNEL = ../arm/min.c #SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c #DMINKERNEL = ../arm/min.c
# #
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMAXKERNEL = isamax_power8.S ISAMAXKERNEL = isamax_power8.S
else else
ISAMAXKERNEL = isamax.c ISAMAXKERNEL = isamax.c
endif endif
else
ISAMAXKERNEL = isamax.c
endif
#
IDAMAXKERNEL = idamax.c IDAMAXKERNEL = idamax.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) #
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMAXKERNEL = icamax_power8.S ICAMAXKERNEL = icamax_power8.S
else else
ICAMAXKERNEL = icamax.c ICAMAXKERNEL = icamax.c
endif endif
else
ICAMAXKERNEL = icamax.c
endif
#
IZAMAXKERNEL = izamax.c IZAMAXKERNEL = izamax.c
# #
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMINKERNEL = isamin_power8.S ISAMINKERNEL = isamin_power8.S
else else
ISAMINKERNEL = isamin.c ISAMINKERNEL = isamin.c
endif endif
else
ISAMINKERNEL = isamin.c
endif
#
IDAMINKERNEL = idamin.c IDAMINKERNEL = idamin.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) #
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMINKERNEL = icamin_power8.S ICAMINKERNEL = icamin_power8.S
else else
ICAMINKERNEL = icamin.c ICAMINKERNEL = icamin.c
endif endif
else
ICAMINKERNEL = icamin.c
endif
#
IZAMINKERNEL = izamin.c IZAMINKERNEL = izamin.c
# #
#ISMAXKERNEL = ../arm/imax.c #ISMAXKERNEL = ../arm/imax.c
@ -128,11 +150,16 @@ ZASUMKERNEL = zasum.c
# #
SAXPYKERNEL = saxpy.c SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c DAXPYKERNEL = daxpy.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy_power8.S CAXPYKERNEL = caxpy_power8.S
else else
CAXPYKERNEL = caxpy.c CAXPYKERNEL = caxpy.c
endif endif
else
CAXPYKERNEL = caxpy.c
endif
#
ZAXPYKERNEL = zaxpy.c ZAXPYKERNEL = zaxpy.c
# #
SCOPYKERNEL = scopy.c SCOPYKERNEL = scopy.c

View File

@ -15,7 +15,7 @@ ZASUMKERNEL = zasum_ppc440.S
SAXPYKERNEL = axpy_ppc440.S SAXPYKERNEL = axpy_ppc440.S
DAXPYKERNEL = axpy_ppc440.S DAXPYKERNEL = axpy_ppc440.S
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CAXPYKERNEL = ../arm/zaxpy.c CAXPYKERNEL = ../arm/zaxpy.c
ZAXPYKERNEL = ../arm/zaxpy.c ZAXPYKERNEL = ../arm/zaxpy.c
else else
@ -25,7 +25,7 @@ endif
SDOTKERNEL = dot_ppc440.S SDOTKERNEL = dot_ppc440.S
DDOTKERNEL = dot_ppc440.S DDOTKERNEL = dot_ppc440.S
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CDOTKERNEL = zdot_ppc440.S CDOTKERNEL = zdot_ppc440.S
ZDOTKERNEL = zdot_ppc440.S ZDOTKERNEL = zdot_ppc440.S
else else
@ -62,7 +62,7 @@ ZNRM2KERNEL = znrm2_ppc440.S
SROTKERNEL = rot_ppc440.S SROTKERNEL = rot_ppc440.S
DROTKERNEL = rot_ppc440.S DROTKERNEL = rot_ppc440.S
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CROTKERNEL = zrot_ppc440.S CROTKERNEL = zrot_ppc440.S
ZROTKERNEL = zrot_ppc440.S ZROTKERNEL = zrot_ppc440.S
else else
@ -132,7 +132,7 @@ ZTRSMKERNEL_LT = ztrsm_kernel_ppc440_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_ppc440_LT.S ZTRSMKERNEL_RN = ztrsm_kernel_ppc440_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_ppc440_RT.S ZTRSMKERNEL_RT = ztrsm_kernel_ppc440_RT.S
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
SGEMVNKERNEL = ../arm/gemv_n.c SGEMVNKERNEL = ../arm/gemv_n.c
DGEMVNKERNEL = ../arm/gemv_n.c DGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c SGEMVTKERNEL = ../arm/gemv_t.c

View File

@ -1,4 +1,4 @@
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
SGEMMKERNEL = gemm_kernel.S SGEMMKERNEL = gemm_kernel.S
SGEMMINCOPY = SGEMMINCOPY =
SGEMMITCOPY = SGEMMITCOPY =
@ -30,7 +30,7 @@ DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CGEMMKERNEL = zgemm_kernel.S CGEMMKERNEL = zgemm_kernel.S
CGEMMINCOPY = CGEMMINCOPY =
CGEMMITCOPY = CGEMMITCOPY =
@ -72,7 +72,7 @@ ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
STRSMKERNEL_LN = trsm_kernel_LN.S STRSMKERNEL_LN = trsm_kernel_LN.S
STRSMKERNEL_LT = trsm_kernel_LT.S STRSMKERNEL_LT = trsm_kernel_LT.S
STRSMKERNEL_RN = trsm_kernel_LT.S STRSMKERNEL_RN = trsm_kernel_LT.S

View File

@ -12,6 +12,13 @@
PROLOGUE PROLOGUE
#if _CALL_ELF ==2
#ifdef CONJ
caxpyc_k:
#else
caxpy_k:
#endif
#endif
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l

View File

@ -10,7 +10,9 @@
#include "common.h" #include "common.h"
PROLOGUE PROLOGUE
#if _CALL_ELF ==2
icamin_k:
#endif
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l

View File

@ -11,7 +11,10 @@
#include "common.h" #include "common.h"
PROLOGUE PROLOGUE
#if _CALL_ELF == 2
isamax_k:
#endif
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l

View File

@ -11,6 +11,9 @@
PROLOGUE PROLOGUE
#if _CALL_ELF ==2
isamin_k:
#endif
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l

View File

@ -4,7 +4,6 @@ include_directories(${PROJECT_BINARY_DIR})
set(LAPACK_SOURCES set(LAPACK_SOURCES
getrf/getrf_single.c
potrf/potrf_U_single.c potrf/potrf_U_single.c
potrf/potrf_L_single.c potrf/potrf_L_single.c
lauum/lauum_U_single.c lauum/lauum_U_single.c
@ -45,6 +44,10 @@ GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3)
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" false 3) GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" false 3)
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3)
foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type})
endforeach ()
# dynamic_arch laswp needs arch specific code ? # dynamic_arch laswp needs arch specific code ?
#foreach(TARGET_CORE ${DYNAMIC_CORE}) #foreach(TARGET_CORE ${DYNAMIC_CORE})
# set(TSUFFIX "_${TARGET_CORE}") # set(TSUFFIX "_${TARGET_CORE}")
@ -81,7 +84,7 @@ if (USE_THREAD)
) )
foreach (float_type ${FLOAT_TYPES}) foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${GETRF_SRC}" "" "getrf_parallel" false "" "" false ${float_type}) GenerateNamedObjects("${GETRF_SRC}" "UNIT" "getrf_parallel" false "" "" false ${float_type})
endforeach() endforeach()
GenerateNamedObjects("${PARALLEL_SOURCES}") GenerateNamedObjects("${PARALLEL_SOURCES}")