Merge pull request #20 from xianyi/develop

Rebase
This commit is contained in:
Martin Kroeker 2019-12-03 08:22:40 +01:00 committed by GitHub
commit 10705183ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 241 additions and 190 deletions

View File

@@ -5,6 +5,11 @@ endif
TOPDIR = .. TOPDIR = ..
include $(TOPDIR)/Makefile.system include $(TOPDIR)/Makefile.system
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
endif
AVX2OPT = AVX2OPT =
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0 # AVX2 support was added in 4.7.0

View File

@@ -89,14 +89,30 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#SMINKERNEL = ../arm/min.c #SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c #DMINKERNEL = ../arm/min.c
# #
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMAXKERNEL = isamax_power9.S ISAMAXKERNEL = isamax_power9.S
else
ISAMAXKERNEL = isamax.c
endif
IDAMAXKERNEL = idamax.c IDAMAXKERNEL = idamax.c
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMAXKERNEL = icamax_power9.S ICAMAXKERNEL = icamax_power9.S
else
ICAMAXKERNEL = icamax.c
endif
IZAMAXKERNEL = izamax.c IZAMAXKERNEL = izamax.c
# #
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMINKERNEL = isamin_power9.S ISAMINKERNEL = isamin_power9.S
else
ISAMINKERNEL = isamin.c
endif
IDAMINKERNEL = idamin.c IDAMINKERNEL = idamin.c
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMINKERNEL = icamin_power9.S ICAMINKERNEL = icamin_power9.S
else
ICAMINKERNEL = icamin.c
endif
IZAMINKERNEL = izamin.c IZAMINKERNEL = izamin.c
# #
#ISMAXKERNEL = ../arm/imax.c #ISMAXKERNEL = ../arm/imax.c
@@ -112,7 +128,11 @@ ZASUMKERNEL = zasum.c
# #
SAXPYKERNEL = saxpy.c SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c DAXPYKERNEL = daxpy.c
ifneq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy_power9.S CAXPYKERNEL = caxpy_power9.S
else
CAXPYKERNEL = caxpy.c
endif
ZAXPYKERNEL = zaxpy.c ZAXPYKERNEL = zaxpy.c
# #
SCOPYKERNEL = scopy.c SCOPYKERNEL = scopy.c
@@ -123,7 +143,11 @@ ZCOPYKERNEL = zcopy.c
SDOTKERNEL = sdot.c SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c DDOTKERNEL = ddot.c
DSDOTKERNEL = sdot.c DSDOTKERNEL = sdot.c
ifneq ($(GCCVERSIONGTEQ9),1)
CDOTKERNEL = cdot_power9.S CDOTKERNEL = cdot_power9.S
else
CDOTKERNEL = cdot.c
endif
ZDOTKERNEL = zdot.c ZDOTKERNEL = zdot.c
# #
SNRM2KERNEL = ../arm/nrm2.c SNRM2KERNEL = ../arm/nrm2.c

View File

@@ -16,7 +16,11 @@
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#if _CALL_ELF ==2 #if _CALL_ELF ==2
#ifdef CONJ
.localentry caxpyc_k,.-caxpyc_k
#else
.localentry caxpy_k,.-caxpy_k .localentry caxpy_k,.-caxpy_k
#endif
#endif #endif
mr. 7,3 mr. 7,3
ble 0,.L33 ble 0,.L33
@@ -517,7 +521,11 @@
.long 0 .long 0
.byte 0,0,0,0,0,4,0,0 .byte 0,0,0,0,0,4,0,0
#if _CALL_ELF ==2 #if _CALL_ELF ==2
#ifdef CONJ
.size caxpyc_k,.-caxpyc_k
#else
.size caxpy_k,.-caxpy_k .size caxpy_k,.-caxpy_k
#endif
#endif #endif
.section .rodata .section .rodata
.align 4 .align 4

View File

@@ -17,7 +17,11 @@ caxpy_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#ifdef CONJ
.localentry caxpyc_k,.-caxpyc_k
#else
.localentry caxpy_k,.-caxpy_k .localentry caxpy_k,.-caxpy_k
#endif
mr. 7,3 mr. 7,3
ble 0,.L33 ble 0,.L33
cmpdi 7,9,1 cmpdi 7,9,1
@@ -474,7 +478,11 @@ caxpy_k:
b .L13 b .L13
.long 0 .long 0
.byte 0,0,0,0,0,1,0,0 .byte 0,0,0,0,0,1,0,0
#ifdef CONJ
.size caxpyc_k,.-caxpyc_k
#else
.size caxpy_k,.-caxpy_k .size caxpy_k,.-caxpy_k
#endif
.section .rodata .section .rodata
.align 4 .align 4
.set .LANCHOR0,. + 0 .set .LANCHOR0,. + 0

View File

@@ -13,10 +13,7 @@
cdot_k: cdot_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: mr. 9,3
addi 2,2,.TOC.-.LCF0@l
.localentry cdot_k,.-cdot_k
mr. 9,3
ble 0,.L10 ble 0,.L10
cmpdi 7,5,1 cmpdi 7,5,1
beq 7,.L18 beq 7,.L18

View File

@@ -33,6 +33,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HAVE_KERNEL_4x4 1 #define HAVE_KERNEL_4x4 1
#if defined(__clang_patchlevel__) && __clang_major__ == 9 && __clang_minor__ == 0 && __clang_patchlevel__ == 0
#pragma clang optimize off
#endif
static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FLOAT *y, FLOAT *temp1, FLOAT *temp2) static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FLOAT *y, FLOAT *temp1, FLOAT *temp2)
{ {
@@ -156,6 +160,11 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
temp2[2] += half_accum2[0]; temp2[2] += half_accum2[0];
temp2[3] += half_accum3[0]; temp2[3] += half_accum3[0];
} }
#if defined(__clang_patchlevel__) && __clang_major__ == 9 && __clang_minor__ == 0 && __clang_patchlevel__ == 0
#pragma clang optimize on
#endif
#else #else
#include "dsymv_L_microk_haswell-2.c" #include "dsymv_L_microk_haswell-2.c"
#endif #endif