Merge pull request #2328 from martin-frbg/ppc9

Fix the precompiled assembly kernels for POWER9 and use them only when the compiler is not GCC 9 or newer (the C kernels are used otherwise)
This commit is contained in:
Martin Kroeker 2019-11-30 12:23:57 +01:00 committed by GitHub
commit dd04143d4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 229 additions and 184 deletions

View File

@ -5,6 +5,11 @@ endif
TOPDIR = .. TOPDIR = ..
include $(TOPDIR)/Makefile.system include $(TOPDIR)/Makefile.system
# Record whether the GCC major version is at least 9.
# `expr` prints 1 when the first dot-separated field of `$(CC) -dumpversion`
# is >= 9 and 0 otherwise; the `>` is backslash-escaped so the shell does not
# treat it as a redirection.
# This block assigns the variable only when C_COMPILER is GCC.
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
endif
AVX2OPT = AVX2OPT =
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0 # AVX2 support was added in 4.7.0

View File

@ -89,14 +89,30 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#SMINKERNEL = ../arm/min.c #SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c #DMINKERNEL = ../arm/min.c
# #
# ISAMAXKERNEL and ICAMAXKERNEL take the *_power9.S source unless
# GCCVERSIONGTEQ9 is exactly 1 (an unset or empty value also selects the .S
# file); when it is 1 the .c source is used instead.
# IDAMAXKERNEL and IZAMAXKERNEL are assigned the .c source unconditionally.
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMAXKERNEL = isamax_power9.S ISAMAXKERNEL = isamax_power9.S
else
ISAMAXKERNEL = isamax.c
endif
IDAMAXKERNEL = idamax.c IDAMAXKERNEL = idamax.c
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMAXKERNEL = icamax_power9.S ICAMAXKERNEL = icamax_power9.S
else
ICAMAXKERNEL = icamax.c
endif
IZAMAXKERNEL = izamax.c IZAMAXKERNEL = izamax.c
# #
# ISAMINKERNEL and ICAMINKERNEL take the *_power9.S source unless
# GCCVERSIONGTEQ9 is exactly 1 (an unset or empty value also selects the .S
# file); when it is 1 the .c source is used instead.
# IDAMINKERNEL and IZAMINKERNEL are assigned the .c source unconditionally.
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMINKERNEL = isamin_power9.S ISAMINKERNEL = isamin_power9.S
else
ISAMINKERNEL = isamin.c
endif
IDAMINKERNEL = idamin.c IDAMINKERNEL = idamin.c
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMINKERNEL = icamin_power9.S ICAMINKERNEL = icamin_power9.S
else
ICAMINKERNEL = icamin.c
endif
IZAMINKERNEL = izamin.c IZAMINKERNEL = izamin.c
# #
#ISMAXKERNEL = ../arm/imax.c #ISMAXKERNEL = ../arm/imax.c
@ -112,7 +128,11 @@ ZASUMKERNEL = zasum.c
# #
SAXPYKERNEL = saxpy.c SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c DAXPYKERNEL = daxpy.c
# CAXPYKERNEL: use caxpy_power9.S unless GCCVERSIONGTEQ9 is exactly 1
# (unset or empty also selects the .S file); otherwise use caxpy.c.
ifneq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy_power9.S CAXPYKERNEL = caxpy_power9.S
else
CAXPYKERNEL = caxpy.c
endif
ZAXPYKERNEL = zaxpy.c ZAXPYKERNEL = zaxpy.c
# #
SCOPYKERNEL = scopy.c SCOPYKERNEL = scopy.c
@ -123,7 +143,11 @@ ZCOPYKERNEL = zcopy.c
SDOTKERNEL = sdot.c SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c DDOTKERNEL = ddot.c
DSDOTKERNEL = sdot.c DSDOTKERNEL = sdot.c
# CDOTKERNEL: use cdot_power9.S unless GCCVERSIONGTEQ9 is exactly 1
# (unset or empty also selects the .S file); otherwise use cdot.c.
ifneq ($(GCCVERSIONGTEQ9),1)
CDOTKERNEL = cdot_power9.S CDOTKERNEL = cdot_power9.S
else
CDOTKERNEL = cdot.c
endif
ZDOTKERNEL = zdot.c ZDOTKERNEL = zdot.c
# #
SNRM2KERNEL = ../arm/nrm2.c SNRM2KERNEL = ../arm/nrm2.c

View File

@ -16,7 +16,11 @@
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#if _CALL_ELF ==2 #if _CALL_ELF ==2
#ifdef CONJ
.localentry caxpyc_k,.-caxpyc_k
#else
.localentry caxpy_k,.-caxpy_k .localentry caxpy_k,.-caxpy_k
#endif
#endif #endif
mr. 7,3 mr. 7,3
ble 0,.L33 ble 0,.L33
@ -517,7 +521,11 @@
.long 0 .long 0
.byte 0,0,0,0,0,4,0,0 .byte 0,0,0,0,0,4,0,0
#if _CALL_ELF ==2 #if _CALL_ELF ==2
#ifdef CONJ
.size caxpyc_k,.-caxpyc_k
#else
.size caxpy_k,.-caxpy_k .size caxpy_k,.-caxpy_k
#endif
#endif #endif
.section .rodata .section .rodata
.align 4 .align 4

View File

@ -17,7 +17,11 @@ caxpy_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#ifdef CONJ
.localentry caxpyc_k,.-caxpyc_k
#else
.localentry caxpy_k,.-caxpy_k .localentry caxpy_k,.-caxpy_k
#endif
mr. 7,3 mr. 7,3
ble 0,.L33 ble 0,.L33
cmpdi 7,9,1 cmpdi 7,9,1
@ -474,7 +478,11 @@ caxpy_k:
b .L13 b .L13
.long 0 .long 0
.byte 0,0,0,0,0,1,0,0 .byte 0,0,0,0,0,1,0,0
#ifdef CONJ
.size caxpyc_k,.-caxpyc_k
#else
.size caxpy_k,.-caxpy_k .size caxpy_k,.-caxpy_k
#endif
.section .rodata .section .rodata
.align 4 .align 4
.set .LANCHOR0,. + 0 .set .LANCHOR0,. + 0