Merge pull request #20 from xianyi/develop

Rebase
This commit is contained in:
Martin Kroeker 2019-12-03 08:22:40 +01:00 committed by GitHub
commit 10705183ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 241 additions and 190 deletions

View File

@ -5,6 +5,11 @@ endif
TOPDIR = ..
include $(TOPDIR)/Makefile.system
# For GCC only: compare the major component of `$(CC) -dumpversion` against 9.
# expr prints 1 when the comparison holds and 0 otherwise, and that output is
# stored once (simple assignment) in GCCVERSIONGTEQ9.
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ9 := $(shell expr $$($(CC) -dumpversion | cut -d. -f1) \>= 9)
endif
AVX2OPT =
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0

View File

@ -89,14 +89,30 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c
#
# IDAMAXKERNEL and IZAMAXKERNEL always use the C sources.
IDAMAXKERNEL = idamax.c
IZAMAXKERNEL = izamax.c
# ISAMAXKERNEL and ICAMAXKERNEL use the C sources only when GCCVERSIONGTEQ9
# is exactly 1; for any other value (including unset) the *_power9.S
# assembly files are selected instead.
ifeq ($(GCCVERSIONGTEQ9),1)
ISAMAXKERNEL = isamax.c
ICAMAXKERNEL = icamax.c
else
ISAMAXKERNEL = isamax_power9.S
ICAMAXKERNEL = icamax_power9.S
endif
#
# IDAMINKERNEL and IZAMINKERNEL always use the C sources.
IDAMINKERNEL = idamin.c
IZAMINKERNEL = izamin.c
# ISAMINKERNEL and ICAMINKERNEL use the C sources only when GCCVERSIONGTEQ9
# is exactly 1; for any other value (including unset) the *_power9.S
# assembly files are selected instead.
ifeq ($(GCCVERSIONGTEQ9),1)
ISAMINKERNEL = isamin.c
ICAMINKERNEL = icamin.c
else
ISAMINKERNEL = isamin_power9.S
ICAMINKERNEL = icamin_power9.S
endif
#
#ISMAXKERNEL = ../arm/imax.c
@ -112,7 +128,11 @@ ZASUMKERNEL = zasum.c
#
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
# CAXPYKERNEL: C source when GCCVERSIONGTEQ9 is exactly 1, otherwise
# (any other value, including unset) the caxpy_power9.S assembly file.
ifeq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy.c
else
CAXPYKERNEL = caxpy_power9.S
endif
ZAXPYKERNEL = zaxpy.c
#
SCOPYKERNEL = scopy.c
@ -123,7 +143,11 @@ ZCOPYKERNEL = zcopy.c
SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c
DSDOTKERNEL = sdot.c
# CDOTKERNEL: C source when GCCVERSIONGTEQ9 is exactly 1, otherwise
# (any other value, including unset) the cdot_power9.S assembly file.
ifeq ($(GCCVERSIONGTEQ9),1)
CDOTKERNEL = cdot.c
else
CDOTKERNEL = cdot_power9.S
endif
ZDOTKERNEL = zdot.c
#
SNRM2KERNEL = ../arm/nrm2.c

View File

@ -16,7 +16,11 @@
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l
#if _CALL_ELF ==2
#ifdef CONJ
.localentry caxpyc_k,.-caxpyc_k
#else
.localentry caxpy_k,.-caxpy_k
#endif
#endif
mr. 7,3
ble 0,.L33
@ -517,7 +521,11 @@
.long 0
.byte 0,0,0,0,0,4,0,0
#if _CALL_ELF ==2
#ifdef CONJ
.size caxpyc_k,.-caxpyc_k
#else
.size caxpy_k,.-caxpy_k
#endif
#endif
.section .rodata
.align 4

View File

@ -17,7 +17,11 @@ caxpy_k:
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l
#ifdef CONJ
.localentry caxpyc_k,.-caxpyc_k
#else
.localentry caxpy_k,.-caxpy_k
#endif
mr. 7,3
ble 0,.L33
cmpdi 7,9,1
@ -474,7 +478,11 @@ caxpy_k:
b .L13
.long 0
.byte 0,0,0,0,0,1,0,0
#ifdef CONJ
.size caxpyc_k,.-caxpyc_k
#else
.size caxpy_k,.-caxpy_k
#endif
.section .rodata
.align 4
.set .LANCHOR0,. + 0

View File

@ -13,10 +13,7 @@
cdot_k:
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l
.localentry cdot_k,.-cdot_k
mr. 9,3
0: mr. 9,3
ble 0,.L10
cmpdi 7,5,1
beq 7,.L18

View File

@ -33,6 +33,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HAVE_KERNEL_4x4 1
/* When the compiler reports itself as exactly clang 9.0.0, switch clang
 * optimization off from this point on; the matching
 * "#pragma clang optimize on" later in the file switches it back on.
 * NOTE(review): presumably a workaround for a code-generation problem in
 * that specific release -- confirm against the project history. */
#if defined(__clang_patchlevel__) && __clang_major__ == 9 && __clang_minor__ == 0 && __clang_patchlevel__ == 0
#pragma clang optimize off
#endif
static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FLOAT *y, FLOAT *temp1, FLOAT *temp2)
{
@ -156,6 +160,11 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
temp2[2] += half_accum2[0];
temp2[3] += half_accum3[0];
}
/* Switch clang optimization back on. The condition is the same exact
 * clang 9.0.0 check that guards the earlier "#pragma clang optimize off". */
#if defined(__clang_patchlevel__) && __clang_major__ == 9 && __clang_minor__ == 0 && __clang_patchlevel__ == 0
#pragma clang optimize on
#endif
#else
#include "dsymv_L_microk_haswell-2.c"
#endif