From cf2a8e410cc095b40d3b357e74a5f77af83ce602 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 Nov 2019 21:55:04 -0700 Subject: [PATCH 1/6] Fix SEGV in cdot_power9 We were corrupting r2 because the local entry wasn't being set up correctly. --- kernel/power/cdot_power9.S | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/power/cdot_power9.S b/kernel/power/cdot_power9.S index 9ec7cdd85..6ca7a02a5 100644 --- a/kernel/power/cdot_power9.S +++ b/kernel/power/cdot_power9.S @@ -13,10 +13,7 @@ cdot_k: .LCF0: -0: addis 2,12,.TOC.-.LCF0@ha - addi 2,2,.TOC.-.LCF0@l - .localentry cdot_k,.-cdot_k - mr. 9,3 +0: mr. 9,3 ble 0,.L10 cmpdi 7,5,1 beq 7,.L18 From 97762234f9517f1ae90fc97a4456cd0923c30319 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 29 Nov 2019 23:47:23 +0100 Subject: [PATCH 2/6] Add variable for gcc >=9 test used in KERNEL.POWER9 --- kernel/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/Makefile b/kernel/Makefile index e81225075..9b468a6af 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,6 +5,11 @@ endif TOPDIR = .. 
include $(TOPDIR)/Makefile.system + +ifeq ($(C_COMPILER), GCC) +GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) +endif + AVX2OPT = ifeq ($(C_COMPILER), GCC) # AVX2 support was added in 4.7.0 From a9b62c03f852a38cc2171a652b93a673591c483b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 29 Nov 2019 23:49:50 +0100 Subject: [PATCH 3/6] Substitute precompiled gcc7 codes only when gcc is older than 9.x --- kernel/power/KERNEL.POWER9 | 392 ++++++++++++++++++++----------------- 1 file changed, 208 insertions(+), 184 deletions(-) diff --git a/kernel/power/KERNEL.POWER9 b/kernel/power/KERNEL.POWER9 index 2ed843fff..4bfa017e1 100644 --- a/kernel/power/KERNEL.POWER9 +++ b/kernel/power/KERNEL.POWER9 @@ -1,184 +1,208 @@ -#SGEMM_BETA = ../generic/gemm_beta.c -#DGEMM_BETA = ../generic/gemm_beta.c -#CGEMM_BETA = ../generic/zgemm_beta.c -#ZGEMM_BETA = ../generic/zgemm_beta.c - -STRMMKERNEL = sgemm_kernel_power9.S -DTRMMKERNEL = dgemm_kernel_power9.S -CTRMMKERNEL = cgemm_kernel_power9.S -ZTRMMKERNEL = zgemm_kernel_power9.S - -SGEMMKERNEL = sgemm_kernel_power9.S -SGEMMINCOPY = ../generic/gemm_ncopy_16.c -SGEMMITCOPY = sgemm_tcopy_16_power8.S -SGEMMONCOPY = ../generic/gemm_ncopy_8.c -SGEMMOTCOPY = sgemm_tcopy_8_power8.S -SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) -SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) -SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) -SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) - -DGEMMKERNEL = dgemm_kernel_power9.S -DGEMMINCOPY = ../generic/gemm_ncopy_16.c -DGEMMITCOPY = dgemm_tcopy_16_power8.S -DGEMMONCOPY = dgemm_ncopy_4_power8.S -DGEMMOTCOPY = ../generic/gemm_tcopy_4.c -DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) -DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) -DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) -DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) - -CGEMMKERNEL = cgemm_kernel_power9.S -CGEMMINCOPY = ../generic/zgemm_ncopy_8.c -CGEMMITCOPY = ../generic/zgemm_tcopy_8.c -CGEMMONCOPY = 
../generic/zgemm_ncopy_4.c -CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) -CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) -CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) -CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) - -ZGEMMKERNEL = zgemm_kernel_power9.S -ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c -ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c -ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c -ZGEMMITCOPY = zgemm_tcopy_8_power8.S -ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) -ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) -ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) -ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) - -STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S -DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -#Todo: CGEMM3MKERNEL should be 4x4 blocksizes. 
-#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S -#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S - -#Pure C for other kernels -#SAMAXKERNEL = ../arm/amax.c -#DAMAXKERNEL = ../arm/amax.c -#CAMAXKERNEL = ../arm/zamax.c -#ZAMAXKERNEL = ../arm/zamax.c -# -#SAMINKERNEL = ../arm/amin.c -#DAMINKERNEL = ../arm/amin.c -#CAMINKERNEL = ../arm/zamin.c -#ZAMINKERNEL = ../arm/zamin.c -# -#SMAXKERNEL = ../arm/max.c -#DMAXKERNEL = ../arm/max.c -# -#SMINKERNEL = ../arm/min.c -#DMINKERNEL = ../arm/min.c -# -ISAMAXKERNEL = isamax_power9.S -IDAMAXKERNEL = idamax.c -ICAMAXKERNEL = icamax_power9.S -IZAMAXKERNEL = izamax.c -# -ISAMINKERNEL = isamin_power9.S -IDAMINKERNEL = idamin.c -ICAMINKERNEL = icamin_power9.S -IZAMINKERNEL = izamin.c -# -#ISMAXKERNEL = ../arm/imax.c -#IDMAXKERNEL = ../arm/imax.c -# -#ISMINKERNEL = ../arm/imin.c -#IDMINKERNEL = ../arm/imin.c -# -SASUMKERNEL = sasum.c -DASUMKERNEL = dasum.c -CASUMKERNEL = casum.c -ZASUMKERNEL = zasum.c -# -SAXPYKERNEL = saxpy.c -DAXPYKERNEL = daxpy.c -CAXPYKERNEL = caxpy_power9.S -ZAXPYKERNEL = zaxpy.c -# -SCOPYKERNEL = scopy.c -DCOPYKERNEL = dcopy.c -CCOPYKERNEL = ccopy.c -ZCOPYKERNEL = zcopy.c -# -SDOTKERNEL = sdot.c -DDOTKERNEL = ddot.c -DSDOTKERNEL = sdot.c -CDOTKERNEL = cdot_power9.S -ZDOTKERNEL = zdot.c -# -SNRM2KERNEL = ../arm/nrm2.c -DNRM2KERNEL = ../arm/nrm2.c -CNRM2KERNEL = ../arm/znrm2.c -ZNRM2KERNEL = ../arm/znrm2.c -# -SROTKERNEL = srot.c -DROTKERNEL = drot.c -CROTKERNEL = crot.c -ZROTKERNEL = zrot.c -# -SSCALKERNEL = sscal.c -DSCALKERNEL = dscal.c -CSCALKERNEL = zscal.c -ZSCALKERNEL = zscal.c -# -SSWAPKERNEL = sswap.c -DSWAPKERNEL = dswap.c -CSWAPKERNEL = cswap.c -ZSWAPKERNEL = zswap.c -# - -SGEMVNKERNEL = sgemv_n.c -DGEMVNKERNEL = dgemv_n.c -CGEMVNKERNEL = cgemv_n.c -ZGEMVNKERNEL = zgemv_n_4.c -# -SGEMVTKERNEL = sgemv_t.c -DGEMVTKERNEL = dgemv_t.c -CGEMVTKERNEL = cgemv_t.c -ZGEMVTKERNEL = zgemv_t_4.c - - -#SSYMV_U_KERNEL = ../generic/symv_k.c -#SSYMV_L_KERNEL = ../generic/symv_k.c -#DSYMV_U_KERNEL = ../generic/symv_k.c 
-#DSYMV_L_KERNEL = ../generic/symv_k.c -#QSYMV_U_KERNEL = ../generic/symv_k.c -#QSYMV_L_KERNEL = ../generic/symv_k.c -#CSYMV_U_KERNEL = ../generic/zsymv_k.c -#CSYMV_L_KERNEL = ../generic/zsymv_k.c -#ZSYMV_U_KERNEL = ../generic/zsymv_k.c -#ZSYMV_L_KERNEL = ../generic/zsymv_k.c -#XSYMV_U_KERNEL = ../generic/zsymv_k.c -#XSYMV_L_KERNEL = ../generic/zsymv_k.c - -#ZHEMV_U_KERNEL = ../generic/zhemv_k.c -#ZHEMV_L_KERNEL = ../generic/zhemv_k.c - -LSAME_KERNEL = ../generic/lsame.c -SCABS_KERNEL = ../generic/cabs.c -DCABS_KERNEL = ../generic/cabs.c -QCABS_KERNEL = ../generic/cabs.c - -#Dump kernel -CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c -ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c +#SGEMM_BETA = ../generic/gemm_beta.c +#DGEMM_BETA = ../generic/gemm_beta.c +#CGEMM_BETA = ../generic/zgemm_beta.c +#ZGEMM_BETA = ../generic/zgemm_beta.c + +STRMMKERNEL = sgemm_kernel_power9.S +DTRMMKERNEL = dgemm_kernel_power9.S +CTRMMKERNEL = cgemm_kernel_power9.S +ZTRMMKERNEL = zgemm_kernel_power9.S + +SGEMMKERNEL = sgemm_kernel_power9.S +SGEMMINCOPY = ../generic/gemm_ncopy_16.c +SGEMMITCOPY = sgemm_tcopy_16_power8.S +SGEMMONCOPY = ../generic/gemm_ncopy_8.c +SGEMMOTCOPY = sgemm_tcopy_8_power8.S +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) + +DGEMMKERNEL = dgemm_kernel_power9.S +DGEMMINCOPY = ../generic/gemm_ncopy_16.c +DGEMMITCOPY = dgemm_tcopy_16_power8.S +DGEMMONCOPY = dgemm_ncopy_4_power8.S +DGEMMOTCOPY = ../generic/gemm_tcopy_4.c +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) + +CGEMMKERNEL = cgemm_kernel_power9.S +CGEMMINCOPY = ../generic/zgemm_ncopy_8.c +CGEMMITCOPY = ../generic/zgemm_tcopy_8.c +CGEMMONCOPY = ../generic/zgemm_ncopy_4.c +CGEMMOTCOPY = 
../generic/zgemm_tcopy_4.c +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) + +ZGEMMKERNEL = zgemm_kernel_power9.S +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c +ZGEMMITCOPY = zgemm_tcopy_8_power8.S +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +#Todo: CGEMM3MKERNEL should be 4x4 blocksizes. 
+#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S +#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S + +#Pure C for other kernels +#SAMAXKERNEL = ../arm/amax.c +#DAMAXKERNEL = ../arm/amax.c +#CAMAXKERNEL = ../arm/zamax.c +#ZAMAXKERNEL = ../arm/zamax.c +# +#SAMINKERNEL = ../arm/amin.c +#DAMINKERNEL = ../arm/amin.c +#CAMINKERNEL = ../arm/zamin.c +#ZAMINKERNEL = ../arm/zamin.c +# +#SMAXKERNEL = ../arm/max.c +#DMAXKERNEL = ../arm/max.c +# +#SMINKERNEL = ../arm/min.c +#DMINKERNEL = ../arm/min.c +# +ifneq ($(GCCVERSIONGTEQ9),1) +ISAMAXKERNEL = isamax_power9.S +else +ISAMAXKERNEL = isamax.c +endif +IDAMAXKERNEL = idamax.c +ifneq ($(GCCVERSIONGTEQ9),1) +ICAMAXKERNEL = icamax_power9.S +else +ICAMAXKERNEL = icamax.c +endif +IZAMAXKERNEL = izamax.c +# +ifneq ($(GCCVERSIONGTEQ9),1) +ISAMINKERNEL = isamin_power9.S +else +ISAMINKERNEL = isamin.c +endif +IDAMINKERNEL = idamin.c +ifneq ($(GCCVERSIONGTEQ9),1) +ICAMINKERNEL = icamin_power9.S +else +ICAMINKERNEL = icamin.c +endif +IZAMINKERNEL = izamin.c +# +#ISMAXKERNEL = ../arm/imax.c +#IDMAXKERNEL = ../arm/imax.c +# +#ISMINKERNEL = ../arm/imin.c +#IDMINKERNEL = ../arm/imin.c +# +SASUMKERNEL = sasum.c +DASUMKERNEL = dasum.c +CASUMKERNEL = casum.c +ZASUMKERNEL = zasum.c +# +SAXPYKERNEL = saxpy.c +DAXPYKERNEL = daxpy.c +ifneq ($(GCCVERSIONGTEQ9),1) +CAXPYKERNEL = caxpy_power9.S +else +CAXPYKERNEL = caxpy.c +endif +ZAXPYKERNEL = zaxpy.c +# +SCOPYKERNEL = scopy.c +DCOPYKERNEL = dcopy.c +CCOPYKERNEL = ccopy.c +ZCOPYKERNEL = zcopy.c +# +SDOTKERNEL = sdot.c +DDOTKERNEL = ddot.c +DSDOTKERNEL = sdot.c +ifneq ($(GCCVERSIONGTEQ9),1) +CDOTKERNEL = cdot_power9.S +else +CDOTKERNEL = cdot.c +endif +ZDOTKERNEL = zdot.c +# +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c +# +SROTKERNEL = srot.c +DROTKERNEL = drot.c +CROTKERNEL = crot.c +ZROTKERNEL = zrot.c +# +SSCALKERNEL = sscal.c +DSCALKERNEL = dscal.c +CSCALKERNEL = zscal.c +ZSCALKERNEL = zscal.c +# +SSWAPKERNEL = sswap.c +DSWAPKERNEL = 
dswap.c +CSWAPKERNEL = cswap.c +ZSWAPKERNEL = zswap.c +# + +SGEMVNKERNEL = sgemv_n.c +DGEMVNKERNEL = dgemv_n.c +CGEMVNKERNEL = cgemv_n.c +ZGEMVNKERNEL = zgemv_n_4.c +# +SGEMVTKERNEL = sgemv_t.c +DGEMVTKERNEL = dgemv_t.c +CGEMVTKERNEL = cgemv_t.c +ZGEMVTKERNEL = zgemv_t_4.c + + +#SSYMV_U_KERNEL = ../generic/symv_k.c +#SSYMV_L_KERNEL = ../generic/symv_k.c +#DSYMV_U_KERNEL = ../generic/symv_k.c +#DSYMV_L_KERNEL = ../generic/symv_k.c +#QSYMV_U_KERNEL = ../generic/symv_k.c +#QSYMV_L_KERNEL = ../generic/symv_k.c +#CSYMV_U_KERNEL = ../generic/zsymv_k.c +#CSYMV_L_KERNEL = ../generic/zsymv_k.c +#ZSYMV_U_KERNEL = ../generic/zsymv_k.c +#ZSYMV_L_KERNEL = ../generic/zsymv_k.c +#XSYMV_U_KERNEL = ../generic/zsymv_k.c +#XSYMV_L_KERNEL = ../generic/zsymv_k.c + +#ZHEMV_U_KERNEL = ../generic/zhemv_k.c +#ZHEMV_L_KERNEL = ../generic/zhemv_k.c + +LSAME_KERNEL = ../generic/lsame.c +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +#Dump kernel +CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c +ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c From 2181fb7047f87f66ae1584c8af4e66e766b31b53 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 29 Nov 2019 23:54:15 +0100 Subject: [PATCH 4/6] Fix caxpy/caxpyc naming in localentry --- kernel/power/caxpy_power8.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/power/caxpy_power8.S b/kernel/power/caxpy_power8.S index b5f841d2e..294a1d24d 100644 --- a/kernel/power/caxpy_power8.S +++ b/kernel/power/caxpy_power8.S @@ -16,7 +16,11 @@ 0: addis 2,12,.TOC.-.LCF0@ha addi 2,2,.TOC.-.LCF0@l #if _CALL_ELF ==2 +#ifdef CONJ + .localentry caxpyc_k,.-caxpyc_k +#else .localentry caxpy_k,.-caxpy_k +#endif #endif mr. 
7,3 ble 0,.L33 @@ -517,7 +521,11 @@ .long 0 .byte 0,0,0,0,0,4,0,0 #if _CALL_ELF ==2 +#ifdef CONJ + .size caxpyc_k,.-caxpyc_k +#else .size caxpy_k,.-caxpy_k +#endif #endif .section .rodata .align 4 From dedd822d1aeb2315e44e47e97167ae8a02c9c9ff Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 29 Nov 2019 23:56:57 +0100 Subject: [PATCH 5/6] Fix caxpy/caxpyc naming in localentry --- kernel/power/caxpy_power9.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/power/caxpy_power9.S b/kernel/power/caxpy_power9.S index 48e6e5ba3..844cacd50 100644 --- a/kernel/power/caxpy_power9.S +++ b/kernel/power/caxpy_power9.S @@ -17,7 +17,11 @@ caxpy_k: .LCF0: 0: addis 2,12,.TOC.-.LCF0@ha addi 2,2,.TOC.-.LCF0@l +#ifdef CONJ + .localentry caxpyc_k,.-caxpyc_k +#else .localentry caxpy_k,.-caxpy_k +#endif mr. 7,3 ble 0,.L33 cmpdi 7,9,1 @@ -474,7 +478,11 @@ caxpy_k: b .L13 .long 0 .byte 0,0,0,0,0,1,0,0 +#ifdef CONJ + .size caxpyc_k,.-caxpyc_k +#else .size caxpy_k,.-caxpy_k +#endif .section .rodata .align 4 .set .LANCHOR0,. + 0 From b863b32ac5598e96b76d5783ae3a96c2b58e1712 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 1 Dec 2019 11:55:49 -0600 Subject: [PATCH 6/6] Work around an ICE in clang 9.0.0 This bug is present in neither 8.x nor the 9.0 daily snapshot. --- kernel/x86_64/dsymv_L_microk_skylakex-2.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/x86_64/dsymv_L_microk_skylakex-2.c b/kernel/x86_64/dsymv_L_microk_skylakex-2.c index 8244dffa1..bdcd914fb 100644 --- a/kernel/x86_64/dsymv_L_microk_skylakex-2.c +++ b/kernel/x86_64/dsymv_L_microk_skylakex-2.c @@ -33,6 +33,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define HAVE_KERNEL_4x4 1 +#if defined(__clang_patchlevel__) && __clang_major__ == 9 && __clang_minor__ == 0 && __clang_patchlevel__ == 0 +#pragma clang optimize off +#endif + static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FLOAT *y, FLOAT *temp1, FLOAT *temp2) { @@ -155,7 +159,12 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL temp2[1] += half_accum1[0]; temp2[2] += half_accum2[0]; temp2[3] += half_accum3[0]; -} +} + +#if defined(__clang_patchlevel__) && __clang_major__ == 9 && __clang_minor__ == 0 && __clang_patchlevel__ == 0 +#pragma clang optimize on +#endif + #else #include "dsymv_L_microk_haswell-2.c" -#endif \ No newline at end of file +#endif