From 5f3b68b4d454d2eebcc60df48f330f1ec65f79cc Mon Sep 17 00:00:00 2001
From: wernsaar
Date: Sat, 10 May 2014 11:24:07 +0200
Subject: [PATCH] replaced sgemm and cgemm kernels because lapack bugs

---
NOTE(review), not part of the commit message: the last param.h hunk header
declares 10 lines per side, but only 9 are present in this copy; the missing
trailing context line is presumably a blank line that was stripped (confirm
against param.h). Spacing inside each line is kept exactly as found in this
copy; if the target files use tabs or aligned columns instead, apply with
`git apply --ignore-whitespace` or `patch -l`.

 kernel/x86_64/KERNEL.SANDYBRIDGE | 30 ++++++++++++++++--------------
 param.h                          |  4 ++--
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/kernel/x86_64/KERNEL.SANDYBRIDGE b/kernel/x86_64/KERNEL.SANDYBRIDGE
index c321be752..4d095d21f 100644
--- a/kernel/x86_64/KERNEL.SANDYBRIDGE
+++ b/kernel/x86_64/KERNEL.SANDYBRIDGE
@@ -1,34 +1,35 @@
-SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
-SGEMMINCOPY =
-SGEMMITCOPY =
+SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
+SGEMMINCOPY = gemm_ncopy_4.S
+SGEMMITCOPY = gemm_tcopy_4.S
 SGEMMONCOPY = ../generic/gemm_ncopy_8.c
 SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
-SGEMMINCOPYOBJ =
-SGEMMITCOPYOBJ =
+SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
 SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
 SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+
 DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
 DGEMMINCOPY = ../generic/gemm_ncopy_8.c
 DGEMMITCOPY = ../generic/gemm_tcopy_8.c
-#DGEMMONCOPY = gemm_ncopy_4.S
 DGEMMONCOPY = ../generic/gemm_ncopy_4.c
 DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
-#DGEMMOTCOPY = gemm_tcopy_4.S
 DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
 DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
 DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
 DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
-#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
-CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
-CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
-CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
-CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
-CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
+
+CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
+CGEMMINCOPY = zgemm_ncopy_2.S
+CGEMMITCOPY = zgemm_tcopy_2.S
+CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
 CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
 CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
 CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
 CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
-#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
+
+
 ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
 ZGEMMINCOPY =
 ZGEMMITCOPY =
@@ -58,6 +59,7 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 #ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
 #ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
 #ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
+
 STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
 STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
 STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
diff --git a/param.h b/param.h
index 0065cf5e4..efec08b85 100644
--- a/param.h
+++ b/param.h
@@ -1104,10 +1104,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ZGEMM_DEFAULT_UNROLL_N 2
 #define XGEMM_DEFAULT_UNROLL_N 1
 #else
-#define SGEMM_DEFAULT_UNROLL_M 8
+#define SGEMM_DEFAULT_UNROLL_M 4
 #define DGEMM_DEFAULT_UNROLL_M 8
 #define QGEMM_DEFAULT_UNROLL_M 2
-#define CGEMM_DEFAULT_UNROLL_M 8
+#define CGEMM_DEFAULT_UNROLL_M 2
 #define ZGEMM_DEFAULT_UNROLL_M 4
 #define XGEMM_DEFAULT_UNROLL_M 1