Merge pull request #596 from wernsaar/develop
optimizations for haswell
This commit is contained in:
commit
bdb5c842fc
|
@ -32,6 +32,10 @@ ifeq ($(TARGET), GENERIC)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), HASWELL)
|
||||||
|
USE_TRMM = 1
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
SKERNELOBJS += \
|
SKERNELOBJS += \
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -29,6 +29,7 @@ DAXPYKERNEL = daxpy.c
|
||||||
CAXPYKERNEL = caxpy.c
|
CAXPYKERNEL = caxpy.c
|
||||||
ZAXPYKERNEL = zaxpy.c
|
ZAXPYKERNEL = zaxpy.c
|
||||||
|
|
||||||
|
STRMMKERNEL = sgemm_kernel_16x4_haswell.S
|
||||||
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
|
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||||
|
@ -39,16 +40,18 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_4x4_haswell.S
|
DTRMMKERNEL = dtrmm_kernel_4x8_haswell.c
|
||||||
DGEMMINCOPY =
|
DGEMMKERNEL = dgemm_kernel_4x8_haswell.S
|
||||||
DGEMMITCOPY =
|
DGEMMINCOPY = ../generic/gemm_ncopy_4.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DGEMMITCOPY = ../generic/gemm_tcopy_4.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
DGEMMINCOPYOBJ =
|
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||||
DGEMMITCOPYOBJ =
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
CTRMMKERNEL = cgemm_kernel_8x2_haswell.S
|
||||||
CGEMMKERNEL = cgemm_kernel_8x2_haswell.S
|
CGEMMKERNEL = cgemm_kernel_8x2_haswell.S
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
|
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
|
||||||
|
@ -59,6 +62,7 @@ CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
ZTRMMKERNEL = zgemm_kernel_4x2_haswell.S
|
||||||
ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S
|
ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S
|
||||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c
|
ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
2
param.h
2
param.h
|
@ -1414,7 +1414,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
Loading…
Reference in New Issue