Merge pull request #596 from wernsaar/develop

optimizations for haswell
This commit is contained in:
wernsaar 2015-06-13 16:44:48 +02:00
commit bdb5c842fc
6 changed files with 7717 additions and 8 deletions

View File

@@ -32,6 +32,10 @@ ifeq ($(TARGET), GENERIC)
USE_TRMM = 1
endif
ifeq ($(CORE), HASWELL)
USE_TRMM = 1
endif
SKERNELOBJS += \

File diff suppressed because it is too large Load Diff

View File

@@ -29,6 +29,7 @@ DAXPYKERNEL = daxpy.c
CAXPYKERNEL = caxpy.c
ZAXPYKERNEL = zaxpy.c
STRMMKERNEL = sgemm_kernel_16x4_haswell.S
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
@@ -39,16 +40,18 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_4x4_haswell.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DTRMMKERNEL = dtrmm_kernel_4x8_haswell.c
DGEMMKERNEL = dgemm_kernel_4x8_haswell.S
DGEMMINCOPY = ../generic/gemm_ncopy_4.c
DGEMMITCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMKERNEL = cgemm_kernel_8x2_haswell.S
CGEMMKERNEL = cgemm_kernel_8x2_haswell.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
@@ -59,6 +62,7 @@ CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMKERNEL = zgemm_kernel_4x2_haswell.S
ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1414,7 +1414,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 8
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2