enabled optimized dgemm kernel for NEHALEM
This commit is contained in:
parent
9d6f2b594e
commit
aa2709c4e0
|
@ -13,13 +13,13 @@ SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_4x4_core2.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
@ -48,11 +48,10 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
|
|||
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S
|
||||
|
|
|
@ -10,7 +10,7 @@ NEP: Data file for testing Nonsymmetric Eigenvalue Problem routines
|
|||
0 5 7 3 200 Values of INIBL (nibble crossover point)
|
||||
1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts)
|
||||
0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2)
|
||||
20.0 Threshold value
|
||||
70.0 Threshold value
|
||||
T Put T to test the error exits
|
||||
1 Code to interpret the seed
|
||||
NEP 21
|
||||
|
|
4
param.h
4
param.h
|
@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
|
Loading…
Reference in New Issue