Added new optimized sgemm kernel for SANDYBRIDGE

This commit is contained in:
wernsaar 2014-06-26 21:42:08 +02:00
parent aa2709c4e0
commit dabab2b5f4
3 changed files with 3176 additions and 11 deletions

View File

@ -1,18 +1,16 @@
SGEMVNKERNEL = sgemv_n.S SGEMVNKERNEL = sgemv_n.S
SGEMVTKERNEL = sgemv_t.S SGEMVTKERNEL = sgemv_t.S
SGEMMKERNEL = sgemm_kernel_16x4_sandy.S
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMINCOPY = gemm_ncopy_4.S SGEMMITCOPY = ../generic/gemm_tcopy_16.c
SGEMMITCOPY = gemm_tcopy_4.S SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMONCOPY = ../generic/gemm_ncopy_8.c SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
DGEMMINCOPY = ../generic/gemm_ncopy_8.c DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c DGEMMITCOPY = ../generic/gemm_tcopy_8.c

File diff suppressed because it is too large Load Diff

View File

@ -1108,14 +1108,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#else #else
#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 8 #define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2 #define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 8 #define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2 #define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 4
@ -1123,7 +1123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#endif #endif
#define SGEMM_DEFAULT_P 512 #define SGEMM_DEFAULT_P 768
#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R sgemm_r
//#define SGEMM_DEFAULT_R 1024 //#define SGEMM_DEFAULT_R 1024
@ -1145,7 +1145,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_P 252 #define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r #define XGEMM_DEFAULT_R xgemm_r
#define SGEMM_DEFAULT_Q 256 #define SGEMM_DEFAULT_Q 384
#define DGEMM_DEFAULT_Q 256 #define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 128 #define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 256 #define CGEMM_DEFAULT_Q 256