From d2385f0d524e6431374062cdcddeda8b83008e34 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sun, 1 Dec 2013 18:02:54 +0100 Subject: [PATCH] modified param.h --- param.h | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 227 insertions(+), 5 deletions(-) diff --git a/param.h b/param.h index c4d15323a..b865287be 100644 --- a/param.h +++ b/param.h @@ -304,9 +304,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef PILEDRIVER - -#define SNUMOPT 8 -#define DNUMOPT 4 +#define SNUMOPT 8 +#define DNUMOPT 4 #define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 832 @@ -344,7 +343,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMV_UNROLL 8 #endif - #if defined(ARCH_X86_64) #define SGEMM_DEFAULT_P 768 #define DGEMM_DEFAULT_P 768 @@ -380,7 +378,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_R zgemm_r #define XGEMM_DEFAULT_R xgemm_r -#define SYMV_P 16 +#define SYMV_P 16 #define HAVE_EXCLUSIVE_CACHE #define GEMM_THREAD gemm_thread_mn @@ -1154,6 +1152,107 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif +#ifdef HASWELL + +#define SNUMOPT 8 +#define DNUMOPT 4 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SYMV_P 8 + +#define SWITCH_RATIO 4 + +#ifdef ARCH_X86 + +#define SGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_M 2 +#define QGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_M 1 +#define XGEMM_DEFAULT_UNROLL_M 1 + +#define SGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_N 4 +#define QGEMM_DEFAULT_UNROLL_N 2 +#define CGEMM_DEFAULT_UNROLL_N 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 +#define XGEMM_DEFAULT_UNROLL_N 1 + +#else + +#define SGEMM_DEFAULT_UNROLL_M 16 +#define DGEMM_DEFAULT_UNROLL_M 4 +#define QGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_M 8 +#define ZGEMM_DEFAULT_UNROLL_M 4 +#define XGEMM_DEFAULT_UNROLL_M 1 + +#define SGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_N 4 +#define QGEMM_DEFAULT_UNROLL_N 2 +#define CGEMM_DEFAULT_UNROLL_N 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 +#define XGEMM_DEFAULT_UNROLL_N 1 + +#endif + +#ifdef ARCH_X86 + +#define SGEMM_DEFAULT_P 512 +#define SGEMM_DEFAULT_R sgemm_r +#define DGEMM_DEFAULT_P 512 +#define DGEMM_DEFAULT_R dgemm_r +#define QGEMM_DEFAULT_P 504 +#define QGEMM_DEFAULT_R qgemm_r +#define CGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_R 1024 +#define ZGEMM_DEFAULT_P 512 +#define ZGEMM_DEFAULT_R zgemm_r +#define XGEMM_DEFAULT_P 252 +#define XGEMM_DEFAULT_R xgemm_r +#define SGEMM_DEFAULT_Q 256 +#define DGEMM_DEFAULT_Q 256 +#define QGEMM_DEFAULT_Q 128 +#define CGEMM_DEFAULT_Q 256 +#define ZGEMM_DEFAULT_Q 192 +#define XGEMM_DEFAULT_Q 128 + +#else + +#define SGEMM_DEFAULT_P 768 +#define DGEMM_DEFAULT_P 512 +#define CGEMM_DEFAULT_P 384 +#define ZGEMM_DEFAULT_P 256 + +#define SGEMM_DEFAULT_Q 384 +#define DGEMM_DEFAULT_Q 256 +#define CGEMM_DEFAULT_Q 192 +#define ZGEMM_DEFAULT_Q 128 + +#define SGEMM_DEFAULT_R sgemm_r +#define DGEMM_DEFAULT_R 13824 +#define CGEMM_DEFAULT_R cgemm_r +#define ZGEMM_DEFAULT_R zgemm_r + +#define QGEMM_DEFAULT_Q 128 +#define QGEMM_DEFAULT_P 504 +#define QGEMM_DEFAULT_R qgemm_r +#define XGEMM_DEFAULT_P 252 +#define XGEMM_DEFAULT_R xgemm_r +#define XGEMM_DEFAULT_Q 128 + +#define CGEMM3M_DEFAULT_UNROLL_N 4 +#define CGEMM3M_DEFAULT_UNROLL_M 8 +#define ZGEMM3M_DEFAULT_UNROLL_N 2 +#define ZGEMM3M_DEFAULT_UNROLL_M 8 +#endif + + +#endif + #ifdef ATOM @@ -1797,6 +1896,129 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif + +#ifdef ARMV7 +#define SNUMOPT 2 +#define DNUMOPT 2 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 4 +#define SGEMM_DEFAULT_UNROLL_N 4 + +#define DGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_N 4 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + + + +#define SYMV_P 16 +#endif + + +#if defined(ARMV6) +#define SNUMOPT 2 +#define DNUMOPT 2 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 4 +#define SGEMM_DEFAULT_UNROLL_N 2 + +#define DGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_N 2 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + + +#define SYMV_P 16 +#endif + +#if defined(ARMV8) +#define SNUMOPT 2 +#define DNUMOPT 2 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 + +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + + +#define SYMV_P 16 +#endif + + + + #ifdef GENERIC #define SNUMOPT 2