Merge pull request #46 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-04-13 12:06:40 +02:00 committed by GitHub
commit 20d0cb2f65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1332 additions and 16 deletions

View File

@ -121,7 +121,7 @@ REALNAME:
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#define BUFFER_SIZE (16 << 20)
#define BUFFER_SIZE (32 << 20)
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

View File

@ -141,12 +141,17 @@ REALNAME:
#endif
#define HUGE_PAGESIZE ( 4 << 20)
/*
 * BUFFER_SIZE selection.
 *
 * A BUFFERSIZE macro supplied from outside takes precedence and is used as
 * the shift count applied to 32.  Without it, the value is chosen from the
 * CPU target macro: CORTEXA57 gets 20 << 20, TSV110 and EMAG8180 get
 * 32 << 20, and every other target falls back to 16 << 20.
 */
#ifdef BUFFERSIZE
#define BUFFER_SIZE (32 << BUFFERSIZE)
#elif defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#elif defined(TSV110) || defined(EMAG8180)
#define BUFFER_SIZE (32 << 20)
#else
#define BUFFER_SIZE (16 << 20)
#endif
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

View File

@ -226,7 +226,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#define HUGE_PAGESIZE ( 2 << 20)
/*
 * BUFFER_SIZE selection.
 *
 * A BUFFERSIZE macro supplied from outside takes precedence and is used as
 * the shift count applied to 32.  Without it, the value is chosen from the
 * CPU target macro: SKYLAKEX gets 32 << 21, HASWELL and ZEN get 32 << 22,
 * and every other target falls back to 32 << 20.
 */
#ifdef BUFFERSIZE
#define BUFFER_SIZE (32 << BUFFERSIZE)
#elif defined(SKYLAKEX)
#define BUFFER_SIZE (32 << 21)
#elif defined(HASWELL) || defined(ZEN)
#define BUFFER_SIZE (32 << 22)
#else
#define BUFFER_SIZE (32 << 20)
#endif

View File

@ -123,11 +123,7 @@ REALNAME:
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#if defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#else
#define BUFFER_SIZE (16 << 20)
#endif
#define BUFFER_SIZE (32 << 22)
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

View File

@ -87,6 +87,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
/*
 * Build-time sanity checks on BUFFER_SIZE.
 *
 * Memory buffer must fit two matrix subblocks of maximal size.  For each of
 * the four GEMM variants the preprocessor compares BUFFER_SIZE against every
 * pairwise product of that variant's DEFAULT_P, DEFAULT_Q and DEFAULT_R
 * (P*Q, P*R and R*Q), scaled by a per-variant factor (4, 8, 8, 16 --
 * presumably the element size in bytes; confirm against the kernel types)
 * and by 2 for the two subblocks.  If any product exceeds BUFFER_SIZE a
 * #warning is emitted; the build is not stopped.
 *
 * NOTE(review): an identifier that is not defined as a macro evaluates to 0
 * inside #if, so these checks silently pass when the *_DEFAULT_* values are
 * not plain macros at this point -- confirm they are for every build mode.
 */
/* Two-level stringification helpers: XSTR(x) expands x before quoting it.
   They are not referenced by the checks below. */
#define XSTR(x) STR(x)
#define STR(x) #x
/* SGEMM: scaling factor 4 per element. */
#if BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 * 2) || \
BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_R * 4 * 2) || \
BUFFER_SIZE < (SGEMM_DEFAULT_R * SGEMM_DEFAULT_Q * 4 * 2)
#warning BUFFER_SIZE is too small for P, Q, and R of SGEMM - large calculations may crash !
#endif
/* DGEMM: scaling factor 8 per element. */
#if BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 * 2) || \
BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_R * 8 * 2) || \
BUFFER_SIZE < (DGEMM_DEFAULT_R * DGEMM_DEFAULT_Q * 8 * 2)
#warning BUFFER_SIZE is too small for P, Q, and R of DGEMM - large calculations may crash !
#endif
/* CGEMM: scaling factor 8 per element. */
#if BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 * 2) || \
BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_R * 8 * 2) || \
BUFFER_SIZE < (CGEMM_DEFAULT_R * CGEMM_DEFAULT_Q * 8 * 2)
#warning BUFFER_SIZE is too small for P, Q, and R of CGEMM - large calculations may crash !
#endif
/* ZGEMM: scaling factor 16 per element. */
#if BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 * 2) || \
BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_R * 16 * 2) || \
BUFFER_SIZE < (ZGEMM_DEFAULT_R * ZGEMM_DEFAULT_Q * 16 * 2)
#warning BUFFER_SIZE is too small for P, Q, and R of ZGEMM - large calculations may crash !
#endif
#if defined(COMPILE_TLS)
#include <errno.h>
@ -2740,7 +2764,7 @@ void *blas_memory_alloc(int procpos){
#ifdef DEBUG
printf(" Position -> %d\n", position);
#endif
WMB;
memory[position].used = 1;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);

1274
kernel/common_param.h Normal file

File diff suppressed because it is too large Load Diff

27
param.h
View File

@ -2229,15 +2229,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_P 1280
#define DGEMM_DEFAULT_P 640
#define CGEMM_DEFAULT_P 640
#define ZGEMM_DEFAULT_P 320
#define SGEMM_DEFAULT_P 1280UL
#define DGEMM_DEFAULT_P 640UL
#define CGEMM_DEFAULT_P 640UL
#define ZGEMM_DEFAULT_P 320UL
#define SGEMM_DEFAULT_Q 640
#define DGEMM_DEFAULT_Q 720
#define CGEMM_DEFAULT_Q 640
#define ZGEMM_DEFAULT_Q 640
#define SGEMM_DEFAULT_Q 640UL
#define DGEMM_DEFAULT_Q 720UL
#define CGEMM_DEFAULT_Q 640UL
#define ZGEMM_DEFAULT_Q 640UL
#if 0
#define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
#define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
#define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
#define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
#endif
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 512
#define SYMV_P 8