Merge pull request #103 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-10-19 15:56:20 +02:00 committed by GitHub
commit 9cac379655
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 52 additions and 10 deletions

View File

@ -416,6 +416,29 @@ endif ()
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "VORTEX")
# VORTEX target: append the ARMV8 marker and the L1/L2/DTB #defines to
# ${TARGET_CONF_TEMP}, then set the GEMM unroll factors and SYMV_P.
# NOTE(review): L2_SIZE is written as 5262144, which is not a power of two
# (it equals 5000000 + 262144). The FORCE_VORTEX ARCHCONFIG string for the
# same core passes -DL2_SIZE=262144 and -DL2_ASSOCIATIVE=32, while this
# branch writes 5262144 and 8 -- confirm which values are intended.
file(APPEND ${TARGET_CONF_TEMP}
"#define ARMV8\n"
"#define L1_CODE_SIZE\t32768\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t4\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t4\n"
"#define L2_SIZE\t5262144\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t8\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "POWER6")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n"

View File

@ -424,7 +424,7 @@ void get_cpuconfig(void)
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
printf("#define L1_DATA_SIZE %d \n",value);
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
printf("#define L2_DATA_SIZE %d \n",value);
printf("#define L2_SIZE %d \n",value);
break;
#endif
}

View File

@ -50,7 +50,7 @@
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
zgeadd, dzsum);
@cblasobjs = (lsame, xerbla);
@blasobjs = (lsame, xerbla);
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
@cblasobjsc = (
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
@ -3600,6 +3600,7 @@ if ($ARGV[13] == 1) {
@lapack2objs = (@lapack2objs, @lapack2objss);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
}
if ($ARGV[14] == 1) {
@blasobjs = (@blasobjs, @blasobjsd);
@ -3608,6 +3609,7 @@ if ($ARGV[14] == 1) {
@lapack2objs = (@lapack2objs, @lapack2objsd);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d);
}
if ($ARGV[15] == 1) {
@blasobjs = (@blasobjs, @blasobjsc);
@ -3618,6 +3620,7 @@ if ($ARGV[15] == 1) {
# Extend @lapack2objs with @lapack2objsc and @lapack2objszc. The second list
# is the same one the $ARGV[16] branch appends; it was previously misspelled
# "@lapac2objszc", which Perl (without "use strict") treats as an empty array,
# so those symbols were silently left out.
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapack2objszc);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc, @lapackobjs2c);
}
if ($ARGV[16] == 1) {
@blasobjs = (@blasobjs, @blasobjsz);
@ -3628,6 +3631,7 @@ if ($ARGV[16] == 1) {
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz, @lapackobjs2z);
}
if ($ARGV[8] == 1) {
#ONLY_CBLAS=1

View File

@ -1222,6 +1222,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif
#ifdef FORCE_VORTEX
/* Forced-target settings for VORTEX: declares it as an ARM64 subarchitecture
 * (SUBDIRNAME "arm64") and builds the ARCHCONFIG flag string from the
 * L1/L2/DTB parameters plus the VFP/NEON/ARMV8 feature defines.
 * NOTE(review): -DL2_SIZE=262144 and -DL2_ASSOCIATIVE=32 here differ from the
 * values written for VORTEX by the CMake TCORE branch (5262144 and 8) --
 * confirm which pair is intended. */
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "VORTEX"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DVORTEX " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "vortex"
#define CORENAME "VORTEX"
#endif
#ifdef FORCE_ZARCH_GENERIC
#define FORCE
#define ARCHITECTURE "ZARCH"

View File

@ -1,7 +1,8 @@
#if defined(SKYLAKEX) || defined (COOPERLAKE)
/* the direct sgemm code written by Arjan van der Ven */
#include <immintrin.h>
#include "common.h"
#if defined(SKYLAKEX) || defined (COOPERLAKE)
/*
* "Direct sgemm" code. This code operates directly on the inputs and outputs
* of the sgemm call, avoiding the copies, memory realignments and threading,