commit
9cac379655
|
@ -416,6 +416,29 @@ endif ()
|
||||||
set(ZGEMM_UNROLL_M 4)
|
set(ZGEMM_UNROLL_M 4)
|
||||||
set(ZGEMM_UNROLL_N 4)
|
set(ZGEMM_UNROLL_N 4)
|
||||||
set(SYMV_P 16)
|
set(SYMV_P 16)
|
||||||
|
elseif ("${TCORE}" STREQUAL "VORTEX")
|
||||||
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
|
"#define ARMV8\n"
|
||||||
|
"#define L1_CODE_SIZE\t32768\n"
|
||||||
|
"#define L1_CODE_LINESIZE\t64\n"
|
||||||
|
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L1_DATA_SIZE\t32768\n"
|
||||||
|
"#define L1_DATA_LINESIZE\t64\n"
|
||||||
|
"#define L1_DATA_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L2_SIZE\t5262144\n"
|
||||||
|
"#define L2_LINESIZE\t64\n"
|
||||||
|
"#define L2_ASSOCIATIVE\t8\n"
|
||||||
|
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||||
|
"#define DTB_SIZE\t4096\n")
|
||||||
|
set(SGEMM_UNROLL_M 16)
|
||||||
|
set(SGEMM_UNROLL_N 4)
|
||||||
|
set(DGEMM_UNROLL_M 8)
|
||||||
|
set(DGEMM_UNROLL_N 4)
|
||||||
|
set(CGEMM_UNROLL_M 8)
|
||||||
|
set(CGEMM_UNROLL_N 4)
|
||||||
|
set(ZGEMM_UNROLL_M 4)
|
||||||
|
set(ZGEMM_UNROLL_N 4)
|
||||||
|
set(SYMV_P 16)
|
||||||
elseif ("${TCORE}" STREQUAL "POWER6")
|
elseif ("${TCORE}" STREQUAL "POWER6")
|
||||||
file(APPEND ${TARGET_CONF_TEMP}
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
"#define L1_DATA_SIZE 32768\n"
|
"#define L1_DATA_SIZE 32768\n"
|
||||||
|
|
|
@ -424,7 +424,7 @@ void get_cpuconfig(void)
|
||||||
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L1_DATA_SIZE %d \n",value);
|
printf("#define L1_DATA_SIZE %d \n",value);
|
||||||
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L2_DATA_SIZE %d \n",value);
|
printf("#define L2_SIZE %d \n",value);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
|
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
|
||||||
zgeadd, dzsum);
|
zgeadd, dzsum);
|
||||||
|
|
||||||
@cblasobjs = (lsame, xerbla);
|
@blasobjs = (lsame, xerbla);
|
||||||
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
||||||
@cblasobjsc = (
|
@cblasobjsc = (
|
||||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||||
|
@ -3600,6 +3600,7 @@ if ($ARGV[13] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objss);
|
@lapack2objs = (@lapack2objs, @lapack2objss);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
|
||||||
}
|
}
|
||||||
if ($ARGV[14] == 1) {
|
if ($ARGV[14] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsd);
|
@blasobjs = (@blasobjs, @blasobjsd);
|
||||||
|
@ -3608,6 +3609,7 @@ if ($ARGV[14] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsd);
|
@lapack2objs = (@lapack2objs, @lapack2objsd);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d);
|
||||||
}
|
}
|
||||||
if ($ARGV[15] == 1) {
|
if ($ARGV[15] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsc);
|
@blasobjs = (@blasobjs, @blasobjsc);
|
||||||
|
@ -3618,6 +3620,7 @@ if ($ARGV[15] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc);
|
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc, @lapackobjs2c);
|
||||||
}
|
}
|
||||||
if ($ARGV[16] == 1) {
|
if ($ARGV[16] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsz);
|
@blasobjs = (@blasobjs, @blasobjsz);
|
||||||
|
@ -3628,6 +3631,7 @@ if ($ARGV[16] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
|
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz, @lapackobjs2z);
|
||||||
}
|
}
|
||||||
if ($ARGV[8] == 1) {
|
if ($ARGV[8] == 1) {
|
||||||
#ONLY_CBLAS=1
|
#ONLY_CBLAS=1
|
||||||
|
|
14
getarch.c
14
getarch.c
|
@ -1222,6 +1222,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_VORTEX
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM64"
|
||||||
|
#define SUBARCHITECTURE "VORTEX"
|
||||||
|
#define SUBDIRNAME "arm64"
|
||||||
|
#define ARCHCONFIG "-DVORTEX " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||||
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||||
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
|
#define LIBNAME "vortex"
|
||||||
|
#define CORENAME "VORTEX"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_ZARCH_GENERIC
|
#ifdef FORCE_ZARCH_GENERIC
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "ZARCH"
|
#define ARCHITECTURE "ZARCH"
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
|
||||||
/* the direct sgemm code written by Arjan van der Ven */
|
/* the direct sgemm code written by Arjan van der Ven */
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
||||||
/*
|
/*
|
||||||
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
||||||
* of the sgemm call, avoiding the copies, memory realignments and threading,
|
* of the sgemm call, avoiding the copies, memory realignments and threading,
|
||||||
|
|
Loading…
Reference in New Issue