Merge pull request #1829 from ashwinyes/develop_aarch64_dynamic_arch_support
Add DYNAMIC_ARCH support for ARM64
This commit is contained in:
commit
76a66eaac8
|
@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT)
|
|||
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
DYNAMIC_CORE = ARMV8
|
||||
DYNAMIC_CORE += CORTEXA57
|
||||
DYNAMIC_CORE += THUNDERX
|
||||
DYNAMIC_CORE += THUNDERX2T99
|
||||
endif
|
||||
|
||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||
ifndef DYNAMIC_CORE
|
||||
override DYNAMIC_ARCH=
|
||||
|
|
|
@ -237,7 +237,6 @@ void get_cpuconfig(void)
|
|||
break;
|
||||
|
||||
case CPU_THUNDERX:
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define THUNDERX\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 128\n");
|
||||
|
|
|
@ -15,7 +15,11 @@ endif
|
|||
# COMMONOBJS += info.$(SUFFIX)
|
||||
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
ifeq ($(ARCH),arm64)
|
||||
COMMONOBJS += dynamic_arm64.$(SUFFIX)
|
||||
else
|
||||
COMMONOBJS += dynamic.$(SUFFIX)
|
||||
endif
|
||||
else
|
||||
COMMONOBJS += parameter.$(SUFFIX)
|
||||
endif
|
||||
|
@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c
|
|||
endif
|
||||
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
ifeq ($(ARCH),arm64)
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX)
|
||||
else
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
||||
endif
|
||||
else
|
||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
||||
endif
|
||||
|
|
|
@ -0,0 +1,198 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <asm/hwcap.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
extern gotoblas_t gotoblas_ARMV8;
|
||||
extern gotoblas_t gotoblas_CORTEXA57;
|
||||
extern gotoblas_t gotoblas_THUNDERX;
|
||||
extern gotoblas_t gotoblas_THUNDERX2T99;
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
|
||||
#define NUM_CORETYPES 4
|
||||
|
||||
/*
|
||||
* In case asm/hwcap.h is outdated on the build system, make sure
|
||||
* that HWCAP_CPUID is defined
|
||||
*/
|
||||
#ifndef HWCAP_CPUID
|
||||
#define HWCAP_CPUID (1 << 11)
|
||||
#endif
|
||||
|
||||
#define get_cpu_ftr(id, var) ({ \
|
||||
asm("mrs %0, "#id : "=r" (var)); \
|
||||
})
|
||||
|
||||
static char *corename[] = {
|
||||
"armv8",
|
||||
"cortexa57",
|
||||
"thunderx",
|
||||
"thunderx2t99",
|
||||
"unknown"
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
|
||||
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1];
|
||||
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2];
|
||||
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
static gotoblas_t *force_coretype(char *coretype) {
|
||||
int i ;
|
||||
int found = -1;
|
||||
char message[128];
|
||||
|
||||
for ( i=0 ; i < NUM_CORETYPES; i++)
|
||||
{
|
||||
if (!strncasecmp(coretype, corename[i], 20))
|
||||
{
|
||||
found = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (found)
|
||||
{
|
||||
case 0: return (&gotoblas_ARMV8);
|
||||
case 1: return (&gotoblas_CORTEXA57);
|
||||
case 2: return (&gotoblas_THUNDERX);
|
||||
case 3: return (&gotoblas_THUNDERX2T99);
|
||||
}
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
int implementer, variant, part, arch, revision, midr_el1;
|
||||
|
||||
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
|
||||
char coremsg[128];
|
||||
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
|
||||
openblas_warning(1, coremsg);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
get_cpu_ftr(MIDR_EL1, midr_el1);
|
||||
/*
|
||||
* MIDR_EL1
|
||||
*
|
||||
* 31 24 23 20 19 16 15 4 3 0
|
||||
* -----------------------------------------------------------------
|
||||
* | Implementer | Variant | Architecture | Part Number | Revision |
|
||||
* -----------------------------------------------------------------
|
||||
*/
|
||||
implementer = (midr_el1 >> 24) & 0xFF;
|
||||
part = (midr_el1 >> 4) & 0xFFF;
|
||||
|
||||
switch(implementer)
|
||||
{
|
||||
case 0x41: // ARM
|
||||
switch (part)
|
||||
{
|
||||
case 0xd07: // Cortex A57
|
||||
case 0xd08: // Cortex A72
|
||||
case 0xd03: // Cortex A53
|
||||
return &gotoblas_CORTEXA57;
|
||||
}
|
||||
break;
|
||||
case 0x42: // Broadcom
|
||||
switch (part)
|
||||
{
|
||||
case 0x516: // Vulcan
|
||||
return &gotoblas_THUNDERX2T99;
|
||||
}
|
||||
break;
|
||||
case 0x43: // Cavium
|
||||
switch (part)
|
||||
{
|
||||
case 0x0a1: // ThunderX
|
||||
return &gotoblas_THUNDERX;
|
||||
case 0x0af: // ThunderX2
|
||||
return &gotoblas_THUNDERX2T99;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_init(void) {
|
||||
|
||||
char coremsg[128];
|
||||
char coren[22];
|
||||
char *p;
|
||||
|
||||
if (gotoblas) return;
|
||||
|
||||
p = getenv("OPENBLAS_CORETYPE");
|
||||
if ( p )
|
||||
{
|
||||
gotoblas = force_coretype(p);
|
||||
}
|
||||
else
|
||||
{
|
||||
gotoblas = get_coretype();
|
||||
}
|
||||
|
||||
if (gotoblas == NULL)
|
||||
{
|
||||
snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
|
||||
openblas_warning(1, coremsg);
|
||||
gotoblas = &gotoblas_ARMV8;
|
||||
}
|
||||
|
||||
if (gotoblas && gotoblas->init) {
|
||||
strncpy(coren, gotoblas_corename(), 20);
|
||||
sprintf(coremsg, "Core: %s\n", coren);
|
||||
openblas_warning(2, coremsg);
|
||||
gotoblas -> init();
|
||||
} else {
|
||||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_quit(void) {
|
||||
gotoblas = NULL;
|
||||
}
|
|
@ -730,35 +730,8 @@ void blas_set_parameter(void){
|
|||
|
||||
#if defined(ARCH_ARM64)
|
||||
|
||||
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
|
||||
unsigned long dgemm_prefetch_size_a;
|
||||
unsigned long dgemm_prefetch_size_b;
|
||||
unsigned long dgemm_prefetch_size_c;
|
||||
#endif
|
||||
|
||||
void blas_set_parameter(void)
|
||||
{
|
||||
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
|
||||
dgemm_p = 160;
|
||||
dgemm_q = 128;
|
||||
dgemm_r = 4096;
|
||||
|
||||
sgemm_p = 128;
|
||||
sgemm_q = 352;
|
||||
sgemm_r = 4096;
|
||||
|
||||
cgemm_p = 128;
|
||||
cgemm_q = 224;
|
||||
cgemm_r = 4096;
|
||||
|
||||
zgemm_p = 128;
|
||||
zgemm_q = 112;
|
||||
zgemm_r = 4096;
|
||||
|
||||
dgemm_prefetch_size_a = 3584;
|
||||
dgemm_prefetch_size_b = 512;
|
||||
dgemm_prefetch_size_c = 128;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -88,7 +88,11 @@ lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
|
|||
$(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F)
|
||||
|
||||
setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
|
||||
ifeq ($(USE_GEMM3M), 1)
|
||||
$(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@
|
||||
else
|
||||
$(CC) -c $(CFLAGS) $< -o $@
|
||||
endif
|
||||
|
||||
setparam$(TSUFFIX).c : setparam-ref.c
|
||||
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
|
||||
|
|
|
@ -113,13 +113,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
|||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
|
@ -134,8 +134,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
|||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||
endif
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||
|
@ -146,34 +146,34 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
|||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||
endif
|
||||
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
|
||||
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
|
||||
|
@ -201,25 +201,25 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
|||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
endif
|
||||
|
|
|
@ -111,13 +111,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
|||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
|
@ -132,8 +132,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
|||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||
endif
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||
|
@ -144,32 +144,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
|||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||
endif
|
||||
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
|
|
@ -89,26 +89,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
|||
SGEMMKERNEL = sgemm_kernel_4x4.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
|
|
|
@ -74,13 +74,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
|||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||
|
||||
|
@ -94,8 +94,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
|||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||
endif
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||
|
@ -106,32 +106,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
|||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||
endif
|
||||
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
SASUMKERNEL = sasum_thunderx2t99.c
|
||||
DASUMKERNEL = dasum_thunderx2t99.c
|
||||
|
|
|
@ -1,135 +0,0 @@
|
|||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SDOTKERNEL = dot.S
|
||||
DDOTKERNEL = dot.S
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_4x4.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
|
||||
|
|
@ -943,13 +943,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
prfm PLDL1KEEP, [origPB]
|
||||
prfm PLDL1KEEP, [origPA]
|
||||
|
||||
|
||||
ldr A_PRE_SIZE, =dgemm_prefetch_size_a
|
||||
ldr A_PRE_SIZE, [A_PRE_SIZE]
|
||||
ldr B_PRE_SIZE, =dgemm_prefetch_size_b
|
||||
ldr B_PRE_SIZE, [B_PRE_SIZE]
|
||||
ldr C_PRE_SIZE, =dgemm_prefetch_size_c
|
||||
ldr C_PRE_SIZE, [C_PRE_SIZE]
|
||||
mov A_PRE_SIZE, #3584
|
||||
mov B_PRE_SIZE, #512
|
||||
mov C_PRE_SIZE, #128
|
||||
add A_PRE_SIZE_64, A_PRE_SIZE, #64
|
||||
add B_PRE_SIZE_64, B_PRE_SIZE, #64
|
||||
|
||||
|
|
|
@ -294,6 +294,8 @@ gotoblas_t TABLE_NAME = {
|
|||
chemm_outcopyTS, chemm_oltcopyTS,
|
||||
|
||||
0, 0, 0,
|
||||
|
||||
#if defined(USE_GEMM3M)
|
||||
#ifdef CGEMM3M_DEFAULT_UNROLL_M
|
||||
CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
|
||||
#else
|
||||
|
@ -324,6 +326,33 @@ gotoblas_t TABLE_NAME = {
|
|||
chemm3m_oucopybTS, chemm3m_olcopybTS,
|
||||
chemm3m_oucopyrTS, chemm3m_olcopyrTS,
|
||||
chemm3m_oucopyiTS, chemm3m_olcopyiTS,
|
||||
#else
|
||||
0, 0, 0,
|
||||
|
||||
NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
cneg_tcopyTS, claswp_ncopyTS,
|
||||
|
@ -400,6 +429,7 @@ gotoblas_t TABLE_NAME = {
|
|||
zhemm_outcopyTS, zhemm_oltcopyTS,
|
||||
|
||||
0, 0, 0,
|
||||
#if defined(USE_GEMM3M)
|
||||
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
|
||||
ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
|
||||
#else
|
||||
|
@ -430,6 +460,33 @@ gotoblas_t TABLE_NAME = {
|
|||
zhemm3m_oucopybTS, zhemm3m_olcopybTS,
|
||||
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
|
||||
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
|
||||
#else
|
||||
0, 0, 0,
|
||||
|
||||
NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
zneg_tcopyTS, zlaswp_ncopyTS,
|
||||
|
@ -503,6 +560,7 @@ gotoblas_t TABLE_NAME = {
|
|||
xhemm_outcopyTS, xhemm_oltcopyTS,
|
||||
|
||||
0, 0, 0,
|
||||
#if defined(USE_GEMM3M)
|
||||
QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
|
||||
|
||||
xgemm3m_kernelTS,
|
||||
|
@ -528,6 +586,33 @@ gotoblas_t TABLE_NAME = {
|
|||
xhemm3m_oucopybTS, xhemm3m_olcopybTS,
|
||||
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
|
||||
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
|
||||
#else
|
||||
0, 0, 0,
|
||||
|
||||
NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
xneg_tcopyTS, xlaswp_ncopyTS,
|
||||
|
@ -561,6 +646,78 @@ gotoblas_t TABLE_NAME = {
|
|||
|
||||
};
|
||||
|
||||
#if defined(ARCH_ARM64)
|
||||
static void init_parameter(void) {
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
||||
|
||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||
|
||||
#ifdef EXPRECISION
|
||||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
|
||||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
|
||||
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
|
||||
TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
|
||||
#endif
|
||||
|
||||
#if defined(USE_GEMM3M)
|
||||
#ifdef CGEMM3M_DEFAULT_P
|
||||
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
|
||||
#else
|
||||
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
|
||||
#endif
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_P
|
||||
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
|
||||
#else
|
||||
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
|
||||
#endif
|
||||
|
||||
#ifdef CGEMM3M_DEFAULT_Q
|
||||
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
|
||||
#else
|
||||
TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
|
||||
#endif
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_Q
|
||||
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
|
||||
#else
|
||||
TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
|
||||
#endif
|
||||
|
||||
#ifdef CGEMM3M_DEFAULT_R
|
||||
TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
|
||||
#else
|
||||
TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
|
||||
#endif
|
||||
|
||||
#ifdef ZGEMM3M_DEFAULT_R
|
||||
TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
|
||||
#else
|
||||
TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
|
||||
#endif
|
||||
|
||||
#ifdef EXPRECISION
|
||||
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
|
||||
TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
|
||||
TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
#else // defined(ARCH_ARM64)
|
||||
#ifdef ARCH_X86
|
||||
static int get_l2_size_old(void){
|
||||
int i, eax, ebx, ecx, edx, cpuid_level;
|
||||
|
@ -1146,3 +1303,4 @@ static void init_parameter(void) {
|
|||
|
||||
|
||||
}
|
||||
#endif //defined(ARCH_ARM64)
|
||||
|
|
48
param.h
48
param.h
|
@ -2641,20 +2641,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define SGEMM_DEFAULT_P sgemm_p
|
||||
#define DGEMM_DEFAULT_P dgemm_p
|
||||
#define CGEMM_DEFAULT_P cgemm_p
|
||||
#define ZGEMM_DEFAULT_P zgemm_p
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 160
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define ZGEMM_DEFAULT_P 128
|
||||
|
||||
#define SGEMM_DEFAULT_Q sgemm_q
|
||||
#define DGEMM_DEFAULT_Q dgemm_q
|
||||
#define CGEMM_DEFAULT_Q cgemm_q
|
||||
#define ZGEMM_DEFAULT_Q zgemm_q
|
||||
#define SGEMM_DEFAULT_Q 352
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 224
|
||||
#define ZGEMM_DEFAULT_Q 112
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
#define SGEMM_DEFAULT_R 4096
|
||||
#define DGEMM_DEFAULT_R 4096
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#define SYMV_P 16
|
||||
#endif
|
||||
|
@ -2720,20 +2720,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define SGEMM_DEFAULT_P sgemm_p
|
||||
#define DGEMM_DEFAULT_P dgemm_p
|
||||
#define CGEMM_DEFAULT_P cgemm_p
|
||||
#define ZGEMM_DEFAULT_P zgemm_p
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 160
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define ZGEMM_DEFAULT_P 128
|
||||
|
||||
#define SGEMM_DEFAULT_Q sgemm_q
|
||||
#define DGEMM_DEFAULT_Q dgemm_q
|
||||
#define CGEMM_DEFAULT_Q cgemm_q
|
||||
#define ZGEMM_DEFAULT_Q zgemm_q
|
||||
#define SGEMM_DEFAULT_Q 352
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 224
|
||||
#define ZGEMM_DEFAULT_Q 112
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
#define SGEMM_DEFAULT_R 4096
|
||||
#define DGEMM_DEFAULT_R 4096
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#define SYMV_P 16
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue