ARM64: Enable DYNAMIC_ARCH
Enable DYNAMIC_ARCH feature on ARM64. This patch uses the cpuid feature in linux kernel to detect the core type at runtime (https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt). If this feature is missing in kernel, then the user should use the OPENBLAS_CORETYPE env variable to select the desired core type.
This commit is contained in:
parent
af2837c392
commit
d5aeff636f
|
@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT)
|
||||||
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
|
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), arm64)
|
||||||
|
DYNAMIC_CORE = ARMV8
|
||||||
|
DYNAMIC_CORE += CORTEXA57
|
||||||
|
DYNAMIC_CORE += THUNDERX
|
||||||
|
DYNAMIC_CORE += THUNDERX2T99
|
||||||
|
endif
|
||||||
|
|
||||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||||
ifndef DYNAMIC_CORE
|
ifndef DYNAMIC_CORE
|
||||||
override DYNAMIC_ARCH=
|
override DYNAMIC_ARCH=
|
||||||
|
|
|
@ -15,7 +15,11 @@ endif
|
||||||
# COMMONOBJS += info.$(SUFFIX)
|
# COMMONOBJS += info.$(SUFFIX)
|
||||||
|
|
||||||
ifeq ($(DYNAMIC_ARCH), 1)
|
ifeq ($(DYNAMIC_ARCH), 1)
|
||||||
|
ifeq ($(ARCH),arm64)
|
||||||
|
COMMONOBJS += dynamic_arm64.$(SUFFIX)
|
||||||
|
else
|
||||||
COMMONOBJS += dynamic.$(SUFFIX)
|
COMMONOBJS += dynamic.$(SUFFIX)
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
COMMONOBJS += parameter.$(SUFFIX)
|
COMMONOBJS += parameter.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(DYNAMIC_ARCH), 1)
|
ifeq ($(DYNAMIC_ARCH), 1)
|
||||||
|
ifeq ($(ARCH),arm64)
|
||||||
|
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX)
|
||||||
|
else
|
||||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -0,0 +1,198 @@
|
||||||
|
/*********************************************************************/
|
||||||
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* All rights reserved. */
|
||||||
|
/* */
|
||||||
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
/* without modification, are permitted provided that the following */
|
||||||
|
/* conditions are met: */
|
||||||
|
/* */
|
||||||
|
/* 1. Redistributions of source code must retain the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer. */
|
||||||
|
/* */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer in the documentation and/or other materials */
|
||||||
|
/* provided with the distribution. */
|
||||||
|
/* */
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||||
|
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||||
|
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||||
|
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||||
|
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||||
|
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||||
|
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||||
|
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/* */
|
||||||
|
/* The views and conclusions contained in the software and */
|
||||||
|
/* documentation are those of the authors and should not be */
|
||||||
|
/* interpreted as representing official policies, either expressed */
|
||||||
|
/* or implied, of The University of Texas at Austin. */
|
||||||
|
/*********************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include <asm/hwcap.h>
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
|
||||||
|
extern gotoblas_t gotoblas_ARMV8;
|
||||||
|
extern gotoblas_t gotoblas_CORTEXA57;
|
||||||
|
extern gotoblas_t gotoblas_THUNDERX;
|
||||||
|
extern gotoblas_t gotoblas_THUNDERX2T99;
|
||||||
|
|
||||||
|
extern void openblas_warning(int verbose, const char * msg);
|
||||||
|
|
||||||
|
#define NUM_CORETYPES 4
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In case asm/hwcap.h is outdated on the build system, make sure
|
||||||
|
* that HWCAP_CPUID is defined
|
||||||
|
*/
|
||||||
|
#ifndef HWCAP_CPUID
|
||||||
|
#define HWCAP_CPUID (1 << 11)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define get_cpu_ftr(id, var) ({ \
|
||||||
|
asm("mrs %0, "#id : "=r" (var)); \
|
||||||
|
})
|
||||||
|
|
||||||
|
static char *corename[] = {
|
||||||
|
"armv8",
|
||||||
|
"cortexa57",
|
||||||
|
"thunderx",
|
||||||
|
"thunderx2t99",
|
||||||
|
"unknown"
|
||||||
|
};
|
||||||
|
|
||||||
|
char *gotoblas_corename(void) {
|
||||||
|
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
|
||||||
|
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1];
|
||||||
|
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2];
|
||||||
|
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3];
|
||||||
|
return corename[NUM_CORETYPES];
|
||||||
|
}
|
||||||
|
|
||||||
|
static gotoblas_t *force_coretype(char *coretype) {
|
||||||
|
int i ;
|
||||||
|
int found = -1;
|
||||||
|
char message[128];
|
||||||
|
|
||||||
|
for ( i=0 ; i < NUM_CORETYPES; i++)
|
||||||
|
{
|
||||||
|
if (!strncasecmp(coretype, corename[i], 20))
|
||||||
|
{
|
||||||
|
found = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (found)
|
||||||
|
{
|
||||||
|
case 0: return (&gotoblas_ARMV8);
|
||||||
|
case 1: return (&gotoblas_CORTEXA57);
|
||||||
|
case 2: return (&gotoblas_THUNDERX);
|
||||||
|
case 3: return (&gotoblas_THUNDERX2T99);
|
||||||
|
}
|
||||||
|
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||||
|
openblas_warning(1, message);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static gotoblas_t *get_coretype(void) {
|
||||||
|
int implementer, variant, part, arch, revision, midr_el1;
|
||||||
|
|
||||||
|
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
|
||||||
|
char coremsg[128];
|
||||||
|
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
|
||||||
|
openblas_warning(1, coremsg);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
get_cpu_ftr(MIDR_EL1, midr_el1);
|
||||||
|
/*
|
||||||
|
* MIDR_EL1
|
||||||
|
*
|
||||||
|
* 31 24 23 20 19 16 15 4 3 0
|
||||||
|
* -----------------------------------------------------------------
|
||||||
|
* | Implementer | Variant | Architecture | Part Number | Revision |
|
||||||
|
* -----------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
implementer = (midr_el1 >> 24) & 0xFF;
|
||||||
|
part = (midr_el1 >> 4) & 0xFFF;
|
||||||
|
|
||||||
|
switch(implementer)
|
||||||
|
{
|
||||||
|
case 0x41: // ARM
|
||||||
|
switch (part)
|
||||||
|
{
|
||||||
|
case 0xd07: // Cortex A57
|
||||||
|
case 0xd08: // Cortex A72
|
||||||
|
case 0xd03: // Cortex A53
|
||||||
|
return &gotoblas_CORTEXA57;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 0x42: // Broadcom
|
||||||
|
switch (part)
|
||||||
|
{
|
||||||
|
case 0x516: // Vulcan
|
||||||
|
return &gotoblas_THUNDERX2T99;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 0x43: // Cavium
|
||||||
|
switch (part)
|
||||||
|
{
|
||||||
|
case 0x0a1: // ThunderX
|
||||||
|
return &gotoblas_THUNDERX;
|
||||||
|
case 0x0af: // ThunderX2
|
||||||
|
return &gotoblas_THUNDERX2T99;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gotoblas_dynamic_init(void) {
|
||||||
|
|
||||||
|
char coremsg[128];
|
||||||
|
char coren[22];
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
if (gotoblas) return;
|
||||||
|
|
||||||
|
p = getenv("OPENBLAS_CORETYPE");
|
||||||
|
if ( p )
|
||||||
|
{
|
||||||
|
gotoblas = force_coretype(p);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gotoblas = get_coretype();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gotoblas == NULL)
|
||||||
|
{
|
||||||
|
snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
|
||||||
|
openblas_warning(1, coremsg);
|
||||||
|
gotoblas = &gotoblas_ARMV8;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gotoblas && gotoblas->init) {
|
||||||
|
strncpy(coren, gotoblas_corename(), 20);
|
||||||
|
sprintf(coremsg, "Core: %s\n", coren);
|
||||||
|
openblas_warning(2, coremsg);
|
||||||
|
gotoblas -> init();
|
||||||
|
} else {
|
||||||
|
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void gotoblas_dynamic_quit(void) {
|
||||||
|
gotoblas = NULL;
|
||||||
|
}
|
|
@ -113,13 +113,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||||
|
@ -134,8 +134,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||||
|
@ -146,34 +146,34 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
||||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
||||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
||||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
|
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
|
||||||
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
|
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
|
||||||
|
@ -201,25 +201,25 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -111,13 +111,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||||
|
@ -132,8 +132,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||||
|
@ -144,32 +144,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
||||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
||||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
||||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
|
|
@ -89,26 +89,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
SGEMMKERNEL = sgemm_kernel_4x4.S
|
SGEMMKERNEL = sgemm_kernel_4x4.S
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
|
|
@ -74,13 +74,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
@ -94,8 +94,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(DGEMM_UNROLL_N), 4)
|
ifeq ($(DGEMM_UNROLL_N), 4)
|
||||||
|
@ -106,32 +106,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
||||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
||||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
||||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
SASUMKERNEL = sasum_thunderx2t99.c
|
SASUMKERNEL = sasum_thunderx2t99.c
|
||||||
DASUMKERNEL = dasum_thunderx2t99.c
|
DASUMKERNEL = dasum_thunderx2t99.c
|
||||||
|
|
|
@ -646,6 +646,78 @@ gotoblas_t TABLE_NAME = {
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(ARCH_ARM64)
|
||||||
|
static void init_parameter(void) {
|
||||||
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
||||||
|
|
||||||
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
|
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
||||||
|
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||||
|
|
||||||
|
#ifdef EXPRECISION
|
||||||
|
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
|
||||||
|
TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(USE_GEMM3M)
|
||||||
|
#ifdef CGEMM3M_DEFAULT_P
|
||||||
|
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
|
||||||
|
#else
|
||||||
|
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ZGEMM3M_DEFAULT_P
|
||||||
|
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
|
||||||
|
#else
|
||||||
|
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CGEMM3M_DEFAULT_Q
|
||||||
|
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
|
||||||
|
#else
|
||||||
|
TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ZGEMM3M_DEFAULT_Q
|
||||||
|
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
|
||||||
|
#else
|
||||||
|
TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CGEMM3M_DEFAULT_R
|
||||||
|
TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
|
||||||
|
#else
|
||||||
|
TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ZGEMM3M_DEFAULT_R
|
||||||
|
TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
|
||||||
|
#else
|
||||||
|
TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef EXPRECISION
|
||||||
|
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
|
||||||
|
TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
|
||||||
|
TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
#else // defined(ARCH_ARM64)
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
static int get_l2_size_old(void){
|
static int get_l2_size_old(void){
|
||||||
int i, eax, ebx, ecx, edx, cpuid_level;
|
int i, eax, ebx, ecx, edx, cpuid_level;
|
||||||
|
@ -1231,3 +1303,4 @@ static void init_parameter(void) {
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#endif //defined(ARCH_ARM64)
|
||||||
|
|
Loading…
Reference in New Issue