Merge pull request #1829 from ashwinyes/develop_aarch64_dynamic_arch_support

Add DYNAMIC_ARCH support for ARM64
This commit is contained in:
Martin Kroeker 2018-10-23 18:14:28 +02:00 committed by GitHub
commit 76a66eaac8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 466 additions and 258 deletions

View File

@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT)
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)' #CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
endif endif
ifeq ($(ARCH), arm64)
DYNAMIC_CORE = ARMV8
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
endif
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
ifndef DYNAMIC_CORE ifndef DYNAMIC_CORE
override DYNAMIC_ARCH= override DYNAMIC_ARCH=

View File

@ -237,7 +237,6 @@ void get_cpuconfig(void)
break; break;
case CPU_THUNDERX: case CPU_THUNDERX:
printf("#define ARMV8\n");
printf("#define THUNDERX\n"); printf("#define THUNDERX\n");
printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n"); printf("#define L1_DATA_LINESIZE 128\n");

View File

@ -15,7 +15,11 @@ endif
# COMMONOBJS += info.$(SUFFIX) # COMMONOBJS += info.$(SUFFIX)
ifeq ($(DYNAMIC_ARCH), 1) ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH),arm64)
COMMONOBJS += dynamic_arm64.$(SUFFIX)
else
COMMONOBJS += dynamic.$(SUFFIX) COMMONOBJS += dynamic.$(SUFFIX)
endif
else else
COMMONOBJS += parameter.$(SUFFIX) COMMONOBJS += parameter.$(SUFFIX)
endif endif
@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c
endif endif
ifeq ($(DYNAMIC_ARCH), 1) ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH),arm64)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX)
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
endif
else else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
endif endif

View File

@ -0,0 +1,198 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "common.h"
#include <asm/hwcap.h>
#include <sys/auxv.h>
extern gotoblas_t gotoblas_ARMV8;
extern gotoblas_t gotoblas_CORTEXA57;
extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99;
extern void openblas_warning(int verbose, const char * msg);
#define NUM_CORETYPES 4
/*
* In case asm/hwcap.h is outdated on the build system, make sure
* that HWCAP_CPUID is defined
*/
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif
#define get_cpu_ftr(id, var) ({ \
asm("mrs %0, "#id : "=r" (var)); \
})
static char *corename[] = {
"armv8",
"cortexa57",
"thunderx",
"thunderx2t99",
"unknown"
};
char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1];
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2];
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3];
return corename[NUM_CORETYPES];
}
static gotoblas_t *force_coretype(char *coretype) {
int i ;
int found = -1;
char message[128];
for ( i=0 ; i < NUM_CORETYPES; i++)
{
if (!strncasecmp(coretype, corename[i], 20))
{
found = i;
break;
}
}
switch (found)
{
case 0: return (&gotoblas_ARMV8);
case 1: return (&gotoblas_CORTEXA57);
case 2: return (&gotoblas_THUNDERX);
case 3: return (&gotoblas_THUNDERX2T99);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
return NULL;
}
static gotoblas_t *get_coretype(void) {
int implementer, variant, part, arch, revision, midr_el1;
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
char coremsg[128];
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
openblas_warning(1, coremsg);
return NULL;
}
get_cpu_ftr(MIDR_EL1, midr_el1);
/*
* MIDR_EL1
*
* 31 24 23 20 19 16 15 4 3 0
* -----------------------------------------------------------------
* | Implementer | Variant | Architecture | Part Number | Revision |
* -----------------------------------------------------------------
*/
implementer = (midr_el1 >> 24) & 0xFF;
part = (midr_el1 >> 4) & 0xFFF;
switch(implementer)
{
case 0x41: // ARM
switch (part)
{
case 0xd07: // Cortex A57
case 0xd08: // Cortex A72
case 0xd03: // Cortex A53
return &gotoblas_CORTEXA57;
}
break;
case 0x42: // Broadcom
switch (part)
{
case 0x516: // Vulcan
return &gotoblas_THUNDERX2T99;
}
break;
case 0x43: // Cavium
switch (part)
{
case 0x0a1: // ThunderX
return &gotoblas_THUNDERX;
case 0x0af: // ThunderX2
return &gotoblas_THUNDERX2T99;
}
break;
}
return NULL;
}
void gotoblas_dynamic_init(void) {
char coremsg[128];
char coren[22];
char *p;
if (gotoblas) return;
p = getenv("OPENBLAS_CORETYPE");
if ( p )
{
gotoblas = force_coretype(p);
}
else
{
gotoblas = get_coretype();
}
if (gotoblas == NULL)
{
snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
openblas_warning(1, coremsg);
gotoblas = &gotoblas_ARMV8;
}
if (gotoblas && gotoblas->init) {
strncpy(coren, gotoblas_corename(), 20);
sprintf(coremsg, "Core: %s\n", coren);
openblas_warning(2, coremsg);
gotoblas -> init();
} else {
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
exit(1);
}
}
void gotoblas_dynamic_quit(void) {
gotoblas = NULL;
}

View File

@ -730,35 +730,8 @@ void blas_set_parameter(void){
#if defined(ARCH_ARM64) #if defined(ARCH_ARM64)
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
unsigned long dgemm_prefetch_size_a;
unsigned long dgemm_prefetch_size_b;
unsigned long dgemm_prefetch_size_c;
#endif
void blas_set_parameter(void) void blas_set_parameter(void)
{ {
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
dgemm_p = 160;
dgemm_q = 128;
dgemm_r = 4096;
sgemm_p = 128;
sgemm_q = 352;
sgemm_r = 4096;
cgemm_p = 128;
cgemm_q = 224;
cgemm_r = 4096;
zgemm_p = 128;
zgemm_q = 112;
zgemm_r = 4096;
dgemm_prefetch_size_a = 3584;
dgemm_prefetch_size_b = 512;
dgemm_prefetch_size_c = 128;
#endif
} }
#endif #endif

View File

@ -88,7 +88,11 @@ lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
$(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) $(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F)
setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
ifeq ($(USE_GEMM3M), 1)
$(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@
else
$(CC) -c $(CFLAGS) $< -o $@ $(CC) -c $(CFLAGS) $< -o $@
endif
setparam$(TSUFFIX).c : setparam-ref.c setparam$(TSUFFIX).c : setparam-ref.c
sed 's/TS/$(TSUFFIX)/g' $< > $(@F) sed 's/TS/$(TSUFFIX)/g' $< > $(@F)

View File

@ -113,13 +113,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy.o SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy.o SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
@ -134,8 +134,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
endif endif
DGEMMINCOPYOBJ = dgemm_incopy.o DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy.o DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
ifeq ($(DGEMM_UNROLL_N), 4) ifeq ($(DGEMM_UNROLL_N), 4)
@ -146,34 +146,34 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
endif endif
DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy.o DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
CGEMMINCOPYOBJ = cgemm_incopy.o CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy.o CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy.o CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
ZGEMMINCOPYOBJ = zgemm_incopy.o ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy.o ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
@ -201,25 +201,25 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = ../generic/gemmkernel_2x2.c SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = ../generic/gemmkernel_2x2.c DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy.o DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy.o CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif endif

View File

@ -111,13 +111,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy.o SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy.o SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
@ -132,8 +132,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
endif endif
DGEMMINCOPYOBJ = dgemm_incopy.o DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy.o DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
ifeq ($(DGEMM_UNROLL_N), 4) ifeq ($(DGEMM_UNROLL_N), 4)
@ -144,32 +144,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
endif endif
DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy.o DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
CGEMMINCOPYOBJ = cgemm_incopy.o CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy.o CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy.o CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
ZGEMMINCOPYOBJ = zgemm_incopy.o ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy.o ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

View File

@ -89,26 +89,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = sgemm_kernel_4x4.S SGEMMKERNEL = sgemm_kernel_4x4.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = ../generic/gemmkernel_2x2.c DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy.o DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy.o CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

View File

@ -74,13 +74,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy.o SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy.o SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
@ -94,8 +94,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
endif endif
DGEMMINCOPYOBJ = dgemm_incopy.o DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy.o DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
ifeq ($(DGEMM_UNROLL_N), 4) ifeq ($(DGEMM_UNROLL_N), 4)
@ -106,32 +106,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
endif endif
DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy.o DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
CGEMMINCOPYOBJ = cgemm_incopy.o CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy.o CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy.o CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
ZGEMMINCOPYOBJ = zgemm_incopy.o ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy.o ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
SASUMKERNEL = sasum_thunderx2t99.c SASUMKERNEL = sasum_thunderx2t99.c
DASUMKERNEL = dasum_thunderx2t99.c DASUMKERNEL = dasum_thunderx2t99.c

View File

@ -1,135 +0,0 @@
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SDOTKERNEL = dot.S
DDOTKERNEL = dot.S
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
DSDOTKERNEL = dot.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
STRMMKERNEL = ../generic/trmmkernel_4x4.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = sgemm_kernel_4x4.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

View File

@ -943,13 +943,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
prfm PLDL1KEEP, [origPB] prfm PLDL1KEEP, [origPB]
prfm PLDL1KEEP, [origPA] prfm PLDL1KEEP, [origPA]
mov A_PRE_SIZE, #3584
ldr A_PRE_SIZE, =dgemm_prefetch_size_a mov B_PRE_SIZE, #512
ldr A_PRE_SIZE, [A_PRE_SIZE] mov C_PRE_SIZE, #128
ldr B_PRE_SIZE, =dgemm_prefetch_size_b
ldr B_PRE_SIZE, [B_PRE_SIZE]
ldr C_PRE_SIZE, =dgemm_prefetch_size_c
ldr C_PRE_SIZE, [C_PRE_SIZE]
add A_PRE_SIZE_64, A_PRE_SIZE, #64 add A_PRE_SIZE_64, A_PRE_SIZE, #64
add B_PRE_SIZE_64, B_PRE_SIZE, #64 add B_PRE_SIZE_64, B_PRE_SIZE, #64

View File

@ -294,6 +294,8 @@ gotoblas_t TABLE_NAME = {
chemm_outcopyTS, chemm_oltcopyTS, chemm_outcopyTS, chemm_oltcopyTS,
0, 0, 0, 0, 0, 0,
#if defined(USE_GEMM3M)
#ifdef CGEMM3M_DEFAULT_UNROLL_M #ifdef CGEMM3M_DEFAULT_UNROLL_M
CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
#else #else
@ -324,6 +326,33 @@ gotoblas_t TABLE_NAME = {
chemm3m_oucopybTS, chemm3m_olcopybTS, chemm3m_oucopybTS, chemm3m_olcopybTS,
chemm3m_oucopyrTS, chemm3m_olcopyrTS, chemm3m_oucopyrTS, chemm3m_olcopyrTS,
chemm3m_oucopyiTS, chemm3m_olcopyiTS, chemm3m_oucopyiTS, chemm3m_olcopyiTS,
#else
0, 0, 0,
NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
#endif
#ifndef NO_LAPACK #ifndef NO_LAPACK
cneg_tcopyTS, claswp_ncopyTS, cneg_tcopyTS, claswp_ncopyTS,
@ -400,6 +429,7 @@ gotoblas_t TABLE_NAME = {
zhemm_outcopyTS, zhemm_oltcopyTS, zhemm_outcopyTS, zhemm_oltcopyTS,
0, 0, 0, 0, 0, 0,
#if defined(USE_GEMM3M)
#ifdef ZGEMM3M_DEFAULT_UNROLL_M #ifdef ZGEMM3M_DEFAULT_UNROLL_M
ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
#else #else
@ -430,6 +460,33 @@ gotoblas_t TABLE_NAME = {
zhemm3m_oucopybTS, zhemm3m_olcopybTS, zhemm3m_oucopybTS, zhemm3m_olcopybTS,
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
#else
0, 0, 0,
NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
#endif
#ifndef NO_LAPACK #ifndef NO_LAPACK
zneg_tcopyTS, zlaswp_ncopyTS, zneg_tcopyTS, zlaswp_ncopyTS,
@ -503,6 +560,7 @@ gotoblas_t TABLE_NAME = {
xhemm_outcopyTS, xhemm_oltcopyTS, xhemm_outcopyTS, xhemm_oltcopyTS,
0, 0, 0, 0, 0, 0,
#if defined(USE_GEMM3M)
QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
xgemm3m_kernelTS, xgemm3m_kernelTS,
@ -528,6 +586,33 @@ gotoblas_t TABLE_NAME = {
xhemm3m_oucopybTS, xhemm3m_olcopybTS, xhemm3m_oucopybTS, xhemm3m_olcopybTS,
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
#else
0, 0, 0,
NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
NULL, NULL,
#endif
#ifndef NO_LAPACK #ifndef NO_LAPACK
xneg_tcopyTS, xlaswp_ncopyTS, xneg_tcopyTS, xlaswp_ncopyTS,
@ -561,6 +646,78 @@ gotoblas_t TABLE_NAME = {
}; };
#if defined(ARCH_ARM64)
static void init_parameter(void) {
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
#endif
#if defined(USE_GEMM3M)
#ifdef CGEMM3M_DEFAULT_P
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif
#ifdef ZGEMM3M_DEFAULT_P
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif
#ifdef CGEMM3M_DEFAULT_Q
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
#endif
#ifdef ZGEMM3M_DEFAULT_Q
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
#endif
#ifdef CGEMM3M_DEFAULT_R
TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
#else
TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
#endif
#ifdef ZGEMM3M_DEFAULT_R
TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
#else
TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
#endif
#ifdef EXPRECISION
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
#endif
#endif
}
#else // defined(ARCH_ARM64)
#ifdef ARCH_X86 #ifdef ARCH_X86
static int get_l2_size_old(void){ static int get_l2_size_old(void){
int i, eax, ebx, ecx, edx, cpuid_level; int i, eax, ebx, ecx, edx, cpuid_level;
@ -1146,3 +1303,4 @@ static void init_parameter(void) {
} }
#endif //defined(ARCH_ARM64)

48
param.h
View File

@ -2641,20 +2641,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P sgemm_p #define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P dgemm_p #define DGEMM_DEFAULT_P 160
#define CGEMM_DEFAULT_P cgemm_p #define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P zgemm_p #define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q sgemm_q #define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q dgemm_q #define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q cgemm_q #define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q zgemm_q #define ZGEMM_DEFAULT_Q 112
#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R dgemm_r #define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R cgemm_r #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R zgemm_r #define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16 #define SYMV_P 16
#endif #endif
@ -2720,20 +2720,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P sgemm_p #define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P dgemm_p #define DGEMM_DEFAULT_P 160
#define CGEMM_DEFAULT_P cgemm_p #define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P zgemm_p #define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q sgemm_q #define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q dgemm_q #define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q cgemm_q #define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q zgemm_q #define ZGEMM_DEFAULT_Q 112
#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R dgemm_r #define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R cgemm_r #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R zgemm_r #define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16 #define SYMV_P 16
#endif #endif