commit
9b3965b08c
6
Makefile
6
Makefile
|
@ -59,6 +59,9 @@ endif
|
||||||
@$(CC) --version > /dev/null 2>&1;\
|
@$(CC) --version > /dev/null 2>&1;\
|
||||||
if [ $$? -eq 0 ]; then \
|
if [ $$? -eq 0 ]; then \
|
||||||
cverinfo=`$(CC) --version | sed -n '1p'`; \
|
cverinfo=`$(CC) --version | sed -n '1p'`; \
|
||||||
|
if [ -z "$${cverinfo}" ]; then \
|
||||||
|
cverinfo=`$(CC) --version | sed -n '2p'`; \
|
||||||
|
fi; \
|
||||||
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
|
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
|
||||||
else \
|
else \
|
||||||
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
|
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
|
||||||
|
@ -67,6 +70,9 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||||
@$(FC) --version > /dev/null 2>&1;\
|
@$(FC) --version > /dev/null 2>&1;\
|
||||||
if [ $$? -eq 0 ]; then \
|
if [ $$? -eq 0 ]; then \
|
||||||
fverinfo=`$(FC) --version | sed -n '1p'`; \
|
fverinfo=`$(FC) --version | sed -n '1p'`; \
|
||||||
|
if [ -z "$${fverinfo}" ]; then \
|
||||||
|
fverinfo=`$(FC) --version | sed -n '2p'`; \
|
||||||
|
fi; \
|
||||||
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
|
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
|
||||||
else \
|
else \
|
||||||
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
|
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
|
||||||
|
|
|
@ -10,9 +10,11 @@ USE_OPENMP = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER10)
|
ifeq ($(CORE), POWER10)
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER9)
|
ifeq ($(CORE), POWER9)
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
|
|
@ -181,7 +181,7 @@ endif
|
||||||
|
|
||||||
# On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch.
|
# On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch.
|
||||||
ifeq ($(HOSTARCH), x86_64)
|
ifeq ($(HOSTARCH), x86_64)
|
||||||
ifeq ($(findstring pgcc,$(HOSTCC)),)
|
ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),)
|
||||||
GETARCH_FLAGS += -march=native
|
GETARCH_FLAGS += -march=native
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -663,6 +663,7 @@ endif
|
||||||
endif # ARCH zarch
|
endif # ARCH zarch
|
||||||
|
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
DYNAMIC_CORE = POWER6
|
DYNAMIC_CORE = POWER6
|
||||||
DYNAMIC_CORE += POWER8
|
DYNAMIC_CORE += POWER8
|
||||||
ifneq ($(C_COMPILER), GCC)
|
ifneq ($(C_COMPILER), GCC)
|
||||||
|
@ -689,6 +690,10 @@ else
|
||||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
# No comma after the function name: "$(info, ...)" is parsed as a reference to
# an (undefined) variable, so the message would never be shown.
$(info OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
else
|
||||||
|
DYNAMIC_CORE = POWER8
|
||||||
|
DYNAMIC_CORE += POWER9
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||||
|
@ -847,9 +852,19 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(C_COMPILER), PGI)
|
ifeq ($(C_COMPILER), PGI)
|
||||||
|
PGCVERSIONGT20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \> 20)
|
||||||
|
PGCVERSIONGTEQ20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \>= 20)
|
||||||
|
PGCMINORVERSIONGE11 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -c 4-5` == 11)
|
||||||
|
# Three-digit key built from: major > 20, major >= 20, minor == 11.
# The middle digit must come from PGCVERSIONGTEQ20 (the variable actually
# defined above); an undefined name would shorten the key to two digits and
# the 110/111/011 comparison below could never succeed.
PGCVERSIONCHECK := $(PGCVERSIONGT20)$(PGCVERSIONGTEQ20)$(PGCMINORVERSIONGE11)
|
||||||
|
ifeq ($(PGCVERSIONCHECK), $(filter $(PGCVERSIONCHECK), 110 111 011))
|
||||||
|
NEWPGI := 1
|
||||||
|
endif
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
CCOMMON_OPT += -tp p7-64 -D__MMX__ -Mnollvm
|
CCOMMON_OPT += -tp p7-64
|
||||||
|
ifneq ($(NEWPGI),1)
|
||||||
|
CCOMMON_OPT += -D__MMX__ -Mnollvm
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
|
@ -1029,18 +1044,24 @@ ifeq ($(ARCH), x86_64)
|
||||||
FCOMMON_OPT += -tp p7-64
|
FCOMMON_OPT += -tp p7-64
|
||||||
else
|
else
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
|
ifeq ($(CORE), POWER6)
|
||||||
|
$(warning NVIDIA HPC compilers do not support POWER6.)
|
||||||
|
endif
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
FCOMMON_OPT += -tp pwr8
|
FCOMMON_OPT += -tp pwr8
|
||||||
endif
|
endif
|
||||||
ifeq ($(CORE), POWER9)
|
ifeq ($(CORE), POWER9)
|
||||||
FCOMMON_OPT += -tp pwr9
|
FCOMMON_OPT += -tp pwr9
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(CORE), POWER10)
|
||||||
|
$(warning NVIDIA HPC compilers do not support POWER10.)
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -tp p7
|
FCOMMON_OPT += -tp p7
|
||||||
endif
|
endif
|
||||||
FCOMMON_OPT += -Mrecursive
|
FCOMMON_OPT += -Mrecursive -Kieee
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
FCOMMON_OPT += -mp
|
FCOMMON_OPT += -mp
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -13,7 +13,7 @@ Drone CI: [ {
|
char *gotoblas_corename(void) {
|
||||||
|
#ifndef C_PGI
|
||||||
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
||||||
|
#endif
|
||||||
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
||||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||||
|
@ -38,10 +40,157 @@ char *gotoblas_corename(void) {
|
||||||
return corename[0];
|
return corename[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef C_PGI
|
||||||
|
/*
|
||||||
|
* NV HPC compilers do not yet implement __builtin_cpu_is().
|
||||||
|
* Fake a version here for use in the CPU detection code below.
|
||||||
|
*
|
||||||
|
* Strategy here is to first check the CPU to see what it actually is,
|
||||||
|
* and then test the input to see if what the CPU actually is matches
|
||||||
|
* what was requested.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define POWER processor version table.
|
||||||
|
*
|
||||||
|
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define CPU_UNKNOWN 0
|
||||||
|
#define CPU_POWER5 5
|
||||||
|
#define CPU_POWER6 6
|
||||||
|
#define CPU_POWER8 8
|
||||||
|
#define CPU_POWER9 9
|
||||||
|
#define CPU_POWER10 10
|
||||||
|
|
||||||
|
static struct {
|
||||||
|
uint32_t pvr_mask;
|
||||||
|
uint32_t pvr_value;
|
||||||
|
const char* cpu_name;
|
||||||
|
uint32_t cpu_type;
|
||||||
|
} pvrPOWER [] = {
|
||||||
|
|
||||||
|
{ /* POWER6 in P5+ mode; 2.04-compliant processor */
|
||||||
|
.pvr_mask = 0xffffffff,
|
||||||
|
.pvr_value = 0x0f000001,
|
||||||
|
.cpu_name = "POWER5+",
|
||||||
|
.cpu_type = CPU_POWER5,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power6 aka POWER6X*/
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x003e0000,
|
||||||
|
.cpu_name = "POWER6 (raw)",
|
||||||
|
.cpu_type = CPU_POWER6,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power7 */
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x003f0000,
|
||||||
|
.cpu_name = "POWER7 (raw)",
|
||||||
|
.cpu_type = CPU_POWER6,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power7+ */
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x004A0000,
|
||||||
|
.cpu_name = "POWER7+ (raw)",
|
||||||
|
.cpu_type = CPU_POWER6,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power8E */
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x004b0000,
|
||||||
|
.cpu_name = "POWER8E (raw)",
|
||||||
|
.cpu_type = CPU_POWER8,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power8NVL */
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x004c0000,
|
||||||
|
.cpu_name = "POWER8NVL (raw)",
|
||||||
|
.cpu_type = CPU_POWER8,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power8 */
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x004d0000,
|
||||||
|
.cpu_name = "POWER8 (raw)",
|
||||||
|
.cpu_type = CPU_POWER8,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power9 DD2.0 */
|
||||||
|
.pvr_mask = 0xffffefff,
|
||||||
|
.pvr_value = 0x004e0200,
|
||||||
|
.cpu_name = "POWER9 (raw)",
|
||||||
|
.cpu_type = CPU_POWER9,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power9 DD 2.1 */
|
||||||
|
.pvr_mask = 0xffffefff,
|
||||||
|
.pvr_value = 0x004e0201,
|
||||||
|
.cpu_name = "POWER9 (raw)",
|
||||||
|
.cpu_type = CPU_POWER9,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power9 DD2.2 or later */
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x004e0000,
|
||||||
|
.cpu_name = "POWER9 (raw)",
|
||||||
|
.cpu_type = CPU_POWER9,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* Power10 */
|
||||||
|
.pvr_mask = 0xffff0000,
|
||||||
|
.pvr_value = 0x00800000,
|
||||||
|
.cpu_name = "POWER10 (raw)",
|
||||||
|
.cpu_type = CPU_POWER10,
|
||||||
|
},
|
||||||
|
|
||||||
|
{ /* End of table, pvr_mask and pvr_value must be zero */
|
||||||
|
.pvr_mask = 0x0,
|
||||||
|
.pvr_value = 0x0,
|
||||||
|
.cpu_name = "Unknown",
|
||||||
|
.cpu_type = CPU_UNKNOWN,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __builtin_cpu_is(const char *cpu) {
|
||||||
|
int i;
|
||||||
|
uint32_t pvr;
|
||||||
|
uint32_t cpu_type;
|
||||||
|
|
||||||
|
asm("mfpvr %0" : "=r"(pvr));
|
||||||
|
|
||||||
|
for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
|
||||||
|
if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(DEBUG)
|
||||||
|
/* cpu_type is a uint32_t, not a pointer: print it with %u instead of %p. */
printf("%s: returning CPU=%s, cpu_type=%u\n", __func__,
|
||||||
|
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
|
||||||
|
#endif
|
||||||
|
cpu_type = pvrPOWER[i].cpu_type;
|
||||||
|
|
||||||
|
if (!strcmp(cpu, "power8"))
|
||||||
|
return cpu_type == CPU_POWER8;
|
||||||
|
if (!strcmp(cpu, "power9"))
|
||||||
|
return cpu_type == CPU_POWER9;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* C_PGI */
|
||||||
|
|
||||||
static gotoblas_t *get_coretype(void) {
|
static gotoblas_t *get_coretype(void) {
|
||||||
|
|
||||||
|
#ifndef C_PGI
|
||||||
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
|
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
|
||||||
return &gotoblas_POWER6;
|
return &gotoblas_POWER6;
|
||||||
|
#endif
|
||||||
if (__builtin_cpu_is("power8"))
|
if (__builtin_cpu_is("power8"))
|
||||||
return &gotoblas_POWER8;
|
return &gotoblas_POWER8;
|
||||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
|
@ -77,7 +226,9 @@ static gotoblas_t *force_coretype(char * coretype) {
|
||||||
|
|
||||||
switch (found)
|
switch (found)
|
||||||
{
|
{
|
||||||
|
#ifndef C_PGI
|
||||||
case 1: return (&gotoblas_POWER6);
|
case 1: return (&gotoblas_POWER6);
|
||||||
|
#endif
|
||||||
case 2: return (&gotoblas_POWER8);
|
case 2: return (&gotoblas_POWER8);
|
||||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
case 3: return (&gotoblas_POWER9);
|
case 3: return (&gotoblas_POWER9);
|
||||||
|
|
11
f_check
11
f_check
|
@ -32,7 +32,7 @@ if ($compiler eq "") {
|
||||||
"xlf95", "xlf90", "xlf",
|
"xlf95", "xlf90", "xlf",
|
||||||
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
|
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
|
||||||
"pathf90", "pathf95",
|
"pathf90", "pathf95",
|
||||||
"pgf95", "pgf90", "pgf77",
|
"pgf95", "pgf90", "pgf77", "pgfortran", "nvfortran",
|
||||||
"flang", "egfortran",
|
"flang", "egfortran",
|
||||||
"ifort");
|
"ifort");
|
||||||
|
|
||||||
|
@ -64,7 +64,6 @@ if ($compiler eq "") {
|
||||||
if (!$?) {
|
if (!$?) {
|
||||||
|
|
||||||
$data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s`;
|
$data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s`;
|
||||||
|
|
||||||
if ($data =~ /zhoge_/) {
|
if ($data =~ /zhoge_/) {
|
||||||
$bu = "_";
|
$bu = "_";
|
||||||
}
|
}
|
||||||
|
@ -87,7 +86,7 @@ if ($compiler eq "") {
|
||||||
if ($compiler =~ /flang/) {
|
if ($compiler =~ /flang/) {
|
||||||
$vendor = FLANG;
|
$vendor = FLANG;
|
||||||
$openmp = "-fopenmp";
|
$openmp = "-fopenmp";
|
||||||
} elsif ($compiler =~ /pgf/) {
|
} elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) {
|
||||||
$vendor = PGI;
|
$vendor = PGI;
|
||||||
$openmp = "-mp";
|
$openmp = "-mp";
|
||||||
} else {
|
} else {
|
||||||
|
@ -123,7 +122,7 @@ if ($compiler eq "") {
|
||||||
$openmp = "-mp";
|
$openmp = "-mp";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($data =~ /PGF/) {
|
if ($data =~ /PGF/ || $data =~ /NVF/) {
|
||||||
$vendor = PGI;
|
$vendor = PGI;
|
||||||
$openmp = "-mp";
|
$openmp = "-mp";
|
||||||
}
|
}
|
||||||
|
@ -177,7 +176,7 @@ if ($compiler eq "") {
|
||||||
$openmp = "-mp";
|
$openmp = "-mp";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($compiler =~ /pgf/) {
|
if ($compiler =~ /pgf/ || $compiler =~ /nvf/) {
|
||||||
$vendor = PGI;
|
$vendor = PGI;
|
||||||
$bu = "_";
|
$bu = "_";
|
||||||
$openmp = "-mp";
|
$openmp = "-mp";
|
||||||
|
@ -330,7 +329,7 @@ if ($link ne "") {
|
||||||
$flags =~ s/\@/\,/g;
|
$flags =~ s/\@/\,/g;
|
||||||
$linker_L .= "-Wl,". $flags . " " ;
|
$linker_L .= "-Wl,". $flags . " " ;
|
||||||
}
|
}
|
||||||
if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) {
|
if ($flags =~ /-lgomp/ && $CC =~ /clang/) {
|
||||||
$flags = "-lomp";
|
$flags = "-lomp";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||||
override CFLAGS += -march=cooperlake
|
override CFLAGS += -march=cooperlake
|
||||||
else
|
else
|
||||||
override CFLAGS += -march=skylake-avx512
|
override CFLAGS += -march=skylake-avx512 -mavx512f
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
override CFLAGS += -fno-asynchronous-unwind-tables
|
override CFLAGS += -fno-asynchronous-unwind-tables
|
||||||
|
@ -47,7 +47,7 @@ ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
else ifeq ($(TARGET_CORE), SKYLAKEX)
|
else ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512 -mavx512f
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
override CFLAGS += -fno-asynchronous-unwind-tables
|
override CFLAGS += -fno-asynchronous-unwind-tables
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
||||||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
|
|
||||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
SNRM2KERNEL = nrm2.S
|
||||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
DNRM2KERNEL = nrm2.S
|
||||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
CNRM2KERNEL = znrm2.S
|
||||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
DDOTKERNEL = dot_thunderx2t99.c
|
DDOTKERNEL = dot_thunderx2t99.c
|
||||||
SDOTKERNEL = dot_thunderx2t99.c
|
SDOTKERNEL = dot_thunderx2t99.c
|
||||||
|
|
|
@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
||||||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
|
|
||||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
SNRM2KERNEL = nrm2.S
|
||||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
# Single-precision complex norm uses the complex kernel source (znrm2.S),
# as in the other kernel lists that assign CNRM2KERNEL = znrm2.S.
CNRM2KERNEL = znrm2.S
|
||||||
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
# Double-precision real norm uses the real kernel source (nrm2.S),
# as in the other kernel lists that assign DNRM2KERNEL = nrm2.S.
DNRM2KERNEL = nrm2.S
|
||||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
|
|
||||||
DDOTKERNEL = dot_thunderx2t99.c
|
DDOTKERNEL = dot_thunderx2t99.c
|
||||||
|
|
|
@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
|
||||||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||||
|
|
||||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
#SNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
#CNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||||
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
|
||||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
#DNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
#ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||||
|
SNRM2KERNEL = nrm2.S
|
||||||
|
DNRM2KERNEL = nrm2.S
|
||||||
|
CNRM2KERNEL = znrm2.S
|
||||||
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
DDOTKERNEL = dot_thunderx2t99.c
|
DDOTKERNEL = dot_thunderx2t99.c
|
||||||
SDOTKERNEL = dot_thunderx2t99.c
|
SDOTKERNEL = dot_thunderx2t99.c
|
||||||
|
|
Loading…
Reference in New Issue