Merge pull request #6 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-12-27 21:28:10 +01:00 committed by GitHub
commit 9b3965b08c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 209 additions and 27 deletions

View File

@ -59,6 +59,9 @@ endif
@$(CC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
cverinfo=`$(CC) --version | sed -n '1p'`; \
if [ -z "$${cverinfo}" ]; then \
cverinfo=`$(CC) --version | sed -n '2p'`; \
fi; \
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
else \
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
@ -67,6 +70,9 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
@$(FC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
fverinfo=`$(FC) --version | sed -n '1p'`; \
if [ -z "$${fverinfo}" ]; then \
fverinfo=`$(FC) --version | sed -n '2p'`; \
fi; \
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
else \
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\

View File

@ -10,9 +10,11 @@ USE_OPENMP = 1
endif
ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
endif
ifeq ($(CORE), POWER9)
ifneq ($(C_COMPILER), PGI)

View File

@ -181,7 +181,7 @@ endif
# On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch.
ifeq ($(HOSTARCH), x86_64)
ifeq ($(findstring pgcc,$(HOSTCC)),)
ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),)
GETARCH_FLAGS += -march=native
endif
endif
@ -663,6 +663,7 @@ endif
endif # ARCH zarch
ifeq ($(ARCH), power)
ifneq ($(C_COMPILER), PGI)
DYNAMIC_CORE = POWER6
DYNAMIC_CORE += POWER8
ifneq ($(C_COMPILER), GCC)
@ -689,6 +690,10 @@ else
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
endif
endif
else
DYNAMIC_CORE = POWER8
DYNAMIC_CORE += POWER9
endif
endif
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
@ -847,9 +852,19 @@ endif
endif
ifeq ($(C_COMPILER), PGI)
PGCVERSIONGT20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \> 20)
PGCVERSIONGTEQ20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \>= 20)
PGCMINORVERSIONGE11 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -c 4-5` == 11)
PGCVERSIONCHECK := $(PGCVERSIONGT20)$(PGCVERSIONEQ20)$(PGCMINORVERSIONGE11)
ifeq ($(PGCVERSIONCHECK), $(filter $(PGCVERSIONCHECK), 110 111 011))
NEWPGI := 1
endif
ifdef BINARY64
ifeq ($(ARCH), x86_64)
CCOMMON_OPT += -tp p7-64 -D__MMX__ -Mnollvm
CCOMMON_OPT += -tp p7-64
ifneq ($(NEWPGI),1)
CCOMMON_OPT += -D__MMX__ -Mnollvm
endif
else
ifeq ($(ARCH), power)
ifeq ($(CORE), POWER8)
@ -1029,18 +1044,24 @@ ifeq ($(ARCH), x86_64)
FCOMMON_OPT += -tp p7-64
else
ifeq ($(ARCH), power)
ifeq ($(CORE), POWER6)
$(warning NVIDIA HPC compilers do not support POWER6.)
endif
ifeq ($(CORE), POWER8)
FCOMMON_OPT += -tp pwr8
endif
ifeq ($(CORE), POWER9)
FCOMMON_OPT += -tp pwr9
endif
ifeq ($(CORE), POWER10)
$(warning NVIDIA HPC compilers do not support POWER10.)
endif
endif
endif
else
FCOMMON_OPT += -tp p7
endif
FCOMMON_OPT += -Mrecursive
FCOMMON_OPT += -Mrecursive -Kieee
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -mp
endif

View File

@ -13,7 +13,7 @@ Drone CI: [![Build Status](https://cloud.drone.io/api/badges/xianyi/OpenBLAS/sta
## Introduction
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
OpenBLAS is an optimized Basic Linear Algebra Subprograms library based on GotoBLAS2 1.13 BSD version.
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.

View File

@ -27,7 +27,9 @@ static char *corename[] = {
#define NUM_CORETYPES 4
char *gotoblas_corename(void) {
#ifndef C_PGI
if (gotoblas == &gotoblas_POWER6) return corename[1];
#endif
if (gotoblas == &gotoblas_POWER8) return corename[2];
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
if (gotoblas == &gotoblas_POWER9) return corename[3];
@ -38,10 +40,157 @@ char *gotoblas_corename(void) {
return corename[0];
}
#ifdef C_PGI
/*
* NV HPC compilers do not yet implement __builtin_cpu_is().
* Fake a version here for use in the CPU detection code below.
*
* Strategy here is to first check the CPU to see what it actually is,
* and then test the input to see if what the CPU actually is matches
* what was requested.
*/
#include <string.h>
/*
* Define POWER processor version table.
*
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
*/
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10
static struct {
uint32_t pvr_mask;
uint32_t pvr_value;
const char* cpu_name;
uint32_t cpu_type;
} pvrPOWER [] = {
{ /* POWER6 in P5+ mode; 2.04-compliant processor */
.pvr_mask = 0xffffffff,
.pvr_value = 0x0f000001,
.cpu_name = "POWER5+",
.cpu_type = CPU_POWER5,
},
{ /* Power6 aka POWER6X*/
.pvr_mask = 0xffff0000,
.pvr_value = 0x003e0000,
.cpu_name = "POWER6 (raw)",
.cpu_type = CPU_POWER6,
},
{ /* Power7 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003f0000,
.cpu_name = "POWER7 (raw)",
.cpu_type = CPU_POWER6,
},
{ /* Power7+ */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004A0000,
.cpu_name = "POWER7+ (raw)",
.cpu_type = CPU_POWER6,
},
{ /* Power8E */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004b0000,
.cpu_name = "POWER8E (raw)",
.cpu_type = CPU_POWER8,
},
{ /* Power8NVL */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004c0000,
.cpu_name = "POWER8NVL (raw)",
.cpu_type = CPU_POWER8,
},
{ /* Power8 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004d0000,
.cpu_name = "POWER8 (raw)",
.cpu_type = CPU_POWER8,
},
{ /* Power9 DD2.0 */
.pvr_mask = 0xffffefff,
.pvr_value = 0x004e0200,
.cpu_name = "POWER9 (raw)",
.cpu_type = CPU_POWER9,
},
{ /* Power9 DD 2.1 */
.pvr_mask = 0xffffefff,
.pvr_value = 0x004e0201,
.cpu_name = "POWER9 (raw)",
.cpu_type = CPU_POWER9,
},
{ /* Power9 DD2.2 or later */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
.cpu_name = "POWER9 (raw)",
.cpu_type = CPU_POWER9,
},
{ /* Power10 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00800000,
.cpu_name = "POWER10 (raw)",
.cpu_type = CPU_POWER10,
},
{ /* End of table, pvr_mask and pvr_value must be zero */
.pvr_mask = 0x0,
.pvr_value = 0x0,
.cpu_name = "Unknown",
.cpu_type = CPU_UNKNOWN,
},
};
static int __builtin_cpu_is(const char *cpu) {
int i;
uint32_t pvr;
uint32_t cpu_type;
asm("mfpvr %0" : "=r"(pvr));
for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
break;
}
}
#if defined(DEBUG)
printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
#endif
cpu_type = pvrPOWER[i].cpu_type;
if (!strcmp(cpu, "power8"))
return cpu_type == CPU_POWER8;
if (!strcmp(cpu, "power9"))
return cpu_type == CPU_POWER9;
return 0;
}
#endif /* C_PGI */
static gotoblas_t *get_coretype(void) {
#ifndef C_PGI
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
return &gotoblas_POWER6;
#endif
if (__builtin_cpu_is("power8"))
return &gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
@ -77,7 +226,9 @@ static gotoblas_t *force_coretype(char * coretype) {
switch (found)
{
#ifndef C_PGI
case 1: return (&gotoblas_POWER6);
#endif
case 2: return (&gotoblas_POWER8);
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
case 3: return (&gotoblas_POWER9);

11
f_check
View File

@ -32,7 +32,7 @@ if ($compiler eq "") {
"xlf95", "xlf90", "xlf",
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
"pathf90", "pathf95",
"pgf95", "pgf90", "pgf77",
"pgf95", "pgf90", "pgf77", "pgfortran", "nvfortran",
"flang", "egfortran",
"ifort");
@ -64,7 +64,6 @@ if ($compiler eq "") {
if (!$?) {
$data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s`;
if ($data =~ /zhoge_/) {
$bu = "_";
}
@ -87,7 +86,7 @@ if ($compiler eq "") {
if ($compiler =~ /flang/) {
$vendor = FLANG;
$openmp = "-fopenmp";
} elsif ($compiler =~ /pgf/) {
} elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) {
$vendor = PGI;
$openmp = "-mp";
} else {
@ -123,7 +122,7 @@ if ($compiler eq "") {
$openmp = "-mp";
}
if ($data =~ /PGF/) {
if ($data =~ /PGF/ || $data =~ /NVF/) {
$vendor = PGI;
$openmp = "-mp";
}
@ -177,7 +176,7 @@ if ($compiler eq "") {
$openmp = "-mp";
}
if ($compiler =~ /pgf/) {
if ($compiler =~ /pgf/ || $compiler =~ /nvf/) {
$vendor = PGI;
$bu = "_";
$openmp = "-mp";
@ -330,7 +329,7 @@ if ($link ne "") {
$flags =~ s/\@/\,/g;
$linker_L .= "-Wl,". $flags . " " ;
}
if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) {
if ($flags =~ /-lgomp/ && $CC =~ /clang/) {
$flags = "-lomp";
}

View File

@ -36,7 +36,7 @@ ifeq ($(TARGET_CORE), COOPERLAKE)
ifeq ($(GCCVERSIONGTEQ10), 1)
override CFLAGS += -march=cooperlake
else
override CFLAGS += -march=skylake-avx512
override CFLAGS += -march=skylake-avx512 -mavx512f
endif
ifeq ($(OSNAME), CYGWIN_NT)
override CFLAGS += -fno-asynchronous-unwind-tables
@ -47,7 +47,7 @@ ifeq ($(TARGET_CORE), COOPERLAKE)
endif
endif
else ifeq ($(TARGET_CORE), SKYLAKEX)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512 -mavx512f
ifeq ($(OSNAME), CYGWIN_NT)
override CFLAGS += -fno-asynchronous-unwind-tables
endif

View File

@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c
SNRM2KERNEL = scnrm2_thunderx2t99.c
DNRM2KERNEL = dznrm2_thunderx2t99.c
CNRM2KERNEL = scnrm2_thunderx2t99.c
ZNRM2KERNEL = dznrm2_thunderx2t99.c
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c

View File

@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c
SNRM2KERNEL = scnrm2_thunderx2t99.c
CNRM2KERNEL = scnrm2_thunderx2t99.c
SNRM2KERNEL = nrm2.S
CNRM2KERNEL = nrm2.S
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
DNRM2KERNEL = dznrm2_thunderx2t99.c
ZNRM2KERNEL = dznrm2_thunderx2t99.c
DNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot_thunderx2t99.c

View File

@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c
SNRM2KERNEL = scnrm2_thunderx2t99.c
CNRM2KERNEL = scnrm2_thunderx2t99.c
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
DNRM2KERNEL = dznrm2_thunderx2t99.c
ZNRM2KERNEL = dznrm2_thunderx2t99.c
#SNRM2KERNEL = scnrm2_thunderx2t99.c
#CNRM2KERNEL = scnrm2_thunderx2t99.c
##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
#DNRM2KERNEL = dznrm2_thunderx2t99.c
#ZNRM2KERNEL = dznrm2_thunderx2t99.c
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c