Merge pull request #2829 from mhillenibm/clang_s390x
Fix DYNAMIC_ARCH=1 with clang s390x
This commit is contained in:
commit
a270894730
12
.travis.yml
12
.travis.yml
|
@ -43,6 +43,18 @@ matrix:
|
|||
- TARGET_BOX=IBMZ_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux
|
||||
dist: focal
|
||||
arch: s390x
|
||||
compiler: clang
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=IBMZ_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
|
|
|
@ -187,6 +187,7 @@ In chronological order:
|
|||
* Marius Hillenbrand <https://github.com/mhillenibm>
|
||||
* [2020-05-12] Revise dynamic architecture detection for IBM z
|
||||
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
|
||||
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
|
||||
|
||||
* Danfeng Zhang <https://github.com/craft-zhang>
|
||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
|
@ -295,7 +295,6 @@ endif
|
|||
ifeq ($(C_COMPILER), GCC)
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5)
|
||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
||||
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
|
||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||
|
@ -594,34 +593,36 @@ endif
|
|||
ifeq ($(ARCH), zarch)
|
||||
DYNAMIC_CORE = ZARCH_GENERIC
|
||||
|
||||
# Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer
|
||||
ifeq ($(GCCVERSIONGT5), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
else ifeq ($(GCCVERSIONEQ5), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
endif
|
||||
endif
|
||||
# if the compiler accepts -march=arch11 or -march=z13 and can compile a file
|
||||
# with z13-specific inline assembly, then we can include support for Z13.
|
||||
# note: -march=z13 is equivalent to -march=arch11 yet some compiler releases
|
||||
# only support one or the other.
|
||||
# note: LLVM version 6.x supported -march=z13 yet could not handle vector
|
||||
# registers in inline assembly, so the check for supporting the -march flag is
|
||||
# not enough.
|
||||
ZARCH_TEST_COMPILE=-c $(TOPDIR)/kernel/zarch/damin_z13.c -I$(TOPDIR) -o /dev/null > /dev/null 2> /dev/null
|
||||
ZARCH_CC_SUPPORTS_ARCH11=$(shell $(CC) -march=arch11 $(ZARCH_TEST_COMPILE) && echo 1)
|
||||
ZARCH_CC_SUPPORTS_Z13=$(shell $(CC) -march=z13 $(ZARCH_TEST_COMPILE) && echo 1)
|
||||
|
||||
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
|
||||
ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ZARCH_SUPPORT_Z13), 1)
|
||||
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH11), $(ZARCH_CC_SUPPORTS_Z13)), 1)
|
||||
DYNAMIC_CORE += Z13
|
||||
CCOMMON_OPT += -DDYN_Z13
|
||||
else
|
||||
$(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x)
|
||||
$(info OpenBLAS: Not building Z13 kernels because the compiler $(CC) does not support it)
|
||||
endif
|
||||
|
||||
ifeq ($(GCCVERSIONGTEQ7), 1)
|
||||
# as above for z13, check for -march=arch12 and z14 support in the compiler.
|
||||
ZARCH_CC_SUPPORTS_ARCH12=$(shell $(CC) -march=arch12 $(ZARCH_TEST_COMPILE) && echo 1)
|
||||
ZARCH_CC_SUPPORTS_Z14=$(shell $(CC) -march=z14 $(ZARCH_TEST_COMPILE) && echo 1)
|
||||
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH12), $(ZARCH_CC_SUPPORTS_Z14)), 1)
|
||||
DYNAMIC_CORE += Z14
|
||||
CCOMMON_OPT += -DDYN_Z14
|
||||
else
|
||||
$(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x)
|
||||
endif
|
||||
$(info OpenBLAS: Not building Z14 kernels because the compiler $(CC) does not support it)
|
||||
endif
|
||||
|
||||
endif # ARCH zarch
|
||||
|
||||
ifeq ($(ARCH), power)
|
||||
DYNAMIC_CORE = POWER6
|
||||
DYNAMIC_CORE += POWER8
|
||||
|
|
|
@ -1,18 +1,6 @@
|
|||
#include "common.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
// Gate kernels for z13 and z14 on gcc version
|
||||
#if (__GNUC__ == 5 && __GNUC_MINOR__ >= 2) || __GNUC__ >= 6 || \
|
||||
/* RHEL 7 since 7.3: */ \
|
||||
(__GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 5 && \
|
||||
__GNUC_RH_RELEASE__ >= 11)
|
||||
#define HAVE_Z13_SUPPORT
|
||||
#endif
|
||||
|
||||
#if __GNUC__ >= 7
|
||||
#define HAVE_Z14_SUPPORT
|
||||
#endif
|
||||
|
||||
// Guard the use of getauxval() on glibc version >= 2.16
|
||||
#ifdef __GLIBC__
|
||||
#include <features.h>
|
||||
|
@ -47,10 +35,10 @@ static unsigned long get_hwcap(void) {
|
|||
#endif // __GLIBC__
|
||||
|
||||
extern gotoblas_t gotoblas_ZARCH_GENERIC;
|
||||
#ifdef HAVE_Z13_SUPPORT
|
||||
#ifdef DYN_Z13
|
||||
extern gotoblas_t gotoblas_Z13;
|
||||
#endif
|
||||
#ifdef HAVE_Z14_SUPPORT
|
||||
#ifdef DYN_Z14
|
||||
extern gotoblas_t gotoblas_Z14;
|
||||
#endif
|
||||
|
||||
|
@ -66,10 +54,10 @@ static char* corename[] = {
|
|||
};
|
||||
|
||||
char* gotoblas_corename(void) {
|
||||
#ifdef HAVE_Z13_SUPPORT
|
||||
#ifdef DYN_Z13
|
||||
if (gotoblas == &gotoblas_Z13) return corename[1];
|
||||
#endif
|
||||
#ifdef HAVE_Z14_SUPPORT
|
||||
#ifdef DYN_Z14
|
||||
if (gotoblas == &gotoblas_Z14) return corename[2];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_ZARCH_GENERIC) return corename[3];
|
||||
|
@ -77,6 +65,10 @@ char* gotoblas_corename(void) {
|
|||
return corename[0];
|
||||
}
|
||||
|
||||
#ifndef HWCAP_S390_VXE
|
||||
#define HWCAP_S390_VXE 8192
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Detect the fitting set of kernels by retrieving the CPU features supported by
|
||||
* the OS from the auxiliary value AT_HWCAP and choosing the set of kernels
|
||||
|
@ -89,15 +81,15 @@ static gotoblas_t* get_coretype(void) {
|
|||
|
||||
unsigned long hwcap __attribute__((unused)) = get_hwcap();
|
||||
|
||||
#ifdef DYN_Z14
|
||||
// z14 and z15 systems: exploit Vector Facility (SIMD) and
|
||||
// Vector-Enhancements Facility 1 (float SIMD instructions), if present.
|
||||
#ifdef HAVE_Z14_SUPPORT
|
||||
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
|
||||
return &gotoblas_Z14;
|
||||
#endif
|
||||
|
||||
#ifdef DYN_Z13
|
||||
// z13: Vector Facility (SIMD for double)
|
||||
#ifdef HAVE_Z13_SUPPORT
|
||||
if (hwcap & HWCAP_S390_VX)
|
||||
return &gotoblas_Z13;
|
||||
#endif
|
||||
|
@ -123,19 +115,27 @@ static gotoblas_t* force_coretype(char* coretype) {
|
|||
}
|
||||
}
|
||||
|
||||
switch (found)
|
||||
{
|
||||
#ifdef HAVE_Z13_SUPPORT
|
||||
case 1: return (&gotoblas_Z13);
|
||||
if (found == 1) {
|
||||
#ifdef DYN_Z13
|
||||
return &gotoblas_Z13;
|
||||
#else
|
||||
openblas_warning(1, "Z13 support not compiled in");
|
||||
return NULL;
|
||||
#endif
|
||||
#ifdef HAVE_Z14_SUPPORT
|
||||
case 2: return (&gotoblas_Z14);
|
||||
} else if (found == 2) {
|
||||
#ifdef DYN_Z14
|
||||
return &gotoblas_Z14;
|
||||
#else
|
||||
openblas_warning(1, "Z14 support not compiled in");
|
||||
return NULL;
|
||||
#endif
|
||||
case 3: return (&gotoblas_ZARCH_GENERIC);
|
||||
default: return NULL;
|
||||
} else if (found == 3) {
|
||||
return &gotoblas_ZARCH_GENERIC;
|
||||
}
|
||||
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_init(void) {
|
||||
|
|
Loading…
Reference in New Issue