Merge pull request #4272 from RajalakshmiSR/AIX_AS

POWER: AIX: Make use of power10 optimization
This commit is contained in:
Martin Kroeker 2023-10-24 12:08:51 +02:00 committed by GitHub
commit 96f8bb1eb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 4 additions and 13 deletions

View File

@ -277,10 +277,6 @@ endif
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
# Determine if the assembler is GNU Assembler
HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?)
GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
# Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
@ -760,7 +756,11 @@ DYNAMIC_CORE += POWER9
else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif
ifeq ($(OSNAME), AIX)
LDVERSIONGTEQ35 := 1
else
LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35)
endif
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT

View File

@ -1,6 +1,3 @@
ifeq ($(HAVE_GAS), 1)
include $(KERNELDIR)/KERNEL.POWER8
else
#SGEMM_BETA = ../generic/gemm_beta.c
#DGEMM_BETA = ../generic/gemm_beta.c
#CGEMM_BETA = ../generic/zgemm_beta.c
@ -265,4 +262,3 @@ QCABS_KERNEL = ../generic/cabs.c
#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
endif

View File

@ -2600,13 +2600,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8
#if defined(HAVE_GAS) && (HAVE_GAS == 1)
#define DGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_N 4
#else
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 8
#endif
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 8