diff --git a/Makefile.power b/Makefile.power index bf7037995..c1556fe82 100644 --- a/Makefile.power +++ b/Makefile.power @@ -11,34 +11,34 @@ endif ifeq ($(CORE), POWER10) ifeq ($(USE_OPENMP), 1) -COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp -FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp +COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp +FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -DUSE_OPENMP -fno-fast-math -fopenmp else -COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -fno-fast-math -FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -fno-fast-math +COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math +FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math endif endif ifeq ($(CORE), POWER9) ifeq ($(USE_OPENMP), 1) ifneq ($(C_COMPILER), PGI) -CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp +CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp else CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp endif ifneq ($(F_COMPILER), PGI) -FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp +FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -DUSE_OPENMP -fno-fast-math -fopenmp else FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp endif else ifneq ($(C_COMPILER), PGI) -CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math +CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math else CCOMMON_OPT += -fast -Mvect=simd -Mcache_align endif ifneq ($(F_COMPILER), PGI) -FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math +FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -fno-fast-math else FCOMMON_OPT += -O2 -Mrecursive endif @@ -48,26 +48,26 @@ endif ifeq ($(CORE), POWER8) ifeq ($(USE_OPENMP), 1) ifneq ($(C_COMPILER), PGI) -CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp +CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp else CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp endif ifneq ($(F_COMPILER), PGI) -FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp +FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -DUSE_OPENMP -fno-fast-math -fopenmp else FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp endif else ifneq ($(C_COMPILER), PGI) -CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math +CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math else CCOMMON_OPT += -fast -Mvect=simd -Mcache_align endif ifneq ($(F_COMPILER), PGI) ifeq ($(OSNAME), AIX) -FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math +FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math else -FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math +FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math endif else FCOMMON_OPT += -O2 -Mrecursive diff --git a/kernel/Makefile b/kernel/Makefile index 9b468a6af..db3282c05 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,6 +10,11 @@ ifeq ($(C_COMPILER), GCC) GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) endif +ifeq ($(ARCH), power) +ifeq ($(C_COMPILER), CLANG) + override CFLAGS += -fno-integrated-as +endif +endif AVX2OPT = ifeq ($(C_COMPILER), GCC) # AVX2 support was added in 4.7.0 diff --git a/kernel/power/dgemv_t.c b/kernel/power/dgemv_t.c index 09abd5a43..c07b3c223 100644 --- a/kernel/power/dgemv_t.c +++ b/kernel/power/dgemv_t.c @@ -359,7 +359,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do "stxvd2x 39, %[off], %[y] \n\t" "stxvd2x 40, %[off2], %[y] \n\t" - : [memy] "+m" (*(const double (*)[8])y), + : [memy] "+m" (*(double (*)[8])y), [n] "+&r" (n), [a0] "=b" (a0), [a1] "=&b" (a1), @@ -373,7 +373,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do [off2]"=&b" (off2), [temp] "=&b" (tempR) : [memx] "m" (*(const double (*)[n])x), - [mem_ap] "m" (*(const double (*)[]) ap), + [mem_ap] "m" (*(const double (*)[n*8]) ap), [alpha] "d" (alpha), "[a0]" (ap), [x] "b" (x),