Merge pull request #2740 from RajalakshmiSR/clang-power
Fix compilation issues with clang on POWER
This commit is contained in:
commit
383262035d
|
@@ -11,34 +11,34 @@ endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER10)
|
ifeq ($(CORE), POWER10)
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
else
|
else
|
||||||
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -fno-fast-math
|
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -fno-fast-math
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER9)
|
ifeq ($(CORE), POWER9)
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
|
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
|
||||||
endif
|
endif
|
||||||
ifneq ($(F_COMPILER), PGI)
|
ifneq ($(F_COMPILER), PGI)
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
|
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
|
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||||
endif
|
endif
|
||||||
ifneq ($(F_COMPILER), PGI)
|
ifneq ($(F_COMPILER), PGI)
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -fno-fast-math
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -O2 -Mrecursive
|
FCOMMON_OPT += -O2 -Mrecursive
|
||||||
endif
|
endif
|
||||||
|
@@ -48,26 +48,26 @@ endif
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
|
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
|
||||||
endif
|
endif
|
||||||
ifneq ($(F_COMPILER), PGI)
|
ifneq ($(F_COMPILER), PGI)
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
|
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
|
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||||
endif
|
endif
|
||||||
ifneq ($(F_COMPILER), PGI)
|
ifneq ($(F_COMPILER), PGI)
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
|
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -O2 -Mrecursive
|
FCOMMON_OPT += -O2 -Mrecursive
|
||||||
|
|
|
@@ -10,6 +10,11 @@ ifeq ($(C_COMPILER), GCC)
|
||||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), power)
|
||||||
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
|
override CFLAGS += -fno-integrated-as
|
||||||
|
endif
|
||||||
|
endif
|
||||||
AVX2OPT =
|
AVX2OPT =
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
|
|
|
@@ -359,7 +359,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do
|
||||||
"stxvd2x 39, %[off], %[y] \n\t"
|
"stxvd2x 39, %[off], %[y] \n\t"
|
||||||
"stxvd2x 40, %[off2], %[y] \n\t"
|
"stxvd2x 40, %[off2], %[y] \n\t"
|
||||||
|
|
||||||
: [memy] "+m" (*(const double (*)[8])y),
|
: [memy] "+m" (*(double (*)[8])y),
|
||||||
[n] "+&r" (n),
|
[n] "+&r" (n),
|
||||||
[a0] "=b" (a0),
|
[a0] "=b" (a0),
|
||||||
[a1] "=&b" (a1),
|
[a1] "=&b" (a1),
|
||||||
|
@@ -373,7 +373,7 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, double *ap, double *x, do
|
||||||
[off2]"=&b" (off2),
|
[off2]"=&b" (off2),
|
||||||
[temp] "=&b" (tempR)
|
[temp] "=&b" (tempR)
|
||||||
: [memx] "m" (*(const double (*)[n])x),
|
: [memx] "m" (*(const double (*)[n])x),
|
||||||
[mem_ap] "m" (*(const double (*)[]) ap),
|
[mem_ap] "m" (*(const double (*)[n*8]) ap),
|
||||||
[alpha] "d" (alpha),
|
[alpha] "d" (alpha),
|
||||||
"[a0]" (ap),
|
"[a0]" (ap),
|
||||||
[x] "b" (x),
|
[x] "b" (x),
|
||||||
|
|
Loading…
Reference in New Issue