The -march=cooperlake and -march=sapphirerapids flags were never added when building with Clang targeting those architectures; instead, the build fell back to the Skylake AVX512 implementation. Clang added support for these two architectures in Clang 9 and Clang 12, respectively, so introduce new checks for those versions to enable the appropriate -march flag, and fall back to Skylake otherwise.
208 lines
5.8 KiB
Makefile
208 lines
5.8 KiB
Makefile
# Root of the source tree, relative to this directory.
TOPDIR = ..

# When TARGET_CORE is set, it becomes TARGET before Makefile.system is
# included below.
ifdef TARGET_CORE
  TARGET = $(TARGET_CORE)
endif

include $(TOPDIR)/Makefile.system
|
|
|
|
# clang building for the power architecture: disable the integrated assembler.
ifeq ($(C_COMPILER), CLANG)
  ifeq ($(ARCH), power)
    override CFLAGS += -fno-integrated-as
  endif
endif
|
|
|
|
# AVX2OPT: extra compiler options appended for the HASWELL and ZEN cores.
# It stays empty unless the compiler version check below succeeds.
AVX2OPT =

# GCCVERSIONCHECK concatenates three version-test results (presumably 0/1
# values provided by Makefile.system); the accepted patterns are 011, 110
# and 111.
ifeq ($(C_COMPILER), GCC)
  # AVX2 support was added in GCC 4.7.0.
  GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
  ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
    AVX2OPT = -mavx2
  endif
else ifeq ($(C_COMPILER), CLANG)
  # Any clang posing as gcc 4.2 should be new enough (3.4 or later).
  GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
  ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
    AVX2OPT = -mavx2 -mfma
  endif
endif

# NO_AVX2 overrides whatever the detection above selected.
ifdef NO_AVX2
  AVX2OPT=
endif
|
|
|
|
# Per-core build settings.
#
# With TARGET_CORE set, CFLAGS gets -DBUILD_KERNEL and a table name of
# gotoblas_<core>, followed by the core-specific options, and generated file
# names carry the suffix _<core> (TSUFFIX).  Without it, TARGET_CORE defaults
# to $(CORE) and no suffix is used.
ifdef TARGET_CORE

  # Common to every core; must stay first so the flag order is unchanged.
  override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)

  # Core-specific instruction-set options.
  ifeq ($(TARGET_CORE), SAPPHIRERAPIDS)
    # -march=sapphirerapids is used with GCC >= 11 or Clang >= 12;
    # otherwise fall back to the Skylake AVX512 options.
    ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(CLANGVERSIONGTEQ12)))
      override CFLAGS += -march=sapphirerapids
    else
      override CFLAGS += -march=skylake-avx512 -mavx512f
    endif
  else ifeq ($(TARGET_CORE), COOPERLAKE)
    # -march=cooperlake is used with GCC >= 10 or Clang >= 9;
    # otherwise fall back to the Skylake AVX512 options.
    ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(CLANGVERSIONGTEQ9)))
      override CFLAGS += -march=cooperlake
    else
      override CFLAGS += -march=skylake-avx512 -mavx512f
    endif
  else ifeq ($(TARGET_CORE), SKYLAKEX)
    override CFLAGS += -march=skylake-avx512 -mavx512f
  else ifeq ($(TARGET_CORE), HASWELL)
    override CFLAGS += $(AVX2OPT)
  else ifeq ($(TARGET_CORE), ZEN)
    override CFLAGS += $(AVX2OPT)
  else ifeq ($(TARGET_CORE), LOONGSON3R4)
    override CFLAGS += $(MSA_FLAGS)
  endif

  # The three AVX512 cores additionally disable asynchronous unwind tables
  # on Cygwin (any compiler) and on WINNT when compiling with GCC.  This was
  # previously repeated verbatim in each of the three branches.
  ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE), SAPPHIRERAPIDS COOPERLAKE SKYLAKEX))
    ifeq ($(OSNAME), CYGWIN_NT)
      override CFLAGS += -fno-asynchronous-unwind-tables
    endif
    ifeq ($(OSNAME), WINNT)
      ifeq ($(C_COMPILER), GCC)
        override CFLAGS += -fno-asynchronous-unwind-tables
      endif
    endif
  endif

  BUILD_KERNEL = 1
  KDIR =
  TSUFFIX = _$(TARGET_CORE)

else

  # No explicit per-core target: use the configured CORE, no file suffix.
  TARGET_CORE = $(CORE)
  KDIR =
  TSUFFIX =
  ifeq ($(TARGET_CORE), LOONGSON3R4)
    override CFLAGS += $(MSA_FLAGS)
  endif

endif
|
|
|
|
-include $(KERNELDIR)/KERNEL.$(TARGET_CORE)
|
|
|
|
include $(KERNELDIR)/KERNEL
|
|
|
|
include Makefile.L1
|
|
|
|
include Makefile.L2
|
|
|
|
include Makefile.L3
|
|
|
|
include Makefile.LA
|
|
|
|
# HPLOBJS: the double-precision object set, in this order: the GEMM kernel,
# the four GEMM copy objects (supplied by variables set elsewhere), then the
# TRSM kernels, level-1/level-2 helpers and TRSM copy routines, each with
# the .$(SUFFIX) extension.
HPLOBJS = \
  dgemm_kernel.$(SUFFIX) \
  $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) $(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) \
  $(addsuffix .$(SUFFIX), \
    dtrsm_kernel_LN dtrsm_kernel_LT dtrsm_kernel_RN dtrsm_kernel_RT \
    daxpy_k dcopy_k ddot_k dger_k dscal_k idamax_k \
    dgemv_n dgemv_t dgemm_beta \
    dtrsm_iunucopy dtrsm_iunncopy dtrsm_ilnucopy dtrsm_ilnncopy \
    dtrsm_iutucopy dtrsm_iutncopy dtrsm_iltucopy dtrsm_iltncopy \
    dtrsm_ounucopy dtrsm_ounncopy dtrsm_olnucopy dtrsm_olnncopy \
    dtrsm_outucopy dtrsm_outncopy dtrsm_oltucopy dtrsm_oltncopy)
|
|
|
|
# Objects that are always added to the common set.
COMMONOBJS += lsame.$(SUFFIX) scabs1.$(SUFFIX) dcabs1.$(SUFFIX)

# DYNAMIC_ARCH=1: define TS as the per-core suffix and add the matching
# setparam object to SBLASOBJS.
ifeq ($(DYNAMIC_ARCH), 1)
  CCOMMON_OPT += -DTS=$(TSUFFIX)
  SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
endif
|
|
|
|
# Headers from which kernel$(TSUFFIX).h is generated.  The LAPACK header is
# included unless NO_LAPACK is 1.
KERNEL_INTERFACE = \
  ../common_level1.h \
  ../common_level2.h \
  ../common_level3.h
ifneq ($(NO_LAPACK), 1)
  KERNEL_INTERFACE += ../common_lapack.h
endif
|
|
|
|
# Optional members of the common object set.

# x86 only: the cpuid object.
ifeq ($(ARCH), x86)
  COMMONOBJS += cpuid.$(SUFFIX)
endif

# EXPRECISION and QUAD_PRECISION each add the q-prefixed helper objects.
# The two checks are independent, so the objects are appended once per
# option that is defined.
ifdef EXPRECISION
  COMMONOBJS += qconjg.$(SUFFIX) qcabs1.$(SUFFIX)
endif

ifdef QUAD_PRECISION
  COMMONOBJS += qconjg.$(SUFFIX) qcabs1.$(SUFFIX)
endif
|
|
|
|
# Default goal (first rule in this file): build the "libs" target.
all: libs
|
|
|
|
# Helper objects compiled with -DF_INTERFACE.  Each rule compiles its single
# prerequisite and writes the object under the file part of the target name.

# COMPLEX, with DOUBLE explicitly undefined.
scabs1.$(SUFFIX): $(KERNELDIR)/$(SCABS_KERNEL)
	$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DF_INTERFACE $< -o $(@F)

# COMPLEX + DOUBLE.
dcabs1.$(SUFFIX): $(KERNELDIR)/$(DCABS_KERNEL)
	$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DF_INTERFACE $< -o $(@F)

# COMPLEX + XDOUBLE.
qcabs1.$(SUFFIX): $(KERNELDIR)/$(QCABS_KERNEL)
	$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DF_INTERFACE $< -o $(@F)

# COMPLEX + XDOUBLE; the source file name is fixed (qconjg.S).
qconjg.$(SUFFIX): $(KERNELDIR)/qconjg.S
	$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DF_INTERFACE $< -o $(@F)

# No precision defines.
lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
	$(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F)
|
|
|
|
# Compile the generated setparam source; it also depends on the generated
# kernel header.  -DUSE_GEMM3M is added only when USE_GEMM3M is 1.
setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
ifneq ($(USE_GEMM3M), 1)
	$(CC) -c $(CFLAGS) $< -o $@
else
	$(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@
endif
|
|
|
|
# Generated header: all KERNEL_INTERFACE headers are passed through sed,
# which replaces every "(" (together with any spaces directly before it) by
# "$(TSUFFIX)(", and the result is written to a single file.
kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
	sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F)

# Generated source: setparam-ref.c with every occurrence of "TS" replaced
# by $(TSUFFIX).
setparam$(TSUFFIX).c : setparam-ref.c
	sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
|
|
|
|
|
|
# cpuid object, assembled from cpuid.S with the regular CFLAGS.
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
	$(CC) -c $(CFLAGS) $< -o $(@F)
|
|
|
|
# $(PSUFFIX) variants of the helper objects: the same sources and defines as
# the $(SUFFIX) rules, but compiled with $(PFLAGS) instead of $(CFLAGS)
# (presumably the profiling build -- confirm against Makefile.system).

# COMPLEX, with DOUBLE explicitly undefined.
scabs1.$(PSUFFIX): $(KERNELDIR)/$(SCABS_KERNEL)
	$(CC) -c $(PFLAGS) -DCOMPLEX -UDOUBLE -DF_INTERFACE $< -o $(@F)

# COMPLEX + DOUBLE.
dcabs1.$(PSUFFIX): $(KERNELDIR)/$(DCABS_KERNEL)
	$(CC) -c $(PFLAGS) -DCOMPLEX -DDOUBLE -DF_INTERFACE $< -o $(@F)

# COMPLEX + XDOUBLE.
qcabs1.$(PSUFFIX): $(KERNELDIR)/$(QCABS_KERNEL)
	$(CC) -c $(PFLAGS) -DCOMPLEX -DXDOUBLE -DF_INTERFACE $< -o $(@F)

# COMPLEX + XDOUBLE; the source file name is fixed (qconjg.S).
qconjg.$(PSUFFIX): $(KERNELDIR)/qconjg.S
	$(CC) -c $(PFLAGS) -DCOMPLEX -DXDOUBLE -DF_INTERFACE $< -o $(@F)

# No precision defines.
lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
	$(CC) -c $(PFLAGS) -DF_INTERFACE $< -o $(@F)

# cpuid, no extra defines.
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S
	$(CC) -c $(PFLAGS) $< -o $(@F)
|
|
|
|
# Remove the generated setparam sources and kernel headers, both the
# per-core (setparam_*.c, kernel_*.h) and the unsuffixed names.
# This double-colon rule is unconditional: it is not restricted to
# DYNAMIC_ARCH builds.
clean ::
	@rm -f setparam_*.c kernel_*.h setparam.h kernel.h
|
|
|
|
include $(TOPDIR)/Makefile.tail
|