Merge pull request #2890 from martin-frbg/s-d-sum
Revert special handling of Windows xNRM2 and enable C+intrinsics kernels…
This commit is contained in:
commit
756802df61
|
@ -12,6 +12,10 @@ ifdef HAVE_SSE3
|
|||
ifndef DYNAMIC_ARCH
|
||||
CCOMMON_OPT += -msse3
|
||||
FCOMMON_OPT += -msse3
|
||||
ifdef HAVE_SSSE3
|
||||
CCOMMON_OPT += -mssse3
|
||||
FCOMMON_OPT += -mssse3
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -60,7 +64,7 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), HASWELL)
|
||||
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
|
|
|
@ -109,10 +109,25 @@ if (${CORE} STREQUAL "COOPERLAKE")
|
|||
if (NOT NO_AVX512)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
||||
set (CCOMMON_OPT = "${CCOMMON_OPT} -march=cooperlake")
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
|
||||
else ()
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (HAVE_AVX2)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2")
|
||||
endif ()
|
||||
if (HAVE_AVX)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx")
|
||||
endif ()
|
||||
if (HAVE_SSE3)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -msse3")
|
||||
endif ()
|
||||
if (HAVE_SSSE3)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3")
|
||||
endif ()
|
||||
endif()
|
||||
|
|
|
@ -8,6 +8,9 @@ include $(TOPDIR)/Makefile.system
|
|||
ifdef HAVE_SSE3
|
||||
CFLAGS += -msse3
|
||||
endif
|
||||
ifdef HAVE_SSSE3
|
||||
CFLAGS += -mssse3
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||
|
@ -41,8 +44,8 @@ ifdef NO_AVX2
|
|||
endif
|
||||
|
||||
ifdef TARGET_CORE
|
||||
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
|
||||
override CFLAGS += -msse3
|
||||
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
|
||||
override CFLAGS += -msse3 -mssse3
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
|
|
|
@ -259,12 +259,8 @@ SNRM2KERNEL = nrm2_sse.S
|
|||
endif
|
||||
|
||||
ifndef DNRM2KERNEL
|
||||
ifeq ($(OSNAME),WINNT)
|
||||
DNRM2KERNEL = ../arm/nrm2.c
|
||||
else
|
||||
DNRM2KERNEL = nrm2.S
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef QNRM2KERNEL
|
||||
QNRM2KERNEL = nrm2.S
|
||||
|
@ -275,12 +271,8 @@ CNRM2KERNEL = znrm2_sse.S
|
|||
endif
|
||||
|
||||
ifndef ZNRM2KERNEL
|
||||
ifeq ($(OSNAME),WINNT)
|
||||
ZNRM2KERNEL = ../arm/znrm2.c
|
||||
else
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef XNRM2KERNEL
|
||||
XNRM2KERNEL = znrm2.S
|
||||
|
@ -486,3 +478,6 @@ XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
|
|||
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
|
||||
|
||||
XGEMM3MKERNEL = xgemm3m_kernel_2x2.S
|
||||
|
||||
SSUMKERNEL = ../arm/sum.c
|
||||
DSUMKERNEL = ../arm/sum.c
|
||||
|
|
Loading…
Reference in New Issue