Merge pull request #2890 from martin-frbg/s-d-sum

Revert special handling of Windows xNRM2 and enable C+intrinsics kern…
This commit is contained in:
Martin Kroeker 2020-10-14 09:02:03 +02:00 committed by GitHub
commit 756802df61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 12 deletions

View File

@ -12,6 +12,10 @@ ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3
endif
endif
endif
@ -60,7 +64,7 @@ endif
endif
endif
ifeq ($(CORE), HASWELL)
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
ifndef DYNAMIC_ARCH
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)

View File

@ -109,10 +109,25 @@ if (${CORE} STREQUAL "COOPERLAKE")
if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (CCOMMON_OPT = "${CCOMMON_OPT} -march=cooperlake")
set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
endif()
endif ()
endif ()
endif ()
# When DYNAMIC_ARCH is not set, append one "-m<ext>" switch to CCOMMON_OPT for
# each HAVE_<EXT> variable that evaluates true. The list order below fixes the
# order in which the switches are appended: -mavx2, -mavx, -msse3, -mssse3.
if (NOT DYNAMIC_ARCH)
foreach (simd_ext AVX2 AVX SSE3 SSSE3)
# if() dereferences the composed name, so this tests HAVE_AVX2, HAVE_AVX, ...
if (HAVE_${simd_ext})
string (TOLOWER "${simd_ext}" simd_flag)
set (CCOMMON_OPT "${CCOMMON_OPT} -m${simd_flag}")
endif ()
endforeach ()
endif ()

View File

@ -8,6 +8,9 @@ include $(TOPDIR)/Makefile.system
# Append -msse3 to CFLAGS when HAVE_SSE3 is defined with a non-empty value.
ifdef HAVE_SSE3
CFLAGS += -msse3
endif
# Likewise append -mssse3 when HAVE_SSSE3 is defined; the two checks are
# independent, so either flag can be added without the other.
ifdef HAVE_SSSE3
CFLAGS += -mssse3
endif
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
@ -41,8 +44,8 @@ ifdef NO_AVX2
endif
ifdef TARGET_CORE
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
override CFLAGS += -msse3
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
override CFLAGS += -msse3 -mssse3
endif
ifeq ($(TARGET_CORE), COOPERLAKE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)

View File

@ -259,12 +259,8 @@ SNRM2KERNEL = nrm2_sse.S
endif
ifndef DNRM2KERNEL
ifeq ($(OSNAME),WINNT)
DNRM2KERNEL = ../arm/nrm2.c
else
DNRM2KERNEL = nrm2.S
endif
endif
ifndef QNRM2KERNEL
QNRM2KERNEL = nrm2.S
@ -275,12 +271,8 @@ CNRM2KERNEL = znrm2_sse.S
endif
ifndef ZNRM2KERNEL
ifeq ($(OSNAME),WINNT)
ZNRM2KERNEL = ../arm/znrm2.c
else
ZNRM2KERNEL = znrm2.S
endif
endif
ifndef XNRM2KERNEL
XNRM2KERNEL = znrm2.S
@ -486,3 +478,6 @@ XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
XGEMM3MKERNEL = xgemm3m_kernel_2x2.S
SSUMKERNEL = ../arm/sum.c
DSUMKERNEL = ../arm/sum.c