Merge branch 'develop' into clapack
This commit is contained in:
commit
e3250e2362
|
@ -23,9 +23,9 @@ if(MSVC AND NOT DEFINED NOFORTRAN)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#######
|
#######
|
||||||
if(MSVC)
|
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
|
||||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
|
||||||
endif()
|
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
|
||||||
|
|
||||||
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
|
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
|
||||||
|
|
||||||
|
@ -320,7 +320,9 @@ if (NOT NOFORTRAN)
|
||||||
if(NOT NO_CBLAS)
|
if(NOT NO_CBLAS)
|
||||||
add_subdirectory(ctest)
|
add_subdirectory(ctest)
|
||||||
endif()
|
endif()
|
||||||
add_subdirectory(lapack-netlib/TESTING)
|
if (BUILD_TESTING)
|
||||||
|
add_subdirectory(lapack-netlib/TESTING)
|
||||||
|
endif()
|
||||||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
|
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
|
||||||
add_subdirectory(cpp_thread_test)
|
add_subdirectory(cpp_thread_test)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Phytium FT2000: target the ARMv8-A baseline and tune for Cortex-A72.
ifeq ($(CORE), FT2000)
# The Fortran flags are left untouched when the NAG compiler is in use.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
|
||||||
|
|
||||||
# Use a72 tunings because Neoverse-N1 is only available
|
# Use a72 tunings because Neoverse-N1 is only available
|
||||||
# in GCC>=9
|
# in GCC>=9
|
||||||
ifeq ($(CORE), NEOVERSEN1)
|
ifeq ($(CORE), NEOVERSEN1)
|
||||||
|
@ -229,6 +236,43 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Cortex-X1: build for ARMv8.2-A with Cortex-A72 tuning.
# The flags are only added when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1
# (presumably GCC 11 or newer, or clang -- confirm where these are set).
# The tuning model must be spelled "cortex-a72", exactly as in the FT2000
# block of this file; the hyphen-less "cortexa72" is not a CPU name the
# compiler accepts and makes every compile fail with an unknown -mtune value.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX1)
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
# The NAG Fortran compiler does not get these flags.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
endif
|
||||||
|
|
||||||
|
# Cortex-X2: request ARMv8.4-A with the SVE extension.
ifeq ($(CORE), CORTEXX2)
# Only applied when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.4-a+sve
# The NAG Fortran compiler does not get this flag.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
|
||||||
|
|
||||||
|
# Cortex-A510: request ARMv8.4-A with the SVE extension.
ifeq ($(CORE), CORTEXA510)
# Alternative clang-only guard, currently disabled:
#ifeq (1, $(filter 1,$(ISCLANG)))
# Active guard: GCCVERSIONGTEQ11 or ISCLANG must evaluate to 1.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.4-a+sve
# The NAG Fortran compiler does not get this flag.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
|
||||||
|
|
||||||
|
# Cortex-A710: request ARMv8.4-A with the SVE extension.
ifeq ($(CORE), CORTEXA710)
# Only applied when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.4-a+sve
# The NAG Fortran compiler does not get this flag.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -71,7 +71,8 @@ endif
|
||||||
|
|
||||||
|
|
||||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
avx512=$$(perl c_check - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
||||||
|
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||||
|
|
||||||
getarch_2nd : getarch_2nd.c config.h dummy
|
getarch_2nd : getarch_2nd.c config.h dummy
|
||||||
ifndef TARGET_CORE
|
ifndef TARGET_CORE
|
||||||
|
|
|
@ -92,6 +92,10 @@ CORTEXA53
|
||||||
CORTEXA57
|
CORTEXA57
|
||||||
CORTEXA72
|
CORTEXA72
|
||||||
CORTEXA73
|
CORTEXA73
|
||||||
|
CORTEXA510
|
||||||
|
CORTEXA710
|
||||||
|
CORTEXX1
|
||||||
|
CORTEXX2
|
||||||
NEOVERSEN1
|
NEOVERSEN1
|
||||||
NEOVERSEV1
|
NEOVERSEV1
|
||||||
NEOVERSEN2
|
NEOVERSEN2
|
||||||
|
@ -103,6 +107,9 @@ THUNDERX2T99
|
||||||
TSV110
|
TSV110
|
||||||
THUNDERX3T110
|
THUNDERX3T110
|
||||||
VORTEX
|
VORTEX
|
||||||
|
A64FX
|
||||||
|
ARMV8SVE
|
||||||
|
FT2000
|
||||||
|
|
||||||
9.System Z:
|
9.System Z:
|
||||||
ZARCH_GENERIC
|
ZARCH_GENERIC
|
||||||
|
|
|
@ -65,7 +65,7 @@ jobs:
|
||||||
- task: CMake@1
|
- task: CMake@1
|
||||||
inputs:
|
inputs:
|
||||||
workingDirectory: 'build' # Optional
|
workingDirectory: 'build' # Optional
|
||||||
cmakeArgs: '-G "Visual Studio 16 2019" ..'
|
cmakeArgs: '-G "Visual Studio 17 2022" ..'
|
||||||
- task: CMake@1
|
- task: CMake@1
|
||||||
inputs:
|
inputs:
|
||||||
cmakeArgs: '--build . --config Release'
|
cmakeArgs: '--build . --config Release'
|
||||||
|
@ -103,7 +103,7 @@ jobs:
|
||||||
|
|
||||||
- job: Windows_flang_clang
|
- job: Windows_flang_clang
|
||||||
pool:
|
pool:
|
||||||
vmImage: 'windows-latest'
|
vmImage: 'windows-2022'
|
||||||
steps:
|
steps:
|
||||||
- script: |
|
- script: |
|
||||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||||
|
@ -114,8 +114,8 @@ jobs:
|
||||||
conda install --yes --quiet ninja flang
|
conda install --yes --quiet ninja flang
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
ctest
|
ctest
|
||||||
|
|
||||||
|
@ -178,7 +178,7 @@ jobs:
|
||||||
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON ..
|
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON ..
|
||||||
cmake --build .
|
cmake --build .
|
||||||
ctest
|
ctest
|
||||||
|
|
||||||
- job: OSX_Ifort_Clang
|
- job: OSX_Ifort_Clang
|
||||||
pool:
|
pool:
|
||||||
vmImage: 'macOS-10.15'
|
vmImage: 'macOS-10.15'
|
||||||
|
|
5
c_check
5
c_check
|
@ -254,7 +254,7 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) {
|
||||||
# $tmpf = new File::Temp( UNLINK => 1 );
|
# $tmpf = new File::Temp( UNLINK => 1 );
|
||||||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
||||||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
|
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
|
||||||
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
print $fh "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
||||||
$args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf";
|
$args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf";
|
||||||
if ($compiler eq "PGI") {
|
if ($compiler eq "PGI") {
|
||||||
$args = " -tp skylake -c -o $tmpf.o $tmpf";
|
$args = " -tp skylake -c -o $tmpf.o $tmpf";
|
||||||
|
@ -278,7 +278,7 @@ if ($data =~ /HAVE_C11/) {
|
||||||
$c11_atomics = 0;
|
$c11_atomics = 0;
|
||||||
} else {
|
} else {
|
||||||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
||||||
print $tmpf "#include <stdatomic.h>\nint main(void){}\n";
|
print $fh "#include <stdatomic.h>\nint main(void){}\n";
|
||||||
$args = " -c -o $tmpf.o $tmpf";
|
$args = " -c -o $tmpf.o $tmpf";
|
||||||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
|
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
|
||||||
system(@cmd) == 0;
|
system(@cmd) == 0;
|
||||||
|
@ -316,6 +316,7 @@ if ($architecture ne $hostarch) {
|
||||||
}
|
}
|
||||||
|
|
||||||
$cross = 1 if ($os ne $hostos);
|
$cross = 1 if ($os ne $hostos);
|
||||||
|
# An Android target built on a Linux host is not treated as a cross build when
# the TERMUX_APP_PID environment variable is set to a non-empty value
# (presumably this identifies a build running inside Termux -- confirm).
# The variable is tested with defined() and the string operator `ne`: the
# numeric `!=` would numify both operands and treat an unset variable, "" and
# any non-numeric text as 0.
$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && defined($ENV{TERMUX_APP_PID}) && ($ENV{TERMUX_APP_PID} ne ""));
|
||||||
|
|
||||||
$openmp = "" if $ENV{USE_OPENMP} != 1;
|
$openmp = "" if $ENV{USE_OPENMP} != 1;
|
||||||
|
|
||||||
|
|
|
@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE)
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
# Cortex-A510: add ARMv8-A + SVE code generation to the C flags.
# Skipped when DYNAMIC_ARCH is set. CORE is expanded inside quotes so that an
# empty or undefined CORE compares as "" instead of producing a malformed
# if() expression.
if ("${CORE}" STREQUAL "CORTEXA510" AND NOT DYNAMIC_ARCH)
  set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif ()
|
||||||
|
|
||||||
|
# Cortex-A710: add ARMv8-A + SVE code generation to the C flags.
# Skipped when DYNAMIC_ARCH is set. CORE is expanded inside quotes so that an
# empty or undefined CORE compares as "" instead of producing a malformed
# if() expression.
if ("${CORE}" STREQUAL "CORTEXA710" AND NOT DYNAMIC_ARCH)
  set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif ()
|
||||||
|
|
||||||
|
# Cortex-X1: add ARMv8.2-A code generation to the C flags.
# Skipped when DYNAMIC_ARCH is set. CORE is expanded inside quotes so that an
# empty or undefined CORE compares as "" instead of producing a malformed
# if() expression.
if ("${CORE}" STREQUAL "CORTEXX1" AND NOT DYNAMIC_ARCH)
  set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
endif ()
|
||||||
|
|
||||||
|
# Cortex-X2: add ARMv8-A + SVE code generation to the C flags.
# Skipped when DYNAMIC_ARCH is set. CORE is expanded inside quotes so that an
# empty or undefined CORE compares as "" instead of producing a malformed
# if() expression.
if ("${CORE}" STREQUAL "CORTEXX2" AND NOT DYNAMIC_ARCH)
  set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL POWER10)
|
if (${CORE} STREQUAL POWER10)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
|
|
@ -67,7 +67,15 @@ if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||||
if (BINARY64)
|
if (BINARY64)
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||||
if (INTERFACE64)
|
if (INTERFACE64)
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
|
# Pick the flag that makes default INTEGERs 64-bit. A compiler that does not
# identify as "Intel" gets the gfortran-style option; the Intel compiler gets
# its own option, spelled with a slash on Windows.
if (NOT CMAKE_Fortran_COMPILER_ID STREQUAL "Intel")
  set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
elseif (WIN32)
  set(FCOMMON_OPT "${FCOMMON_OPT} /integer-size:64")
else ()
  set(FCOMMON_OPT "${FCOMMON_OPT} -integer-size 64")
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
else ()
|
else ()
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||||
|
|
|
@ -2610,8 +2610,9 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\
|
#if !defined(DYNAMIC_ARCH) \
|
||||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)
|
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|
||||||
|
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K))
|
||||||
extern BLASLONG gemm_offset_a;
|
extern BLASLONG gemm_offset_a;
|
||||||
extern BLASLONG gemm_offset_b;
|
extern BLASLONG gemm_offset_b;
|
||||||
extern BLASLONG sbgemm_p;
|
extern BLASLONG sbgemm_p;
|
||||||
|
|
|
@ -45,6 +45,10 @@ size_t length64=sizeof(value64);
|
||||||
#define CPU_NEOVERSEN1 11
|
#define CPU_NEOVERSEN1 11
|
||||||
#define CPU_NEOVERSEV1 16
|
#define CPU_NEOVERSEV1 16
|
||||||
#define CPU_NEOVERSEN2 17
|
#define CPU_NEOVERSEN2 17
|
||||||
|
#define CPU_CORTEXX1 18
|
||||||
|
#define CPU_CORTEXX2 19
|
||||||
|
#define CPU_CORTEXA510 20
|
||||||
|
#define CPU_CORTEXA710 21
|
||||||
// Qualcomm
|
// Qualcomm
|
||||||
#define CPU_FALKOR 6
|
#define CPU_FALKOR 6
|
||||||
// Cavium
|
// Cavium
|
||||||
|
@ -59,6 +63,8 @@ size_t length64=sizeof(value64);
|
||||||
#define CPU_VORTEX 13
|
#define CPU_VORTEX 13
|
||||||
// Fujitsu
|
// Fujitsu
|
||||||
#define CPU_A64FX 15
|
#define CPU_A64FX 15
|
||||||
|
// Phytium
|
||||||
|
#define CPU_FT2000 22
|
||||||
|
|
||||||
static char *cpuname[] = {
|
static char *cpuname[] = {
|
||||||
"UNKNOWN",
|
"UNKNOWN",
|
||||||
|
@ -73,12 +79,17 @@ static char *cpuname[] = {
|
||||||
"TSV110",
|
"TSV110",
|
||||||
"EMAG8180",
|
"EMAG8180",
|
||||||
"NEOVERSEN1",
|
"NEOVERSEN1",
|
||||||
"NEOVERSEV1"
|
|
||||||
"NEOVERSEN2"
|
|
||||||
"THUNDERX3T110",
|
"THUNDERX3T110",
|
||||||
"VORTEX",
|
"VORTEX",
|
||||||
"CORTEXA55",
|
"CORTEXA55",
|
||||||
"A64FX"
|
"A64FX",
|
||||||
|
"NEOVERSEV1",
|
||||||
|
"NEOVERSEN2",
|
||||||
|
"CORTEXX1",
|
||||||
|
"CORTEXX2",
|
||||||
|
"CORTEXA510",
|
||||||
|
"CORTEXA710",
|
||||||
|
"FT2000"
|
||||||
};
|
};
|
||||||
|
|
||||||
static char *cpuname_lower[] = {
|
static char *cpuname_lower[] = {
|
||||||
|
@ -94,12 +105,17 @@ static char *cpuname_lower[] = {
|
||||||
"tsv110",
|
"tsv110",
|
||||||
"emag8180",
|
"emag8180",
|
||||||
"neoversen1",
|
"neoversen1",
|
||||||
"neoversev1",
|
|
||||||
"neoversen2",
|
|
||||||
"thunderx3t110",
|
"thunderx3t110",
|
||||||
"vortex",
|
"vortex",
|
||||||
"cortexa55",
|
"cortexa55",
|
||||||
"a64fx"
|
"a64fx",
|
||||||
|
"neoversev1",
|
||||||
|
"neoversen2",
|
||||||
|
"cortexx1",
|
||||||
|
"cortexx2",
|
||||||
|
"cortexa510",
|
||||||
|
"cortexa710",
|
||||||
|
"ft2000"
|
||||||
};
|
};
|
||||||
|
|
||||||
int get_feature(char *search)
|
int get_feature(char *search)
|
||||||
|
@ -182,6 +198,14 @@ int detect(void)
|
||||||
return CPU_NEOVERSEN2;
|
return CPU_NEOVERSEN2;
|
||||||
else if (strstr(cpu_part, "0xd05"))
|
else if (strstr(cpu_part, "0xd05"))
|
||||||
return CPU_CORTEXA55;
|
return CPU_CORTEXA55;
|
||||||
|
else if (strstr(cpu_part, "0xd46"))
|
||||||
|
return CPU_CORTEXA510;
|
||||||
|
else if (strstr(cpu_part, "0xd47"))
|
||||||
|
return CPU_CORTEXA710;
|
||||||
|
else if (strstr(cpu_part, "0xd44"))
|
||||||
|
return CPU_CORTEXX1;
|
||||||
|
else if (strstr(cpu_part, "0xd4c"))
|
||||||
|
return CPU_CORTEXX2;
|
||||||
}
|
}
|
||||||
// Qualcomm
|
// Qualcomm
|
||||||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
|
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
|
||||||
|
@ -202,6 +226,13 @@ int detect(void)
|
||||||
// Fujitsu
|
// Fujitsu
|
||||||
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
|
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
|
||||||
return CPU_A64FX;
|
return CPU_A64FX;
|
||||||
|
// Apple
|
||||||
|
else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
|
||||||
|
return CPU_VORTEX;
|
||||||
|
// Phytium
|
||||||
|
else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661")
|
||||||
|
|| strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
|
||||||
|
return CPU_FT2000;
|
||||||
}
|
}
|
||||||
|
|
||||||
p = (char *) NULL ;
|
p = (char *) NULL ;
|
||||||
|
@ -382,7 +413,24 @@ void get_cpuconfig(void)
|
||||||
printf("#define DTB_DEFAULT_ENTRIES 48\n");
|
printf("#define DTB_DEFAULT_ENTRIES 48\n");
|
||||||
printf("#define DTB_SIZE 4096\n");
|
printf("#define DTB_SIZE 4096\n");
|
||||||
break;
|
break;
|
||||||
|
case CPU_CORTEXA510:
|
||||||
|
case CPU_CORTEXA710:
|
||||||
|
case CPU_CORTEXX1:
|
||||||
|
case CPU_CORTEXX2:
|
||||||
|
printf("#define ARMV9\n");
|
||||||
|
printf("#define %s\n", cpuname[d]);
|
||||||
|
printf("#define L1_CODE_SIZE 65536\n");
|
||||||
|
printf("#define L1_CODE_LINESIZE 64\n");
|
||||||
|
printf("#define L1_CODE_ASSOCIATIVE 4\n");
|
||||||
|
printf("#define L1_DATA_SIZE 65536\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 64\n");
|
||||||
|
printf("#define L1_DATA_ASSOCIATIVE 4\n");
|
||||||
|
printf("#define L2_SIZE 1048576\n");
|
||||||
|
printf("#define L2_LINESIZE 64\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 8\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
break;
|
||||||
case CPU_FALKOR:
|
case CPU_FALKOR:
|
||||||
printf("#define FALKOR\n");
|
printf("#define FALKOR\n");
|
||||||
printf("#define L1_CODE_SIZE 65536\n");
|
printf("#define L1_CODE_SIZE 65536\n");
|
||||||
|
@ -469,9 +517,9 @@ void get_cpuconfig(void)
|
||||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||||
printf("#define DTB_SIZE 4096 \n");
|
printf("#define DTB_SIZE 4096 \n");
|
||||||
break;
|
break;
|
||||||
#ifdef __APPLE__
|
|
||||||
case CPU_VORTEX:
|
case CPU_VORTEX:
|
||||||
printf("#define VORTEX \n");
|
printf("#define VORTEX \n");
|
||||||
|
#ifdef __APPLE__
|
||||||
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
|
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
|
||||||
printf("#define L1_CODE_SIZE %lld \n",value64);
|
printf("#define L1_CODE_SIZE %lld \n",value64);
|
||||||
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
|
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
|
||||||
|
@ -480,10 +528,10 @@ void get_cpuconfig(void)
|
||||||
printf("#define L1_DATA_SIZE %lld \n",value64);
|
printf("#define L1_DATA_SIZE %lld \n",value64);
|
||||||
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
|
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
|
||||||
printf("#define L2_SIZE %lld \n",value64);
|
printf("#define L2_SIZE %lld \n",value64);
|
||||||
|
#endif
|
||||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||||
printf("#define DTB_SIZE 4096 \n");
|
printf("#define DTB_SIZE 4096 \n");
|
||||||
break;
|
break;
|
||||||
#endif
|
|
||||||
case CPU_A64FX:
|
case CPU_A64FX:
|
||||||
printf("#define A64FX\n");
|
printf("#define A64FX\n");
|
||||||
printf("#define L1_CODE_SIZE 65535\n");
|
printf("#define L1_CODE_SIZE 65535\n");
|
||||||
|
@ -494,6 +542,16 @@ void get_cpuconfig(void)
|
||||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
printf("#define DTB_SIZE 4096\n");
|
printf("#define DTB_SIZE 4096\n");
|
||||||
break;
|
break;
|
||||||
|
case CPU_FT2000:
|
||||||
|
printf("#define FT2000\n");
|
||||||
|
printf("#define L1_CODE_SIZE 32768\n");
|
||||||
|
printf("#define L1_DATA_SIZE 32768\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 64\n");
|
||||||
|
printf("#define L2_SIZE 33554432\n");
|
||||||
|
printf("#define L2_LINESIZE 64\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
get_cpucount();
|
get_cpucount();
|
||||||
}
|
}
|
||||||
|
|
54
cpuid_x86.c
54
cpuid_x86.c
|
@ -1707,8 +1707,18 @@ int get_cpuname(void){
|
||||||
if (model == 0xf && stepping < 0xe)
|
if (model == 0xf && stepping < 0xe)
|
||||||
return CPUTYPE_NANO;
|
return CPUTYPE_NANO;
|
||||||
return CPUTYPE_NEHALEM;
|
return CPUTYPE_NEHALEM;
|
||||||
|
case 0x7:
|
||||||
|
switch (exmodel) {
|
||||||
|
case 5:
|
||||||
|
if (support_avx2())
|
||||||
|
return CPUTYPE_ZEN;
|
||||||
|
else
|
||||||
|
return CPUTYPE_DUNNINGTON;
|
||||||
|
default:
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
if (family >= 0x7)
|
if (family >= 0x8)
|
||||||
return CPUTYPE_NEHALEM;
|
return CPUTYPE_NEHALEM;
|
||||||
else
|
else
|
||||||
return CPUTYPE_VIAC3;
|
return CPUTYPE_VIAC3;
|
||||||
|
@ -1716,7 +1726,20 @@ int get_cpuname(void){
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vendor == VENDOR_ZHAOXIN){
|
if (vendor == VENDOR_ZHAOXIN){
|
||||||
return CPUTYPE_NEHALEM;
|
switch (family) {
|
||||||
|
case 0x7:
|
||||||
|
switch (exmodel) {
|
||||||
|
case 5:
|
||||||
|
if (support_avx2())
|
||||||
|
return CPUTYPE_ZEN;
|
||||||
|
else
|
||||||
|
return CPUTYPE_DUNNINGTON;
|
||||||
|
default:
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vendor == VENDOR_RISE){
|
if (vendor == VENDOR_RISE){
|
||||||
|
@ -2416,8 +2439,18 @@ int get_coretype(void){
|
||||||
if (model == 0xf && stepping < 0xe)
|
if (model == 0xf && stepping < 0xe)
|
||||||
return CORE_NANO;
|
return CORE_NANO;
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
|
case 0x7:
|
||||||
|
switch (exmodel) {
|
||||||
|
case 5:
|
||||||
|
if (support_avx2())
|
||||||
|
return CORE_ZEN;
|
||||||
|
else
|
||||||
|
return CORE_DUNNINGTON;
|
||||||
|
default:
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
if (family >= 0x7)
|
if (family >= 0x8)
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
else
|
else
|
||||||
return CORE_VIAC3;
|
return CORE_VIAC3;
|
||||||
|
@ -2425,7 +2458,20 @@ int get_coretype(void){
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vendor == VENDOR_ZHAOXIN) {
|
if (vendor == VENDOR_ZHAOXIN) {
|
||||||
return CORE_NEHALEM;
|
switch (family) {
|
||||||
|
case 0x7:
|
||||||
|
switch (exmodel) {
|
||||||
|
case 5:
|
||||||
|
if (support_avx2())
|
||||||
|
return CORE_ZEN;
|
||||||
|
else
|
||||||
|
return CORE_DUNNINGTON;
|
||||||
|
default:
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return CORE_UNKNOWN;
|
return CORE_UNKNOWN;
|
||||||
|
|
|
@ -96,7 +96,7 @@ extern gotoblas_t gotoblas_BARCELONA;
|
||||||
#endif
|
#endif
|
||||||
#ifdef DYN_ATOM
|
#ifdef DYN_ATOM
|
||||||
extern gotoblas_t gotoblas_ATOM;
|
extern gotoblas_t gotoblas_ATOM;
|
||||||
elif defined(DYN_NEHALEM)
|
#elif defined(DYN_NEHALEM)
|
||||||
#define gotoblas_ATOM gotoblas_NEHALEM
|
#define gotoblas_ATOM gotoblas_NEHALEM
|
||||||
#else
|
#else
|
||||||
#define gotoblas_ATOM gotoblas_PRESCOTT
|
#define gotoblas_ATOM gotoblas_PRESCOTT
|
||||||
|
@ -875,14 +875,37 @@ static gotoblas_t *get_coretype(void){
|
||||||
if (model == 0xf && stepping < 0xe)
|
if (model == 0xf && stepping < 0xe)
|
||||||
return &gotoblas_NANO;
|
return &gotoblas_NANO;
|
||||||
return &gotoblas_NEHALEM;
|
return &gotoblas_NEHALEM;
|
||||||
|
case 0x7:
|
||||||
|
switch (exmodel) {
|
||||||
|
case 5:
|
||||||
|
if (support_avx2())
|
||||||
|
return &gotoblas_ZEN;
|
||||||
|
else
|
||||||
|
return &gotoblas_DUNNINGTON;
|
||||||
|
default:
|
||||||
|
return &gotoblas_NEHALEM;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
if (family >= 0x7)
|
if (family >= 0x8)
|
||||||
return &gotoblas_NEHALEM;
|
return &gotoblas_NEHALEM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vendor == VENDOR_ZHAOXIN) {
|
if (vendor == VENDOR_ZHAOXIN) {
|
||||||
return &gotoblas_NEHALEM;
|
switch (family) {
|
||||||
|
case 0x7:
|
||||||
|
switch (exmodel) {
|
||||||
|
case 5:
|
||||||
|
if (support_avx2())
|
||||||
|
return &gotoblas_ZEN;
|
||||||
|
else
|
||||||
|
return &gotoblas_DUNNINGTON;
|
||||||
|
default:
|
||||||
|
return &gotoblas_NEHALEM;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return &gotoblas_NEHALEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -60,6 +60,9 @@ static char* openblas_config_str=""
|
||||||
#ifdef USE_OPENMP
|
#ifdef USE_OPENMP
|
||||||
"USE_OPENMP "
|
"USE_OPENMP "
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef USE_TLS
|
||||||
|
"USE_TLS "
|
||||||
|
#endif
|
||||||
#ifndef DYNAMIC_ARCH
|
#ifndef DYNAMIC_ARCH
|
||||||
CHAR_CORENAME
|
CHAR_CORENAME
|
||||||
#endif
|
#endif
|
||||||
|
|
94
getarch.c
94
getarch.c
|
@ -94,14 +94,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <sys/sysinfo.h>
|
#include <sys/sysinfo.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__x86_64__) || defined(_M_X64)
|
|
||||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6))
|
|
||||||
#else
|
|
||||||
#ifndef NO_AVX512
|
|
||||||
#define NO_AVX512
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
/* #define FORCE_P2 */
|
/* #define FORCE_P2 */
|
||||||
/* #define FORCE_KATMAI */
|
/* #define FORCE_KATMAI */
|
||||||
/* #define FORCE_COPPERMINE */
|
/* #define FORCE_COPPERMINE */
|
||||||
|
@ -1240,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "cortexa53"
|
#define LIBNAME "cortexa53"
|
||||||
#define CORENAME "CORTEXA53"
|
#define CORENAME "CORTEXA53"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_CORTEXA57
|
#ifdef FORCE_CORTEXA57
|
||||||
|
@ -1256,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "cortexa57"
|
#define LIBNAME "cortexa57"
|
||||||
#define CORENAME "CORTEXA57"
|
#define CORENAME "CORTEXA57"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_CORTEXA72
|
#ifdef FORCE_CORTEXA72
|
||||||
|
@ -1272,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "cortexa72"
|
#define LIBNAME "cortexa72"
|
||||||
#define CORENAME "CORTEXA72"
|
#define CORENAME "CORTEXA72"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_CORTEXA73
|
#ifdef FORCE_CORTEXA73
|
||||||
|
@ -1288,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "cortexa73"
|
#define LIBNAME "cortexa73"
|
||||||
#define CORENAME "CORTEXA73"
|
#define CORENAME "CORTEXA73"
|
||||||
#else
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_CORTEXX1
/*
 * Forced-target description for Cortex-X1, active only when FORCE_CORTEXX1
 * is defined. ARCHCONFIG carries the cache/TLB parameters and feature macros
 * as one space-separated list of -D options; adjacent literals concatenate,
 * so each option sits on its own line.
 */
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "CORTEXX1"
#define SUBDIRNAME      "arm64"
#define LIBNAME         "cortexx1"
#define CORENAME        "CORTEXX1"
#define ARCHCONFIG \
       "-DCORTEXX1 " \
       "-DL1_DATA_SIZE=32768 " \
       "-DL1_DATA_LINESIZE=64 " \
       "-DL2_SIZE=262144 " \
       "-DL2_LINESIZE=64 " \
       "-DDTB_DEFAULT_ENTRIES=64 " \
       "-DDTB_SIZE=4096 " \
       "-DL2_ASSOCIATIVE=32 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON " \
       "-DARMV8"
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_CORTEXX2
/*
 * Forced-target description for Cortex-X2, active only when FORCE_CORTEXX2
 * is defined. ARCHCONFIG carries the cache/TLB parameters and feature macros
 * (including HAVE_SVE and ARMV9) as one space-separated list of -D options;
 * adjacent literals concatenate, so each option sits on its own line.
 */
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "CORTEXX2"
#define SUBDIRNAME      "arm64"
#define LIBNAME         "cortexx2"
#define CORENAME        "CORTEXX2"
#define ARCHCONFIG \
       "-DCORTEXX2 " \
       "-DL1_DATA_SIZE=32768 " \
       "-DL1_DATA_LINESIZE=64 " \
       "-DL2_SIZE=262144 " \
       "-DL2_LINESIZE=64 " \
       "-DDTB_DEFAULT_ENTRIES=64 " \
       "-DDTB_SIZE=4096 " \
       "-DL2_ASSOCIATIVE=32 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE " \
       "-DARMV8 -DARMV9"
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_CORTEXA510
/*
 * Forced-target description for Cortex-A510, active only when
 * FORCE_CORTEXA510 is defined. ARCHCONFIG carries the cache/TLB parameters
 * and feature macros (including HAVE_SVE and ARMV9) as one space-separated
 * list of -D options; adjacent literals concatenate, so each option sits on
 * its own line.
 */
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "CORTEXA510"
#define SUBDIRNAME      "arm64"
#define LIBNAME         "cortexa510"
#define CORENAME        "CORTEXA510"
#define ARCHCONFIG \
       "-DCORTEXA510 " \
       "-DL1_DATA_SIZE=32768 " \
       "-DL1_DATA_LINESIZE=64 " \
       "-DL2_SIZE=262144 " \
       "-DL2_LINESIZE=64 " \
       "-DDTB_DEFAULT_ENTRIES=64 " \
       "-DDTB_SIZE=4096 " \
       "-DL2_ASSOCIATIVE=32 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE " \
       "-DARMV8 -DARMV9"
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_CORTEXA710
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM64"
|
||||||
|
#define SUBARCHITECTURE "CORTEXA710"
|
||||||
|
#define SUBDIRNAME "arm64"
|
||||||
|
#define ARCHCONFIG "-DCORTEXA710 " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||||
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||||
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||||
|
#define LIBNAME "cortexa710"
|
||||||
|
#define CORENAME "CORTEXA710"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_NEOVERSEN1
|
#ifdef FORCE_NEOVERSEN1
|
||||||
|
@ -1305,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-march=armv8.2-a -mtune=neoverse-n1"
|
"-march=armv8.2-a -mtune=neoverse-n1"
|
||||||
#define LIBNAME "neoversen1"
|
#define LIBNAME "neoversen1"
|
||||||
#define CORENAME "NEOVERSEN1"
|
#define CORENAME "NEOVERSEN1"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_NEOVERSEV1
|
#ifdef FORCE_NEOVERSEV1
|
||||||
|
@ -1322,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-march=armv8.4-a -mtune=neoverse-v1"
|
"-march=armv8.4-a -mtune=neoverse-v1"
|
||||||
#define LIBNAME "neoversev1"
|
#define LIBNAME "neoversev1"
|
||||||
#define CORENAME "NEOVERSEV1"
|
#define CORENAME "NEOVERSEV1"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -1340,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-march=armv8.5-a -mtune=neoverse-n2"
|
"-march=armv8.5-a -mtune=neoverse-n2"
|
||||||
#define LIBNAME "neoversen2"
|
#define LIBNAME "neoversen2"
|
||||||
#define CORENAME "NEOVERSEN2"
|
#define CORENAME "NEOVERSEN2"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_CORTEXA55
|
#ifdef FORCE_CORTEXA55
|
||||||
|
@ -1356,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "cortexa55"
|
#define LIBNAME "cortexa55"
|
||||||
#define CORENAME "CORTEXA55"
|
#define CORENAME "CORTEXA55"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_FALKOR
|
#ifdef FORCE_FALKOR
|
||||||
|
@ -1372,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "falkor"
|
#define LIBNAME "falkor"
|
||||||
#define CORENAME "FALKOR"
|
#define CORENAME "FALKOR"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_THUNDERX
|
#ifdef FORCE_THUNDERX
|
||||||
|
@ -1387,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "thunderx"
|
#define LIBNAME "thunderx"
|
||||||
#define CORENAME "THUNDERX"
|
#define CORENAME "THUNDERX"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_THUNDERX2T99
|
#ifdef FORCE_THUNDERX2T99
|
||||||
|
@ -1405,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "thunderx2t99"
|
#define LIBNAME "thunderx2t99"
|
||||||
#define CORENAME "THUNDERX2T99"
|
#define CORENAME "THUNDERX2T99"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_TSV110
|
#ifdef FORCE_TSV110
|
||||||
|
@ -1421,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "tsv110"
|
#define LIBNAME "tsv110"
|
||||||
#define CORENAME "TSV110"
|
#define CORENAME "TSV110"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_EMAG8180
|
#ifdef FORCE_EMAG8180
|
||||||
|
@ -1456,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "thunderx3t110"
|
#define LIBNAME "thunderx3t110"
|
||||||
#define CORENAME "THUNDERX3T110"
|
#define CORENAME "THUNDERX3T110"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_VORTEX
|
#ifdef FORCE_VORTEX
|
||||||
|
@ -1488,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||||
#define LIBNAME "a64fx"
|
#define LIBNAME "a64fx"
|
||||||
#define CORENAME "A64FX"
|
#define CORENAME "A64FX"
|
||||||
#else
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_FT2000
|
||||||
|
#define ARMV8
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM64"
|
||||||
|
#define SUBARCHITECTURE "FT2000"
|
||||||
|
#define SUBDIRNAME "arm64"
|
||||||
|
#define ARCHCONFIG "-DFT2000 " \
|
||||||
|
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
|
||||||
|
"-DL2_SIZE=33554426 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
|
#define LIBNAME "ft2000"
|
||||||
|
#define CORENAME "FT2000"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_ZARCH_GENERIC
|
#ifdef FORCE_ZARCH_GENERIC
|
||||||
|
|
|
@ -678,7 +678,7 @@ endif ()
|
||||||
set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||||
endif ()
|
endif ()
|
||||||
if (NOT DEFINED SBGEMM_SMALL_K_B0_TT)
|
if (NOT DEFINED SBGEMM_SMALL_K_B0_TT)
|
||||||
set($SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16")
|
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16")
|
||||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16")
|
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16")
|
||||||
|
|
|
@ -0,0 +1,216 @@
|
||||||
|
SAMINKERNEL = ../arm/amin.c
|
||||||
|
DAMINKERNEL = ../arm/amin.c
|
||||||
|
CAMINKERNEL = ../arm/zamin.c
|
||||||
|
ZAMINKERNEL = ../arm/zamin.c
|
||||||
|
|
||||||
|
SMAXKERNEL = ../arm/max.c
|
||||||
|
DMAXKERNEL = ../arm/max.c
|
||||||
|
|
||||||
|
SMINKERNEL = ../arm/min.c
|
||||||
|
DMINKERNEL = ../arm/min.c
|
||||||
|
|
||||||
|
ISAMINKERNEL = ../arm/iamin.c
|
||||||
|
IDAMINKERNEL = ../arm/iamin.c
|
||||||
|
ICAMINKERNEL = ../arm/izamin.c
|
||||||
|
IZAMINKERNEL = ../arm/izamin.c
|
||||||
|
|
||||||
|
ISMAXKERNEL = ../arm/imax.c
|
||||||
|
IDMAXKERNEL = ../arm/imax.c
|
||||||
|
|
||||||
|
ISMINKERNEL = ../arm/imin.c
|
||||||
|
IDMINKERNEL = ../arm/imin.c
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||||
|
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||||
|
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||||
|
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||||
|
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||||
|
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||||
|
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||||
|
|
||||||
|
|
||||||
|
SAMAXKERNEL = amax.S
|
||||||
|
DAMAXKERNEL = amax.S
|
||||||
|
CAMAXKERNEL = zamax.S
|
||||||
|
ZAMAXKERNEL = zamax.S
|
||||||
|
|
||||||
|
SAXPYKERNEL = axpy.S
|
||||||
|
DAXPYKERNEL = axpy.S
|
||||||
|
CAXPYKERNEL = zaxpy.S
|
||||||
|
ZAXPYKERNEL = zaxpy.S
|
||||||
|
|
||||||
|
SROTKERNEL = rot.S
|
||||||
|
DROTKERNEL = rot.S
|
||||||
|
CROTKERNEL = zrot.S
|
||||||
|
ZROTKERNEL = zrot.S
|
||||||
|
|
||||||
|
SSCALKERNEL = scal.S
|
||||||
|
DSCALKERNEL = scal.S
|
||||||
|
CSCALKERNEL = zscal.S
|
||||||
|
ZSCALKERNEL = zscal.S
|
||||||
|
|
||||||
|
SGEMVNKERNEL = gemv_n.S
|
||||||
|
DGEMVNKERNEL = gemv_n.S
|
||||||
|
CGEMVNKERNEL = zgemv_n.S
|
||||||
|
ZGEMVNKERNEL = zgemv_n.S
|
||||||
|
|
||||||
|
SGEMVTKERNEL = gemv_t.S
|
||||||
|
DGEMVTKERNEL = gemv_t.S
|
||||||
|
CGEMVTKERNEL = zgemv_t.S
|
||||||
|
ZGEMVTKERNEL = zgemv_t.S
|
||||||
|
|
||||||
|
|
||||||
|
SASUMKERNEL = asum.S
|
||||||
|
DASUMKERNEL = asum.S
|
||||||
|
CASUMKERNEL = casum.S
|
||||||
|
ZASUMKERNEL = zasum.S
|
||||||
|
|
||||||
|
SCOPYKERNEL = copy.S
|
||||||
|
DCOPYKERNEL = copy.S
|
||||||
|
CCOPYKERNEL = copy.S
|
||||||
|
ZCOPYKERNEL = copy.S
|
||||||
|
|
||||||
|
SSWAPKERNEL = swap.S
|
||||||
|
DSWAPKERNEL = swap.S
|
||||||
|
CSWAPKERNEL = swap.S
|
||||||
|
ZSWAPKERNEL = swap.S
|
||||||
|
|
||||||
|
ISAMAXKERNEL = iamax.S
|
||||||
|
IDAMAXKERNEL = iamax.S
|
||||||
|
ICAMAXKERNEL = izamax.S
|
||||||
|
IZAMAXKERNEL = izamax.S
|
||||||
|
|
||||||
|
SNRM2KERNEL = nrm2.S
|
||||||
|
DNRM2KERNEL = nrm2.S
|
||||||
|
CNRM2KERNEL = znrm2.S
|
||||||
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
|
DDOTKERNEL = dot.S
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
SDOTKERNEL = ../generic/dot.c
|
||||||
|
else
|
||||||
|
SDOTKERNEL = dot.S
|
||||||
|
endif
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
CDOTKERNEL = zdot.S
|
||||||
|
ZDOTKERNEL = zdot.S
|
||||||
|
else
|
||||||
|
CDOTKERNEL = ../arm/zdot.c
|
||||||
|
ZDOTKERNEL = ../arm/zdot.c
|
||||||
|
endif
|
||||||
|
DSDOTKERNEL = dot.S
|
||||||
|
|
||||||
|
DGEMM_BETA = dgemm_beta.S
|
||||||
|
SGEMM_BETA = sgemm_beta.S
|
||||||
|
|
||||||
|
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||||
|
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||||
|
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||||
|
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||||
|
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
|
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
|
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||||
|
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||||
|
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||||
|
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||||
|
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
|
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
|
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||||
|
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
|
||||||
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||||
|
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||||
|
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||||
|
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||||
|
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||||
|
|
||||||
|
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||||
|
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||||
|
|
||||||
|
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||||
|
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
|
||||||
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||||
|
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||||
|
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||||
|
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||||
|
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||||
|
|
||||||
|
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||||
|
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,216 @@
|
||||||
|
SAMINKERNEL = ../arm/amin.c
|
||||||
|
DAMINKERNEL = ../arm/amin.c
|
||||||
|
CAMINKERNEL = ../arm/zamin.c
|
||||||
|
ZAMINKERNEL = ../arm/zamin.c
|
||||||
|
|
||||||
|
SMAXKERNEL = ../arm/max.c
|
||||||
|
DMAXKERNEL = ../arm/max.c
|
||||||
|
|
||||||
|
SMINKERNEL = ../arm/min.c
|
||||||
|
DMINKERNEL = ../arm/min.c
|
||||||
|
|
||||||
|
ISAMINKERNEL = ../arm/iamin.c
|
||||||
|
IDAMINKERNEL = ../arm/iamin.c
|
||||||
|
ICAMINKERNEL = ../arm/izamin.c
|
||||||
|
IZAMINKERNEL = ../arm/izamin.c
|
||||||
|
|
||||||
|
ISMAXKERNEL = ../arm/imax.c
|
||||||
|
IDMAXKERNEL = ../arm/imax.c
|
||||||
|
|
||||||
|
ISMINKERNEL = ../arm/imin.c
|
||||||
|
IDMINKERNEL = ../arm/imin.c
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||||
|
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||||
|
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||||
|
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||||
|
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||||
|
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||||
|
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||||
|
|
||||||
|
|
||||||
|
SAMAXKERNEL = amax.S
|
||||||
|
DAMAXKERNEL = amax.S
|
||||||
|
CAMAXKERNEL = zamax.S
|
||||||
|
ZAMAXKERNEL = zamax.S
|
||||||
|
|
||||||
|
SAXPYKERNEL = axpy.S
|
||||||
|
DAXPYKERNEL = axpy.S
|
||||||
|
CAXPYKERNEL = zaxpy.S
|
||||||
|
ZAXPYKERNEL = zaxpy.S
|
||||||
|
|
||||||
|
SROTKERNEL = rot.S
|
||||||
|
DROTKERNEL = rot.S
|
||||||
|
CROTKERNEL = zrot.S
|
||||||
|
ZROTKERNEL = zrot.S
|
||||||
|
|
||||||
|
SSCALKERNEL = scal.S
|
||||||
|
DSCALKERNEL = scal.S
|
||||||
|
CSCALKERNEL = zscal.S
|
||||||
|
ZSCALKERNEL = zscal.S
|
||||||
|
|
||||||
|
SGEMVNKERNEL = gemv_n.S
|
||||||
|
DGEMVNKERNEL = gemv_n.S
|
||||||
|
CGEMVNKERNEL = zgemv_n.S
|
||||||
|
ZGEMVNKERNEL = zgemv_n.S
|
||||||
|
|
||||||
|
SGEMVTKERNEL = gemv_t.S
|
||||||
|
DGEMVTKERNEL = gemv_t.S
|
||||||
|
CGEMVTKERNEL = zgemv_t.S
|
||||||
|
ZGEMVTKERNEL = zgemv_t.S
|
||||||
|
|
||||||
|
|
||||||
|
SASUMKERNEL = asum.S
|
||||||
|
DASUMKERNEL = asum.S
|
||||||
|
CASUMKERNEL = casum.S
|
||||||
|
ZASUMKERNEL = zasum.S
|
||||||
|
|
||||||
|
SCOPYKERNEL = copy.S
|
||||||
|
DCOPYKERNEL = copy.S
|
||||||
|
CCOPYKERNEL = copy.S
|
||||||
|
ZCOPYKERNEL = copy.S
|
||||||
|
|
||||||
|
SSWAPKERNEL = swap.S
|
||||||
|
DSWAPKERNEL = swap.S
|
||||||
|
CSWAPKERNEL = swap.S
|
||||||
|
ZSWAPKERNEL = swap.S
|
||||||
|
|
||||||
|
ISAMAXKERNEL = iamax.S
|
||||||
|
IDAMAXKERNEL = iamax.S
|
||||||
|
ICAMAXKERNEL = izamax.S
|
||||||
|
IZAMAXKERNEL = izamax.S
|
||||||
|
|
||||||
|
SNRM2KERNEL = nrm2.S
|
||||||
|
DNRM2KERNEL = nrm2.S
|
||||||
|
CNRM2KERNEL = znrm2.S
|
||||||
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
|
DDOTKERNEL = dot.S
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
SDOTKERNEL = ../generic/dot.c
|
||||||
|
else
|
||||||
|
SDOTKERNEL = dot.S
|
||||||
|
endif
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
CDOTKERNEL = zdot.S
|
||||||
|
ZDOTKERNEL = zdot.S
|
||||||
|
else
|
||||||
|
CDOTKERNEL = ../arm/zdot.c
|
||||||
|
ZDOTKERNEL = ../arm/zdot.c
|
||||||
|
endif
|
||||||
|
DSDOTKERNEL = dot.S
|
||||||
|
|
||||||
|
DGEMM_BETA = dgemm_beta.S
|
||||||
|
SGEMM_BETA = sgemm_beta.S
|
||||||
|
|
||||||
|
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||||
|
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||||
|
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||||
|
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||||
|
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
|
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
|
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||||
|
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||||
|
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||||
|
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||||
|
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
|
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
|
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||||
|
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
|
||||||
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||||
|
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||||
|
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||||
|
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||||
|
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||||
|
|
||||||
|
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||||
|
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||||
|
|
||||||
|
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||||
|
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
|
||||||
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||||
|
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||||
|
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||||
|
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||||
|
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||||
|
|
||||||
|
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||||
|
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1 @@
|
||||||
|
include $(KERNELDIR)/KERNEL.CORTEXA57
|
|
@ -0,0 +1,216 @@
|
||||||
|
SAMINKERNEL = ../arm/amin.c
|
||||||
|
DAMINKERNEL = ../arm/amin.c
|
||||||
|
CAMINKERNEL = ../arm/zamin.c
|
||||||
|
ZAMINKERNEL = ../arm/zamin.c
|
||||||
|
|
||||||
|
SMAXKERNEL = ../arm/max.c
|
||||||
|
DMAXKERNEL = ../arm/max.c
|
||||||
|
|
||||||
|
SMINKERNEL = ../arm/min.c
|
||||||
|
DMINKERNEL = ../arm/min.c
|
||||||
|
|
||||||
|
ISAMINKERNEL = ../arm/iamin.c
|
||||||
|
IDAMINKERNEL = ../arm/iamin.c
|
||||||
|
ICAMINKERNEL = ../arm/izamin.c
|
||||||
|
IZAMINKERNEL = ../arm/izamin.c
|
||||||
|
|
||||||
|
ISMAXKERNEL = ../arm/imax.c
|
||||||
|
IDMAXKERNEL = ../arm/imax.c
|
||||||
|
|
||||||
|
ISMINKERNEL = ../arm/imin.c
|
||||||
|
IDMINKERNEL = ../arm/imin.c
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||||
|
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||||
|
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||||
|
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||||
|
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||||
|
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||||
|
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||||
|
|
||||||
|
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||||
|
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||||
|
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||||
|
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||||
|
|
||||||
|
|
||||||
|
SAMAXKERNEL = amax.S
|
||||||
|
DAMAXKERNEL = amax.S
|
||||||
|
CAMAXKERNEL = zamax.S
|
||||||
|
ZAMAXKERNEL = zamax.S
|
||||||
|
|
||||||
|
SAXPYKERNEL = axpy.S
|
||||||
|
DAXPYKERNEL = axpy.S
|
||||||
|
CAXPYKERNEL = zaxpy.S
|
||||||
|
ZAXPYKERNEL = zaxpy.S
|
||||||
|
|
||||||
|
SROTKERNEL = rot.S
|
||||||
|
DROTKERNEL = rot.S
|
||||||
|
CROTKERNEL = zrot.S
|
||||||
|
ZROTKERNEL = zrot.S
|
||||||
|
|
||||||
|
SSCALKERNEL = scal.S
|
||||||
|
DSCALKERNEL = scal.S
|
||||||
|
CSCALKERNEL = zscal.S
|
||||||
|
ZSCALKERNEL = zscal.S
|
||||||
|
|
||||||
|
SGEMVNKERNEL = gemv_n.S
|
||||||
|
DGEMVNKERNEL = gemv_n.S
|
||||||
|
CGEMVNKERNEL = zgemv_n.S
|
||||||
|
ZGEMVNKERNEL = zgemv_n.S
|
||||||
|
|
||||||
|
SGEMVTKERNEL = gemv_t.S
|
||||||
|
DGEMVTKERNEL = gemv_t.S
|
||||||
|
CGEMVTKERNEL = zgemv_t.S
|
||||||
|
ZGEMVTKERNEL = zgemv_t.S
|
||||||
|
|
||||||
|
|
||||||
|
SASUMKERNEL = asum.S
|
||||||
|
DASUMKERNEL = asum.S
|
||||||
|
CASUMKERNEL = casum.S
|
||||||
|
ZASUMKERNEL = zasum.S
|
||||||
|
|
||||||
|
SCOPYKERNEL = copy.S
|
||||||
|
DCOPYKERNEL = copy.S
|
||||||
|
CCOPYKERNEL = copy.S
|
||||||
|
ZCOPYKERNEL = copy.S
|
||||||
|
|
||||||
|
SSWAPKERNEL = swap.S
|
||||||
|
DSWAPKERNEL = swap.S
|
||||||
|
CSWAPKERNEL = swap.S
|
||||||
|
ZSWAPKERNEL = swap.S
|
||||||
|
|
||||||
|
ISAMAXKERNEL = iamax.S
|
||||||
|
IDAMAXKERNEL = iamax.S
|
||||||
|
ICAMAXKERNEL = izamax.S
|
||||||
|
IZAMAXKERNEL = izamax.S
|
||||||
|
|
||||||
|
SNRM2KERNEL = nrm2.S
|
||||||
|
DNRM2KERNEL = nrm2.S
|
||||||
|
CNRM2KERNEL = znrm2.S
|
||||||
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
|
DDOTKERNEL = dot.S
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
SDOTKERNEL = ../generic/dot.c
|
||||||
|
else
|
||||||
|
SDOTKERNEL = dot.S
|
||||||
|
endif
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
CDOTKERNEL = zdot.S
|
||||||
|
ZDOTKERNEL = zdot.S
|
||||||
|
else
|
||||||
|
CDOTKERNEL = ../arm/zdot.c
|
||||||
|
ZDOTKERNEL = ../arm/zdot.c
|
||||||
|
endif
|
||||||
|
DSDOTKERNEL = dot.S
|
||||||
|
|
||||||
|
DGEMM_BETA = dgemm_beta.S
|
||||||
|
SGEMM_BETA = sgemm_beta.S
|
||||||
|
|
||||||
|
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||||
|
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||||
|
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||||
|
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||||
|
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
|
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
|
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||||
|
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||||
|
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||||
|
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||||
|
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||||
|
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||||
|
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||||
|
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||||
|
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||||
|
|
||||||
|
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||||
|
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
|
||||||
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||||
|
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||||
|
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||||
|
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||||
|
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||||
|
|
||||||
|
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||||
|
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||||
|
|
||||||
|
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||||
|
|
||||||
|
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||||
|
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
|
||||||
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||||
|
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||||
|
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||||
|
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||||
|
|
||||||
|
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||||
|
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||||
|
|
||||||
|
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||||
|
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,3 @@
|
||||||
|
include $(KERNELDIR)/KERNEL.CORTEXA57
|
||||||
|
|
||||||
|
|
|
@ -1239,7 +1239,6 @@ static void init_parameter(void) {
|
||||||
|
|
||||||
#ifdef BUILD_BFLOAT16
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
|
||||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||||
#endif
|
#endif
|
||||||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
|
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
|
||||||
|
@ -1824,6 +1823,13 @@ static void init_parameter(void) {
|
||||||
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
|
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if BUILD_BFLOAT16==1
|
||||||
|
TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
|
||||||
|
((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
|
||||||
|
+ TABLE_NAME.align) & ~TABLE_NAME.align)
|
||||||
|
) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if BUILD_SINGLE==1
|
#if BUILD_SINGLE==1
|
||||||
TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
|
TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
|
||||||
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
|
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
|
||||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
|
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
|
||||||
#define MASK_STORE_512(M, N) \
|
#define MASK_STORE_512(M, N) \
|
||||||
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
||||||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \
|
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \
|
||||||
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
|
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -265,7 +266,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
int mm = M - i;
|
int mm = M - i;
|
||||||
if (!mm) return 0;
|
if (!mm) return 0;
|
||||||
if (mm > 4 || K < 16) {
|
if (mm > 4 || K < 16) {
|
||||||
register __mmask8 mask asm("k1") = (1UL << mm) - 1;
|
register __mmask8 mask = (1UL << mm) - 1;
|
||||||
for (j = 0; j < n6; j += 6) {
|
for (j = 0; j < n6; j += 6) {
|
||||||
DECLARE_RESULT_512(0, 0);
|
DECLARE_RESULT_512(0, 0);
|
||||||
DECLARE_RESULT_512(0, 1);
|
DECLARE_RESULT_512(0, 1);
|
||||||
|
@ -588,3 +589,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
#include "../generic/gemm_small_matrix_kernel_nn.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
|
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
|
||||||
#define MASK_STORE_512(M, N) \
|
#define MASK_STORE_512(M, N) \
|
||||||
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
||||||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \
|
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \
|
||||||
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
|
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
|
||||||
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
||||||
__m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \
|
__m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \
|
||||||
|
@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
}
|
}
|
||||||
int mm = M - i;
|
int mm = M - i;
|
||||||
if (mm >= 6) {
|
if (mm >= 6) {
|
||||||
register __mmask16 mask asm("k1") = (1UL << mm) - 1;
|
register __mmask16 mask = (1UL << mm) - 1;
|
||||||
for (j = 0; j < n8; j += 8) {
|
for (j = 0; j < n8; j += 8) {
|
||||||
DECLARE_RESULT_512(0, 0);
|
DECLARE_RESULT_512(0, 0);
|
||||||
DECLARE_RESULT_512(0, 1);
|
DECLARE_RESULT_512(0, 1);
|
||||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
@ -320,3 +321,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
#include "../generic/gemm_small_matrix_kernel_tn.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -114,10 +114,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc))
|
asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc))
|
||||||
|
|
||||||
#define _MASK_STORE_C_2nx16(addr, val0, val1) \
|
#define _MASK_STORE_C_2nx16(addr, val0, val1) \
|
||||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \
|
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
|
||||||
asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "k"(mmask)); \
|
asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "Yk"(mmask)); \
|
||||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); \
|
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); \
|
||||||
asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "k"(mmask))
|
asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "Yk"(mmask))
|
||||||
|
|
||||||
#define _REORDER_C_2X(result_0, result_1) { \
|
#define _REORDER_C_2X(result_0, result_1) { \
|
||||||
__m512 tmp0, tmp1; \
|
__m512 tmp0, tmp1; \
|
||||||
|
@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
asm("vmovups %0, (%1)": : "v"(val0), "r"(addr));
|
asm("vmovups %0, (%1)": : "v"(val0), "r"(addr));
|
||||||
|
|
||||||
#define _MASK_STORE_C_16(addr, val0) \
|
#define _MASK_STORE_C_16(addr, val0) \
|
||||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \
|
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
|
||||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask));
|
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask));
|
||||||
|
|
||||||
#define N_STORE_4X(A, Bx, By) { \
|
#define N_STORE_4X(A, Bx, By) { \
|
||||||
_REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \
|
_REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \
|
||||||
|
|
|
@ -13,6 +13,8 @@
|
||||||
#define ONE 1.e0f
|
#define ONE 1.e0f
|
||||||
#define ZERO 0.e0f
|
#define ZERO 0.e0f
|
||||||
|
|
||||||
|
#define SHUFFLE_MAGIC_NO (const int) 0x39
|
||||||
|
|
||||||
#undef STORE16_COMPLETE_RESULT
|
#undef STORE16_COMPLETE_RESULT
|
||||||
#undef STORE16_MASK_COMPLETE_RESULT
|
#undef STORE16_MASK_COMPLETE_RESULT
|
||||||
#undef SBGEMM_BLOCK_KERNEL_NN_32x8xK
|
#undef SBGEMM_BLOCK_KERNEL_NN_32x8xK
|
||||||
|
@ -356,7 +358,6 @@ void sbgemm_block_kernel_nn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
@ -465,7 +466,6 @@ void sbgemm_block_kernel_nn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
@ -1192,7 +1192,6 @@ void sbgemm_block_kernel_tn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
@ -1291,7 +1290,6 @@ void sbgemm_block_kernel_tn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
|
|
@ -135,7 +135,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b,
|
0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b,
|
||||||
0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f,
|
0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||||
};
|
};
|
||||||
u_int64_t permute_table2[] = {
|
uint64_t permute_table2[] = {
|
||||||
0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3,
|
0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3,
|
||||||
0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7,
|
0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7,
|
||||||
};
|
};
|
||||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
|
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
|
||||||
#define MASK_STORE_512(M, N) \
|
#define MASK_STORE_512(M, N) \
|
||||||
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
||||||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \
|
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \
|
||||||
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
|
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -266,7 +267,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
int mm = M - i;
|
int mm = M - i;
|
||||||
if (!mm) return 0;
|
if (!mm) return 0;
|
||||||
if (mm > 8 || K < 32) {
|
if (mm > 8 || K < 32) {
|
||||||
register __mmask16 mask asm("k1") = (1UL << mm) - 1;
|
register __mmask16 mask = (1UL << mm) - 1;
|
||||||
for (j = 0; j < n6; j += 6) {
|
for (j = 0; j < n6; j += 6) {
|
||||||
DECLARE_RESULT_512(0, 0);
|
DECLARE_RESULT_512(0, 0);
|
||||||
DECLARE_RESULT_512(0, 1);
|
DECLARE_RESULT_512(0, 1);
|
||||||
|
@ -610,3 +611,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
#include "../generic/gemm_small_matrix_kernel_nn.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
|
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
|
||||||
#define MASK_STORE_512(M, N) \
|
#define MASK_STORE_512(M, N) \
|
||||||
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
||||||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \
|
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \
|
||||||
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
|
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
|
||||||
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
||||||
__m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \
|
__m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \
|
||||||
|
@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
}
|
}
|
||||||
int mm = M - i;
|
int mm = M - i;
|
||||||
if (mm >= 12) {
|
if (mm >= 12) {
|
||||||
register __mmask16 mask asm("k1") = (1UL << mm) - 1;
|
register __mmask16 mask = (1UL << mm) - 1;
|
||||||
for (j = 0; j < n8; j += 8) {
|
for (j = 0; j < n8; j += 8) {
|
||||||
DECLARE_RESULT_512(0, 0);
|
DECLARE_RESULT_512(0, 0);
|
||||||
DECLARE_RESULT_512(0, 1);
|
DECLARE_RESULT_512(0, 1);
|
||||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
@ -314,3 +315,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
#include "../generic/gemm_small_matrix_kernel_tn.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -452,11 +452,6 @@
|
||||||
|
|
||||||
MOVDDUP(4 * SIZE, A1, a1)
|
MOVDDUP(4 * SIZE, A1, a1)
|
||||||
|
|
||||||
movsd 0 * SIZE(YY), yy1
|
|
||||||
movhpd 1 * SIZE(YY), yy1
|
|
||||||
movsd 2 * SIZE(YY), yy2
|
|
||||||
movhpd 3 * SIZE(YY), yy2
|
|
||||||
|
|
||||||
movapd 8 * SIZE(XX), xtemp1
|
movapd 8 * SIZE(XX), xtemp1
|
||||||
movapd 10 * SIZE(XX), xtemp2
|
movapd 10 * SIZE(XX), xtemp2
|
||||||
movapd 12 * SIZE(XX), xtemp3
|
movapd 12 * SIZE(XX), xtemp3
|
||||||
|
@ -475,6 +470,12 @@
|
||||||
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
|
.L12_prep:
|
||||||
|
movsd 0 * SIZE(YY), yy1
|
||||||
|
movhpd 1 * SIZE(YY), yy1
|
||||||
|
movsd 2 * SIZE(YY), yy2
|
||||||
|
movhpd 3 * SIZE(YY), yy2
|
||||||
|
|
||||||
.L12:
|
.L12:
|
||||||
movapd xtemp1, xt1
|
movapd xtemp1, xt1
|
||||||
mulpd a1, xt1
|
mulpd a1, xt1
|
||||||
|
@ -608,8 +609,6 @@
|
||||||
|
|
||||||
movlpd yy2, 6 * SIZE(YY)
|
movlpd yy2, 6 * SIZE(YY)
|
||||||
movhpd yy2, 7 * SIZE(YY)
|
movhpd yy2, 7 * SIZE(YY)
|
||||||
movsd 10 * SIZE(YY), yy2
|
|
||||||
movhpd 11 * SIZE(YY), yy2
|
|
||||||
|
|
||||||
movapd xtemp2, xt1
|
movapd xtemp2, xt1
|
||||||
movapd 18 * SIZE(XX), xtemp2
|
movapd 18 * SIZE(XX), xtemp2
|
||||||
|
@ -621,8 +620,6 @@
|
||||||
|
|
||||||
movlpd yy1, 4 * SIZE(YY)
|
movlpd yy1, 4 * SIZE(YY)
|
||||||
movhpd yy1, 5 * SIZE(YY)
|
movhpd yy1, 5 * SIZE(YY)
|
||||||
movsd 8 * SIZE(YY), yy1
|
|
||||||
movhpd 9 * SIZE(YY), yy1
|
|
||||||
|
|
||||||
subq $-16 * SIZE, XX
|
subq $-16 * SIZE, XX
|
||||||
addq $ 8 * SIZE, YY
|
addq $ 8 * SIZE, YY
|
||||||
|
@ -630,7 +627,8 @@
|
||||||
addq $ 8 * SIZE, A2
|
addq $ 8 * SIZE, A2
|
||||||
|
|
||||||
decq I
|
decq I
|
||||||
jg .L12
|
jg .L12_prep
|
||||||
|
jmp .L15
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L14:
|
.L14:
|
||||||
|
@ -641,7 +639,6 @@
|
||||||
jle .L16
|
jle .L16
|
||||||
|
|
||||||
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
||||||
jmp .L15_pastcheck
|
|
||||||
|
|
||||||
.L15:
|
.L15:
|
||||||
movq M, I
|
movq M, I
|
||||||
|
@ -650,6 +647,11 @@
|
||||||
testq $2, I
|
testq $2, I
|
||||||
jle .L16
|
jle .L16
|
||||||
|
|
||||||
|
movsd 0 * SIZE(YY), yy1
|
||||||
|
movhpd 1 * SIZE(YY), yy1
|
||||||
|
movsd 2 * SIZE(YY), yy2
|
||||||
|
movhpd 3 * SIZE(YY), yy2
|
||||||
|
|
||||||
.L15_pastcheck:
|
.L15_pastcheck:
|
||||||
movapd xtemp1, xt1
|
movapd xtemp1, xt1
|
||||||
mulpd a1, xt1
|
mulpd a1, xt1
|
||||||
|
@ -705,8 +707,6 @@
|
||||||
|
|
||||||
movlpd yy2, 2 * SIZE(YY)
|
movlpd yy2, 2 * SIZE(YY)
|
||||||
movhpd yy2, 3 * SIZE(YY)
|
movhpd yy2, 3 * SIZE(YY)
|
||||||
movsd 6 * SIZE(YY), yy2
|
|
||||||
movhpd 7 * SIZE(YY), yy2
|
|
||||||
|
|
||||||
movapd xtemp2, xt1
|
movapd xtemp2, xt1
|
||||||
movapd 10 * SIZE(XX), xtemp2
|
movapd 10 * SIZE(XX), xtemp2
|
||||||
|
@ -717,8 +717,6 @@
|
||||||
|
|
||||||
movlpd yy1, 0 * SIZE(YY)
|
movlpd yy1, 0 * SIZE(YY)
|
||||||
movhpd yy1, 1 * SIZE(YY)
|
movhpd yy1, 1 * SIZE(YY)
|
||||||
movsd 4 * SIZE(YY), yy1
|
|
||||||
movhpd 5 * SIZE(YY), yy1
|
|
||||||
|
|
||||||
addq $4 * SIZE, YY
|
addq $4 * SIZE, YY
|
||||||
addq $4 * SIZE, A1
|
addq $4 * SIZE, A1
|
||||||
|
@ -731,6 +729,9 @@
|
||||||
|
|
||||||
MOVDDUP(1 * SIZE, A1, a2)
|
MOVDDUP(1 * SIZE, A1, a2)
|
||||||
|
|
||||||
|
movsd 0 * SIZE(YY), yy1
|
||||||
|
movhpd 1 * SIZE(YY), yy1
|
||||||
|
|
||||||
movapd xtemp1, xt1
|
movapd xtemp1, xt1
|
||||||
mulpd a1, xt1
|
mulpd a1, xt1
|
||||||
mulpd atemp1, a1
|
mulpd atemp1, a1
|
||||||
|
|
|
@ -2,9 +2,9 @@ add_subdirectory(SRC)
|
||||||
if(BUILD_TESTING)
|
if(BUILD_TESTING)
|
||||||
add_subdirectory(TESTING)
|
add_subdirectory(TESTING)
|
||||||
endif()
|
endif()
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc @ONLY)
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc @ONLY)
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/blas.pc
|
${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc
|
||||||
DESTINATION ${PKG_CONFIG_DIR}
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
|
@ -97,10 +97,10 @@ if(BUILD_COMPLEX16)
|
||||||
endif()
|
endif()
|
||||||
list(REMOVE_DUPLICATES SOURCES)
|
list(REMOVE_DUPLICATES SOURCES)
|
||||||
|
|
||||||
add_library(blas ${SOURCES})
|
add_library(${BLASLIB} ${SOURCES})
|
||||||
set_target_properties(
|
set_target_properties(
|
||||||
blas PROPERTIES
|
${BLASLIB} PROPERTIES
|
||||||
VERSION ${LAPACK_VERSION}
|
VERSION ${LAPACK_VERSION}
|
||||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||||
)
|
)
|
||||||
lapack_install_library(blas)
|
lapack_install_library(${BLASLIB})
|
||||||
|
|
|
@ -2,7 +2,7 @@ macro(add_blas_test name src)
|
||||||
get_filename_component(baseNAME ${src} NAME_WE)
|
get_filename_component(baseNAME ${src} NAME_WE)
|
||||||
set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in")
|
set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in")
|
||||||
add_executable(${name} ${src})
|
add_executable(${name} ${src})
|
||||||
target_link_libraries(${name} blas)
|
target_link_libraries(${name} ${BLASLIB})
|
||||||
if(EXISTS "${TEST_INPUT}")
|
if(EXISTS "${TEST_INPUT}")
|
||||||
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
|
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
|
||||||
-DTEST=$<TARGET_FILE:${name}>
|
-DTEST=$<TARGET_FILE:${name}>
|
||||||
|
|
|
@ -5,4 +5,4 @@ Name: BLAS
|
||||||
Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms
|
Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms
|
||||||
Version: @LAPACK_VERSION@
|
Version: @LAPACK_VERSION@
|
||||||
URL: http://www.netlib.org/blas/
|
URL: http://www.netlib.org/blas/
|
||||||
Libs: -L${libdir} -lblas
|
Libs: -L${libdir} -l@BLASLIB@
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
message(STATUS "CBLAS enable")
|
message(STATUS "CBLAS enable")
|
||||||
enable_language(C)
|
enable_language(C)
|
||||||
|
|
||||||
set(LAPACK_INSTALL_EXPORT_NAME cblas-targets)
|
set(LAPACK_INSTALL_EXPORT_NAME ${CBLASLIB}-targets)
|
||||||
|
|
||||||
# Create a header file cblas.h for the routines called in my C programs
|
# Create a header file cblas.h for the routines called in my C programs
|
||||||
include(FortranCInterface)
|
include(FortranCInterface)
|
||||||
|
@ -42,15 +42,15 @@ if(BUILD_TESTING)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT BLAS_FOUND)
|
if(NOT BLAS_FOUND)
|
||||||
set(ALL_TARGETS ${ALL_TARGETS} blas)
|
set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Export cblas targets from the
|
# Export cblas targets from the
|
||||||
# install tree, if any.
|
# install tree, if any.
|
||||||
set(_cblas_config_install_guard_target "")
|
set(_cblas_config_install_guard_target "")
|
||||||
if(ALL_TARGETS)
|
if(ALL_TARGETS)
|
||||||
install(EXPORT cblas-targets
|
install(EXPORT ${CBLASLIB}-targets
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
# Choose one of the cblas targets to use as a guard for
|
# Choose one of the cblas targets to use as a guard for
|
||||||
|
@ -61,7 +61,7 @@ endif()
|
||||||
# Export cblas targets from the build tree, if any.
|
# Export cblas targets from the build tree, if any.
|
||||||
set(_cblas_config_build_guard_target "")
|
set(_cblas_config_build_guard_target "")
|
||||||
if(ALL_TARGETS)
|
if(ALL_TARGETS)
|
||||||
export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake)
|
export(TARGETS ${ALL_TARGETS} FILE ${CBLASLIB}-targets.cmake)
|
||||||
|
|
||||||
# Choose one of the cblas targets to use as a guard
|
# Choose one of the cblas targets to use as a guard
|
||||||
# for cblas-config.cmake to load targets from the build tree.
|
# for cblas-config.cmake to load targets from the build tree.
|
||||||
|
@ -69,26 +69,26 @@ if(ALL_TARGETS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in
|
||||||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY)
|
${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake @ONLY)
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in
|
||||||
${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY)
|
${LAPACK_BINARY_DIR}/${CBLASLIB}-config.cmake @ONLY)
|
||||||
|
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc @ONLY)
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc @ONLY)
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/cblas.pc
|
${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc
|
||||||
DESTINATION ${PKG_CONFIG_DIR}
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY)
|
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake @ONLY)
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake
|
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake
|
||||||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake
|
${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
|
||||||
)
|
)
|
||||||
|
|
||||||
#install(EXPORT cblas-targets
|
#install(EXPORT ${CBLASLIB}-targets
|
||||||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
|
||||||
# COMPONENT Development
|
# COMPONENT Development
|
||||||
# )
|
# )
|
||||||
|
|
|
@ -5,6 +5,6 @@ Name: CBLAS
|
||||||
Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms
|
Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms
|
||||||
Version: @LAPACK_VERSION@
|
Version: @LAPACK_VERSION@
|
||||||
URL: http://www.netlib.org/blas/#_cblas
|
URL: http://www.netlib.org/blas/#_cblas
|
||||||
Libs: -L${libdir} -lcblas
|
Libs: -L${libdir} -l@CBLASLIB@
|
||||||
Cflags: -I${includedir}
|
Cflags: -I${includedir}
|
||||||
Requires.private: blas
|
Requires.private: @BLASLIB@
|
||||||
|
|
|
@ -4,11 +4,11 @@ find_package(LAPACK NO_MODULE)
|
||||||
|
|
||||||
# Load lapack targets from the build tree, including lapacke targets.
|
# Load lapack targets from the build tree, including lapacke targets.
|
||||||
if(NOT TARGET lapacke)
|
if(NOT TARGET lapacke)
|
||||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Report cblas header search locations from build tree.
|
# Report cblas header search locations from build tree.
|
||||||
set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
||||||
|
|
||||||
# Report cblas libraries.
|
# Report cblas libraries.
|
||||||
set(CBLAS_LIBRARIES cblas)
|
set(CBLAS_LIBRARIES @CBLASLIB@)
|
||||||
|
|
|
@ -5,19 +5,19 @@ get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
||||||
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
||||||
|
|
||||||
# Load the LAPACK package with which we were built.
|
# Load the LAPACK package with which we were built.
|
||||||
set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@")
|
set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@")
|
||||||
find_package(LAPACK NO_MODULE)
|
find_package(LAPACK NO_MODULE)
|
||||||
|
|
||||||
# Load lapacke targets from the install tree.
|
# Load lapacke targets from the install tree.
|
||||||
if(NOT TARGET cblas)
|
if(NOT TARGET @CBLASLIB@)
|
||||||
include(${_CBLAS_SELF_DIR}/cblas-targets.cmake)
|
include(${_CBLAS_SELF_DIR}/@CBLASLIB@-targets.cmake)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Report lapacke header search locations.
|
# Report lapacke header search locations.
|
||||||
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include)
|
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include)
|
||||||
|
|
||||||
# Report lapacke libraries.
|
# Report lapacke libraries.
|
||||||
set(CBLAS_LIBRARIES cblas)
|
set(CBLAS_LIBRARIES @CBLASLIB@)
|
||||||
|
|
||||||
unset(_CBLAS_PREFIX)
|
unset(_CBLAS_PREFIX)
|
||||||
unset(_CBLAS_SELF_DIR)
|
unset(_CBLAS_SELF_DIR)
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
add_executable(xexample1_CBLAS cblas_example1.c)
|
add_executable(xexample1_CBLAS cblas_example1.c)
|
||||||
add_executable(xexample2_CBLAS cblas_example2.c)
|
add_executable(xexample2_CBLAS cblas_example2.c)
|
||||||
|
|
||||||
target_link_libraries(xexample1_CBLAS cblas)
|
target_link_libraries(xexample1_CBLAS ${CBLASLIB})
|
||||||
target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES})
|
target_link_libraries(xexample2_CBLAS ${CBLASLIB} ${BLAS_LIBRARIES})
|
||||||
|
|
||||||
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS)
|
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS)
|
||||||
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS)
|
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS)
|
||||||
|
|
|
@ -11,7 +11,7 @@ int main ( )
|
||||||
|
|
||||||
double *a, *x, *y;
|
double *a, *x, *y;
|
||||||
double alpha, beta;
|
double alpha, beta;
|
||||||
int m, n, lda, incx, incy, i;
|
CBLAS_INDEX m, n, lda, incx, incy, i;
|
||||||
|
|
||||||
Layout = CblasColMajor;
|
Layout = CblasColMajor;
|
||||||
transa = CblasNoTrans;
|
transa = CblasNoTrans;
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
int main (int argc, char **argv )
|
int main (int argc, char **argv )
|
||||||
{
|
{
|
||||||
int rout=-1,info=0,m,n,k,lda,ldb,ldc;
|
CBLAS_INDEX rout=-1,info=0,m,n,k,lda,ldb,ldc;
|
||||||
double A[2] = {0.0,0.0},
|
double A[2] = {0.0,0.0},
|
||||||
B[2] = {0.0,0.0},
|
B[2] = {0.0,0.0},
|
||||||
C[2] = {0.0,0.0},
|
C[2] = {0.0,0.0},
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#ifndef CBLAS_H
|
#ifndef CBLAS_H
|
||||||
#define CBLAS_H
|
#define CBLAS_H
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -11,9 +12,9 @@ extern "C" { /* Assume C declarations for C++ */
|
||||||
* Enumerated and derived types
|
* Enumerated and derived types
|
||||||
*/
|
*/
|
||||||
#ifdef WeirdNEC
|
#ifdef WeirdNEC
|
||||||
#define CBLAS_INDEX long
|
#define CBLAS_INDEX int64_t
|
||||||
#else
|
#else
|
||||||
#define CBLAS_INDEX int
|
#define CBLAS_INDEX int32_t
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT;
|
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT;
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
#ifndef CBLAS_F77_H
|
#ifndef CBLAS_F77_H
|
||||||
#define CBLAS_F77_H
|
#define CBLAS_F77_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#ifdef CRAY
|
#ifdef CRAY
|
||||||
#include <fortran.h>
|
#include <fortran.h>
|
||||||
#define F77_CHAR _fcd
|
#define F77_CHAR _fcd
|
||||||
|
@ -17,8 +19,12 @@
|
||||||
#define F77_STRLEN(a) (_fcdlen)
|
#define F77_STRLEN(a) (_fcdlen)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef F77_INT
|
||||||
#ifdef WeirdNEC
|
#ifdef WeirdNEC
|
||||||
#define F77_INT long
|
#define F77_INT int64_t
|
||||||
|
#else
|
||||||
|
#define F77_INT int32_t
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef F77_CHAR
|
#ifdef F77_CHAR
|
||||||
|
|
|
@ -113,16 +113,16 @@ if(BUILD_COMPLEX16)
|
||||||
endif()
|
endif()
|
||||||
list(REMOVE_DUPLICATES SOURCES)
|
list(REMOVE_DUPLICATES SOURCES)
|
||||||
|
|
||||||
add_library(cblas ${SOURCES})
|
add_library(${CBLASLIB} ${SOURCES})
|
||||||
set_target_properties(
|
set_target_properties(
|
||||||
cblas PROPERTIES
|
${CBLASLIB} PROPERTIES
|
||||||
LINKER_LANGUAGE C
|
LINKER_LANGUAGE C
|
||||||
VERSION ${LAPACK_VERSION}
|
VERSION ${LAPACK_VERSION}
|
||||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||||
)
|
)
|
||||||
target_include_directories(cblas PUBLIC
|
target_include_directories(${CBLASLIB} PUBLIC
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
|
||||||
$<INSTALL_INTERFACE:include>
|
$<INSTALL_INTERFACE:include>
|
||||||
)
|
)
|
||||||
target_link_libraries(cblas PRIVATE ${BLAS_LIBRARIES})
|
target_link_libraries(${CBLASLIB} PRIVATE ${BLAS_LIBRARIES})
|
||||||
lapack_install_library(cblas)
|
lapack_install_library(${CBLASLIB})
|
||||||
|
|
|
@ -52,9 +52,9 @@ if(BUILD_SINGLE)
|
||||||
add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
|
|
||||||
target_link_libraries(xscblat1 cblas)
|
target_link_libraries(xscblat1 ${CBLASLIB})
|
||||||
target_link_libraries(xscblat2 cblas)
|
target_link_libraries(xscblat2 ${CBLASLIB})
|
||||||
target_link_libraries(xscblat3 cblas)
|
target_link_libraries(xscblat3 ${CBLASLIB})
|
||||||
|
|
||||||
add_cblas_test(stest1.out "" xscblat1)
|
add_cblas_test(stest1.out "" xscblat1)
|
||||||
add_cblas_test(stest2.out sin2 xscblat2)
|
add_cblas_test(stest2.out sin2 xscblat2)
|
||||||
|
@ -66,9 +66,9 @@ if(BUILD_DOUBLE)
|
||||||
add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
|
|
||||||
target_link_libraries(xdcblat1 cblas)
|
target_link_libraries(xdcblat1 ${CBLASLIB})
|
||||||
target_link_libraries(xdcblat2 cblas)
|
target_link_libraries(xdcblat2 ${CBLASLIB})
|
||||||
target_link_libraries(xdcblat3 cblas)
|
target_link_libraries(xdcblat3 ${CBLASLIB})
|
||||||
|
|
||||||
add_cblas_test(dtest1.out "" xdcblat1)
|
add_cblas_test(dtest1.out "" xdcblat1)
|
||||||
add_cblas_test(dtest2.out din2 xdcblat2)
|
add_cblas_test(dtest2.out din2 xdcblat2)
|
||||||
|
@ -80,9 +80,9 @@ if(BUILD_COMPLEX)
|
||||||
add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
|
|
||||||
target_link_libraries(xccblat1 cblas ${BLAS_LIBRARIES})
|
target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES})
|
||||||
target_link_libraries(xccblat2 cblas)
|
target_link_libraries(xccblat2 ${CBLASLIB})
|
||||||
target_link_libraries(xccblat3 cblas)
|
target_link_libraries(xccblat3 ${CBLASLIB})
|
||||||
|
|
||||||
add_cblas_test(ctest1.out "" xccblat1)
|
add_cblas_test(ctest1.out "" xccblat1)
|
||||||
add_cblas_test(ctest2.out cin2 xccblat2)
|
add_cblas_test(ctest2.out cin2 xccblat2)
|
||||||
|
@ -94,9 +94,9 @@ if(BUILD_COMPLEX16)
|
||||||
add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||||
|
|
||||||
target_link_libraries(xzcblat1 cblas)
|
target_link_libraries(xzcblat1 ${CBLASLIB})
|
||||||
target_link_libraries(xzcblat2 cblas)
|
target_link_libraries(xzcblat2 ${CBLASLIB})
|
||||||
target_link_libraries(xzcblat3 cblas)
|
target_link_libraries(xzcblat3 ${CBLASLIB})
|
||||||
|
|
||||||
add_cblas_test(ztest1.out "" xzcblat1)
|
add_cblas_test(ztest1.out "" xzcblat1)
|
||||||
add_cblas_test(ztest2.out zin2 xzcblat2)
|
add_cblas_test(ztest2.out zin2 xzcblat2)
|
||||||
|
|
|
@ -14,6 +14,19 @@ macro( CheckLAPACKCompilerFlags )
|
||||||
|
|
||||||
set( FPE_EXIT FALSE )
|
set( FPE_EXIT FALSE )
|
||||||
|
|
||||||
|
# FORTRAN ILP default
|
||||||
|
if ( FORTRAN_ILP )
|
||||||
|
if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" )
|
||||||
|
if ( WIN32 )
|
||||||
|
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} /integer-size:64")
|
||||||
|
else ()
|
||||||
|
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -integer-size 64")
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
# GNU Fortran
|
# GNU Fortran
|
||||||
if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" )
|
if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" )
|
||||||
if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]")
|
if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]")
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# Load lapack targets from the build tree if necessary.
|
# Load lapack targets from the build tree if necessary.
|
||||||
set(_LAPACK_TARGET "@_lapack_config_build_guard_target@")
|
set(_LAPACK_TARGET "@_lapack_config_build_guard_target@")
|
||||||
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
||||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
|
||||||
endif()
|
endif()
|
||||||
unset(_LAPACK_TARGET)
|
unset(_LAPACK_TARGET)
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ get_filename_component(_LAPACK_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||||
# Load lapack targets from the install tree if necessary.
|
# Load lapack targets from the install tree if necessary.
|
||||||
set(_LAPACK_TARGET "@_lapack_config_install_guard_target@")
|
set(_LAPACK_TARGET "@_lapack_config_install_guard_target@")
|
||||||
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
||||||
include("${_LAPACK_SELF_DIR}/lapack-targets.cmake")
|
include("${_LAPACK_SELF_DIR}/@LAPACKLIB@-targets.cmake")
|
||||||
endif()
|
endif()
|
||||||
unset(_LAPACK_TARGET)
|
unset(_LAPACK_TARGET)
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,24 @@ endif()
|
||||||
# By default static library
|
# By default static library
|
||||||
option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
|
option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
|
||||||
|
|
||||||
|
# By default build index32 library
|
||||||
|
option(BUILD_INDEX64 "Build Index-64 API libraries" OFF)
|
||||||
|
if(BUILD_INDEX64)
|
||||||
|
set(BLASLIB "blas64")
|
||||||
|
set(CBLASLIB "cblas64")
|
||||||
|
set(LAPACKLIB "lapack64")
|
||||||
|
set(LAPACKELIB "lapacke64")
|
||||||
|
set(TMGLIB "tmglib64")
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWeirdNEC -DLAPACK_ILP64 -DHAVE_LAPACK_CONFIG_H")
|
||||||
|
set(FORTRAN_ILP TRUE)
|
||||||
|
else()
|
||||||
|
set(BLASLIB "blas")
|
||||||
|
set(CBLASLIB "cblas")
|
||||||
|
set(LAPACKLIB "lapack")
|
||||||
|
set(LAPACKELIB "lapacke")
|
||||||
|
set(TMGLIB "tmglib")
|
||||||
|
endif()
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
|
||||||
# Updated OSX RPATH settings
|
# Updated OSX RPATH settings
|
||||||
|
@ -73,10 +91,10 @@ include(PreventInBuildInstalls)
|
||||||
|
|
||||||
if(UNIX)
|
if(UNIX)
|
||||||
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel)
|
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel)
|
||||||
list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict")
|
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict")
|
||||||
endif()
|
endif()
|
||||||
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL)
|
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL)
|
||||||
list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none")
|
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none")
|
||||||
endif()
|
endif()
|
||||||
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler.
|
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler.
|
||||||
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin
|
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin
|
||||||
|
@ -112,7 +130,7 @@ endif()
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets)
|
set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets)
|
||||||
|
|
||||||
macro(lapack_install_library lib)
|
macro(lapack_install_library lib)
|
||||||
install(TARGETS ${lib}
|
install(TARGETS ${lib}
|
||||||
|
@ -220,7 +238,7 @@ endif()
|
||||||
if(NOT BLAS_FOUND)
|
if(NOT BLAS_FOUND)
|
||||||
message(STATUS "Using supplied NETLIB BLAS implementation")
|
message(STATUS "Using supplied NETLIB BLAS implementation")
|
||||||
add_subdirectory(BLAS)
|
add_subdirectory(BLAS)
|
||||||
set(BLAS_LIBRARIES blas)
|
set(BLAS_LIBRARIES ${BLASLIB})
|
||||||
else()
|
else()
|
||||||
set(CMAKE_EXE_LINKER_FLAGS
|
set(CMAKE_EXE_LINKER_FLAGS
|
||||||
"${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}"
|
"${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}"
|
||||||
|
@ -279,7 +297,7 @@ endif()
|
||||||
# Neither user-specified nor optimized LAPACK libraries can be used
|
# Neither user-specified nor optimized LAPACK libraries can be used
|
||||||
if(NOT LATESTLAPACK_FOUND)
|
if(NOT LATESTLAPACK_FOUND)
|
||||||
message(STATUS "Using supplied NETLIB LAPACK implementation")
|
message(STATUS "Using supplied NETLIB LAPACK implementation")
|
||||||
set(LAPACK_LIBRARIES lapack)
|
set(LAPACK_LIBRARIES ${LAPACKLIB})
|
||||||
add_subdirectory(SRC)
|
add_subdirectory(SRC)
|
||||||
else()
|
else()
|
||||||
set(CMAKE_EXE_LINKER_FLAGS
|
set(CMAKE_EXE_LINKER_FLAGS
|
||||||
|
@ -371,23 +389,23 @@ include(CPack)
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
|
|
||||||
if(NOT BLAS_FOUND)
|
if(NOT BLAS_FOUND)
|
||||||
set(ALL_TARGETS ${ALL_TARGETS} blas)
|
set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT LATESTLAPACK_FOUND)
|
if(NOT LATESTLAPACK_FOUND)
|
||||||
set(ALL_TARGETS ${ALL_TARGETS} lapack)
|
set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKLIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(BUILD_TESTING OR LAPACKE_WITH_TMG)
|
if(BUILD_TESTING OR LAPACKE_WITH_TMG)
|
||||||
set(ALL_TARGETS ${ALL_TARGETS} tmglib)
|
set(ALL_TARGETS ${ALL_TARGETS} ${TMGLIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Export lapack targets, not including lapacke, from the
|
# Export lapack targets, not including lapacke, from the
|
||||||
# install tree, if any.
|
# install tree, if any.
|
||||||
set(_lapack_config_install_guard_target "")
|
set(_lapack_config_install_guard_target "")
|
||||||
if(ALL_TARGETS)
|
if(ALL_TARGETS)
|
||||||
install(EXPORT lapack-targets
|
install(EXPORT ${LAPACKLIB}-targets
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -398,18 +416,18 @@ endif()
|
||||||
|
|
||||||
# Include cblas in targets exported from the build tree.
|
# Include cblas in targets exported from the build tree.
|
||||||
if(CBLAS)
|
if(CBLAS)
|
||||||
set(ALL_TARGETS ${ALL_TARGETS} cblas)
|
set(ALL_TARGETS ${ALL_TARGETS} ${CBLASLIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Include lapacke in targets exported from the build tree.
|
# Include lapacke in targets exported from the build tree.
|
||||||
if(LAPACKE)
|
if(LAPACKE)
|
||||||
set(ALL_TARGETS ${ALL_TARGETS} lapacke)
|
set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKELIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Export lapack and lapacke targets from the build tree, if any.
|
# Export lapack and lapacke targets from the build tree, if any.
|
||||||
set(_lapack_config_build_guard_target "")
|
set(_lapack_config_build_guard_target "")
|
||||||
if(ALL_TARGETS)
|
if(ALL_TARGETS)
|
||||||
export(TARGETS ${ALL_TARGETS} FILE lapack-targets.cmake)
|
export(TARGETS ${ALL_TARGETS} FILE ${LAPACKLIB}-targets.cmake)
|
||||||
|
|
||||||
# Choose one of the lapack or lapacke targets to use as a guard
|
# Choose one of the lapack or lapacke targets to use as a guard
|
||||||
# for lapack-config.cmake to load targets from the build tree.
|
# for lapack-config.cmake to load targets from the build tree.
|
||||||
|
@ -417,30 +435,30 @@ if(ALL_TARGETS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in
|
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in
|
||||||
${LAPACK_BINARY_DIR}/lapack-config.cmake @ONLY)
|
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config.cmake @ONLY)
|
||||||
|
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc @ONLY)
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc @ONLY)
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc
|
${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc
|
||||||
DESTINATION ${PKG_CONFIG_DIR}
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in
|
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in
|
||||||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake @ONLY)
|
${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake @ONLY)
|
||||||
|
|
||||||
include(CMakePackageConfigHelpers)
|
include(CMakePackageConfigHelpers)
|
||||||
write_basic_package_version_file(
|
write_basic_package_version_file(
|
||||||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake
|
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake
|
||||||
VERSION ${LAPACK_VERSION}
|
VERSION ${LAPACK_VERSION}
|
||||||
COMPATIBILITY SameMajorVersion
|
COMPATIBILITY SameMajorVersion
|
||||||
)
|
)
|
||||||
|
|
||||||
install(FILES
|
install(FILES
|
||||||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake
|
${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake
|
||||||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake
|
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
message(STATUS "LAPACKE enable")
|
message(STATUS "LAPACKE enable")
|
||||||
enable_language(C)
|
enable_language(C)
|
||||||
|
|
||||||
set(LAPACK_INSTALL_EXPORT_NAME lapacke-targets)
|
set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKELIB}-targets)
|
||||||
|
|
||||||
# Create a header file lapacke_mangling.h for the routines called in my C programs
|
# Create a header file lapacke_mangling.h for the routines called in my C programs
|
||||||
include(FortranCInterface)
|
include(FortranCInterface)
|
||||||
|
@ -72,28 +72,28 @@ if(LAPACKE_WITH_TMG)
|
||||||
endif()
|
endif()
|
||||||
list(APPEND SOURCES ${UTILS})
|
list(APPEND SOURCES ${UTILS})
|
||||||
|
|
||||||
add_library(lapacke ${SOURCES})
|
add_library(${LAPACKELIB} ${SOURCES})
|
||||||
set_target_properties(
|
set_target_properties(
|
||||||
lapacke PROPERTIES
|
${LAPACKELIB} PROPERTIES
|
||||||
LINKER_LANGUAGE C
|
LINKER_LANGUAGE C
|
||||||
VERSION ${LAPACK_VERSION}
|
VERSION ${LAPACK_VERSION}
|
||||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||||
)
|
)
|
||||||
target_include_directories(lapacke PUBLIC
|
target_include_directories(${LAPACKELIB} PUBLIC
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||||
$<INSTALL_INTERFACE:include>
|
$<INSTALL_INTERFACE:include>
|
||||||
)
|
)
|
||||||
if(WIN32 AND NOT UNIX)
|
if(WIN32 AND NOT UNIX)
|
||||||
target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE)
|
target_compile_definitions(${LAPACKELIB} PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE)
|
||||||
message(STATUS "Windows BUILD")
|
message(STATUS "Windows BUILD")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(LAPACKE_WITH_TMG)
|
if(LAPACKE_WITH_TMG)
|
||||||
target_link_libraries(lapacke PRIVATE tmglib)
|
target_link_libraries(${LAPACKELIB} PRIVATE ${TMGLIB})
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES})
|
target_link_libraries(${LAPACKELIB} PRIVATE ${LAPACK_LIBRARIES})
|
||||||
|
|
||||||
lapack_install_library(lapacke)
|
lapack_install_library(${LAPACKELIB})
|
||||||
install(
|
install(
|
||||||
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h
|
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h
|
||||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||||
|
@ -105,28 +105,28 @@ if(BUILD_TESTING)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc @ONLY)
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc @ONLY)
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc
|
${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc
|
||||||
DESTINATION ${PKG_CONFIG_DIR}
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in
|
||||||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake @ONLY)
|
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake @ONLY)
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in
|
||||||
${LAPACK_BINARY_DIR}/lapacke-config.cmake @ONLY)
|
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config.cmake @ONLY)
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake @ONLY)
|
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake @ONLY)
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake
|
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake
|
||||||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake
|
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
install(EXPORT lapacke-targets
|
install(EXPORT ${LAPACKELIB}-targets
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION}
|
||||||
COMPONENT Development
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
|
@ -3,8 +3,8 @@ set(LAPACK_DIR "@LAPACK_BINARY_DIR@")
|
||||||
find_package(LAPACK NO_MODULE)
|
find_package(LAPACK NO_MODULE)
|
||||||
|
|
||||||
# Load lapack targets from the build tree, including lapacke targets.
|
# Load lapack targets from the build tree, including lapacke targets.
|
||||||
if(NOT TARGET lapacke)
|
if(NOT TARGET @LAPACKELIB@)
|
||||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Hint for project building against lapack
|
# Hint for project building against lapack
|
||||||
|
@ -14,4 +14,4 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
|
||||||
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
||||||
|
|
||||||
# Report lapacke libraries.
|
# Report lapacke libraries.
|
||||||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})
|
set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES})
|
||||||
|
|
|
@ -5,12 +5,12 @@ get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH)
|
||||||
get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH)
|
get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH)
|
||||||
|
|
||||||
# Load the LAPACK package with which we were built.
|
# Load the LAPACK package with which we were built.
|
||||||
set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@")
|
set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@")
|
||||||
find_package(LAPACK NO_MODULE)
|
find_package(LAPACK NO_MODULE)
|
||||||
|
|
||||||
# Load lapacke targets from the install tree.
|
# Load lapacke targets from the install tree.
|
||||||
if(NOT TARGET lapacke)
|
if(NOT TARGET @LAPACKELIB@)
|
||||||
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake)
|
include(${_LAPACKE_SELF_DIR}/@LAPACKELIB@-targets.cmake)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Hint for project building against lapack
|
# Hint for project building against lapack
|
||||||
|
@ -20,7 +20,7 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
|
||||||
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include)
|
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include)
|
||||||
|
|
||||||
# Report lapacke libraries.
|
# Report lapacke libraries.
|
||||||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})
|
set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES})
|
||||||
|
|
||||||
unset(_LAPACKE_PREFIX)
|
unset(_LAPACKE_PREFIX)
|
||||||
unset(_LAPACKE_SELF_DIR)
|
unset(_LAPACKE_SELF_DIR)
|
||||||
|
|
|
@ -3,10 +3,10 @@ add_executable(xexample_DGESV_colmajor example_DGESV_colmajor.c lapacke_example_
|
||||||
add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h)
|
add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h)
|
||||||
add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h)
|
add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h)
|
||||||
|
|
||||||
target_link_libraries(xexample_DGESV_rowmajor lapacke)
|
target_link_libraries(xexample_DGESV_rowmajor ${LAPACKELIB})
|
||||||
target_link_libraries(xexample_DGESV_colmajor lapacke)
|
target_link_libraries(xexample_DGESV_colmajor ${LAPACKELIB})
|
||||||
target_link_libraries(xexample_DGELS_rowmajor lapacke)
|
target_link_libraries(xexample_DGELS_rowmajor ${LAPACKELIB})
|
||||||
target_link_libraries(xexample_DGELS_colmajor lapacke)
|
target_link_libraries(xexample_DGELS_colmajor ${LAPACKELIB})
|
||||||
|
|
||||||
add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor)
|
add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor)
|
||||||
add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor)
|
add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor)
|
||||||
|
|
|
@ -49,12 +49,13 @@ extern "C" {
|
||||||
#endif /* __cplusplus */
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#ifndef lapack_int
|
#ifndef lapack_int
|
||||||
#if defined(LAPACK_ILP64)
|
#if defined(LAPACK_ILP64)
|
||||||
#define lapack_int long
|
#define lapack_int int64_t
|
||||||
#else
|
#else
|
||||||
#define lapack_int int
|
#define lapack_int int32_t
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,11 @@ extern "C" {
|
||||||
void LAPACKE_xerbla( const char *name, lapack_int info );
|
void LAPACKE_xerbla( const char *name, lapack_int info );
|
||||||
|
|
||||||
/* Compare two chars (case-insensitive) */
|
/* Compare two chars (case-insensitive) */
|
||||||
lapack_logical LAPACKE_lsame( char ca, char cb );
|
lapack_logical LAPACKE_lsame( char ca, char cb )
|
||||||
|
#if defined __GNUC__
|
||||||
|
__attribute__((const))
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
/* Functions to convert column-major to row-major 2d arrays and vice versa. */
|
/* Functions to convert column-major to row-major 2d arrays and vice versa. */
|
||||||
void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n,
|
void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n,
|
||||||
|
|
|
@ -5,6 +5,6 @@ Name: LAPACKE
|
||||||
Description: C Standard Interface to LAPACK Linear Algebra PACKage
|
Description: C Standard Interface to LAPACK Linear Algebra PACKage
|
||||||
Version: @LAPACK_VERSION@
|
Version: @LAPACK_VERSION@
|
||||||
URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
|
URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
|
||||||
Libs: -L${libdir} -llapacke
|
Libs: -L${libdir} -l@LAPACKELIB@
|
||||||
Cflags: -I${includedir}
|
Cflags: -I${includedir}
|
||||||
Requires.private: lapack
|
Requires.private: @LAPACKLIB@
|
||||||
|
|
|
@ -500,21 +500,21 @@ if(BUILD_COMPLEX16)
|
||||||
endif()
|
endif()
|
||||||
list(REMOVE_DUPLICATES SOURCES)
|
list(REMOVE_DUPLICATES SOURCES)
|
||||||
|
|
||||||
add_library(lapack ${SOURCES})
|
add_library(${LAPACKLIB} ${SOURCES})
|
||||||
set_target_properties(
|
set_target_properties(
|
||||||
lapack PROPERTIES
|
${LAPACKLIB} PROPERTIES
|
||||||
VERSION ${LAPACK_VERSION}
|
VERSION ${LAPACK_VERSION}
|
||||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||||
)
|
)
|
||||||
|
|
||||||
if(USE_XBLAS)
|
if(USE_XBLAS)
|
||||||
target_link_libraries(lapack PRIVATE ${XBLAS_LIBRARY})
|
target_link_libraries(${LAPACKLIB} PRIVATE ${XBLAS_LIBRARY})
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(lapack PRIVATE ${BLAS_LIBRARIES})
|
target_link_libraries(${LAPACKLIB} PRIVATE ${BLAS_LIBRARIES})
|
||||||
|
|
||||||
if(_is_coverage_build)
|
if(_is_coverage_build)
|
||||||
target_link_libraries(lapack PRIVATE gcov)
|
target_link_libraries(${LAPACKLIB} PRIVATE gcov)
|
||||||
add_coverage(lapack)
|
add_coverage(${LAPACKLIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
lapack_install_library(lapack)
|
lapack_install_library(${LAPACKLIB})
|
||||||
|
|
|
@ -47,6 +47,6 @@ if(BUILD_COMPLEX16)
|
||||||
endif()
|
endif()
|
||||||
list(REMOVE_DUPLICATES SOURCES)
|
list(REMOVE_DUPLICATES SOURCES)
|
||||||
|
|
||||||
add_library(tmglib ${SOURCES})
|
add_library(${TMGLIB} ${SOURCES})
|
||||||
target_link_libraries(tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
|
target_link_libraries(${TMGLIB} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
|
||||||
lapack_install_library(tmglib)
|
lapack_install_library(${TMGLIB})
|
||||||
|
|
12
param.h
12
param.h
|
@ -3128,9 +3128,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
|
|
||||||
#if defined(CORTEXA57) || \
|
#if defined(CORTEXA57) || defined(CORTEXX1) || \
|
||||||
defined(CORTEXA72) || defined(CORTEXA73) || \
|
defined(CORTEXA72) || defined(CORTEXA73) || \
|
||||||
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -3147,7 +3147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
||||||
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
||||||
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
||||||
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
|
||||||
#define SGEMM_DEFAULT_P 512
|
#define SGEMM_DEFAULT_P 512
|
||||||
#define DGEMM_DEFAULT_P 256
|
#define DGEMM_DEFAULT_P 256
|
||||||
#define CGEMM_DEFAULT_P 256
|
#define CGEMM_DEFAULT_P 256
|
||||||
|
@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
#elif defined(ARMV8SVE) || defined(A64FX)
|
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2)
|
||||||
|
|
||||||
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||||
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||||
|
@ -3423,8 +3423,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
|
@ -115,7 +115,7 @@
|
||||||
#define INCLUDE_CTGSYL INCLUDE_XTGSYL
|
#define INCLUDE_CTGSYL INCLUDE_XTGSYL
|
||||||
#define INCLUDE_ZTGSYL INCLUDE_XTGSYL
|
#define INCLUDE_ZTGSYL INCLUDE_XTGSYL
|
||||||
|
|
||||||
#define INCLUDE_XGEMMT 0
|
#define INCLUDE_XGEMMT 1
|
||||||
#define INCLUDE_SGEMMT INCLUDE_XGEMMT
|
#define INCLUDE_SGEMMT INCLUDE_XGEMMT
|
||||||
#define INCLUDE_DGEMMT INCLUDE_XGEMMT
|
#define INCLUDE_DGEMMT INCLUDE_XGEMMT
|
||||||
#define INCLUDE_CGEMMT INCLUDE_XGEMMT
|
#define INCLUDE_CGEMMT INCLUDE_XGEMMT
|
||||||
|
|
|
@ -566,7 +566,8 @@ void LAPACK(sgemmt)(
|
||||||
const float *B, const blasint *ldB,
|
const float *B, const blasint *ldB,
|
||||||
const float *beta, float *C, const blasint *ldC
|
const float *beta, float *C, const blasint *ldC
|
||||||
) {
|
) {
|
||||||
RELAPACK_sgemmt(uplo, n, A, ldA, info);
|
blasint info;
|
||||||
|
RELAPACK_sgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -578,7 +579,8 @@ void LAPACK(dgemmt)(
|
||||||
const double *B, const blasint *ldB,
|
const double *B, const blasint *ldB,
|
||||||
const double *beta, double *C, const blasint *ldC
|
const double *beta, double *C, const blasint *ldC
|
||||||
) {
|
) {
|
||||||
RELAPACK_dgemmt(uplo, n, A, ldA, info);
|
blasint info;
|
||||||
|
RELAPACK_dgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -590,7 +592,8 @@ void LAPACK(cgemmt)(
|
||||||
const float *B, const blasint *ldB,
|
const float *B, const blasint *ldB,
|
||||||
const float *beta, float *C, const blasint *ldC
|
const float *beta, float *C, const blasint *ldC
|
||||||
) {
|
) {
|
||||||
RELAPACK_cgemmt(uplo, n, A, ldA, info);
|
blasint info;
|
||||||
|
RELAPACK_cgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -602,6 +605,7 @@ void LAPACK(zgemmt)(
|
||||||
const double *B, const blasint *ldB,
|
const double *B, const blasint *ldB,
|
||||||
const double *beta, double *C, const blasint *ldC
|
const double *beta, double *C, const blasint *ldC
|
||||||
) {
|
) {
|
||||||
RELAPACK_zgemmt(uplo, n, A, ldA, info);
|
blasint info;
|
||||||
|
RELAPACK_zgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -30,6 +30,10 @@ if(WIN32)
|
||||||
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1
|
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1
|
||||||
"if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n"
|
"if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n"
|
||||||
"$ErrorActionPreference = \"Stop\"\n"
|
"$ErrorActionPreference = \"Stop\"\n"
|
||||||
|
"If ((Get-Content $args[1] | & file - | %{$_ -match \"BOM\"}) -contains $true) {\n"
|
||||||
|
"echo 'Skipped due to wrong input encoding'\n"
|
||||||
|
"exit 0\n"
|
||||||
|
"}\n"
|
||||||
"Get-Content $args[1] | & $args[0]\n"
|
"Get-Content $args[1] | & $args[0]\n"
|
||||||
"If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n"
|
"If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n"
|
||||||
"echo Error\n"
|
"echo Error\n"
|
||||||
|
|
Loading…
Reference in New Issue