Merge branch 'develop' into clapack
This commit is contained in:
commit
e3250e2362
|
@ -23,9 +23,9 @@ if(MSVC AND NOT DEFINED NOFORTRAN)
|
|||
endif()
|
||||
|
||||
#######
|
||||
if(MSVC)
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
endif()
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
|
||||
|
||||
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
|
||||
|
||||
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
|
||||
|
||||
|
@ -320,7 +320,9 @@ if (NOT NOFORTRAN)
|
|||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
if (BUILD_TESTING)
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
endif()
|
||||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
|
||||
add_subdirectory(cpp_thread_test)
|
||||
endif()
|
||||
|
|
|
@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FT2000)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N1 is only available
|
||||
# in GCC>=9
|
||||
ifeq ($(CORE), NEOVERSEN1)
|
||||
|
@ -229,6 +236,43 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
# Cortex-X1 compiler flags. Applied only when GCCVERSIONGTEQ11 or ISCLANG
# evaluates to 1. The core is built as armv8.2-a and tuned like a Cortex-A72.
# The CPU name passed to -mtune must be spelled with a hyphen ("cortex-a72"),
# exactly as in the FT2000 block earlier in this file; "cortexa72" is not a
# CPU name the compiler recognises and makes the build fail.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX1)
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
# The Fortran flags are skipped for the NAG compiler
# (presumably because it does not accept GCC-style -march/-mtune - confirm).
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXX2)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#ifeq (1, $(filter 1,$(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA510)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA710)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
endif
|
||||
|
|
|
@ -71,7 +71,8 @@ endif
|
|||
|
||||
|
||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
avx512=$$(perl c_check - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
|
||||
getarch_2nd : getarch_2nd.c config.h dummy
|
||||
ifndef TARGET_CORE
|
||||
|
|
|
@ -92,6 +92,10 @@ CORTEXA53
|
|||
CORTEXA57
|
||||
CORTEXA72
|
||||
CORTEXA73
|
||||
CORTEXA510
|
||||
CORTEXA710
|
||||
CORTEXX1
|
||||
CORTEXX2
|
||||
NEOVERSEN1
|
||||
NEOVERSEV1
|
||||
NEOVERSEN2
|
||||
|
@ -103,6 +107,9 @@ THUNDERX2T99
|
|||
TSV110
|
||||
THUNDERX3T110
|
||||
VORTEX
|
||||
A64FX
|
||||
ARMV8SVE
|
||||
FT2000
|
||||
|
||||
9.System Z:
|
||||
ZARCH_GENERIC
|
||||
|
|
|
@ -65,7 +65,7 @@ jobs:
|
|||
- task: CMake@1
|
||||
inputs:
|
||||
workingDirectory: 'build' # Optional
|
||||
cmakeArgs: '-G "Visual Studio 16 2019" ..'
|
||||
cmakeArgs: '-G "Visual Studio 17 2022" ..'
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
cmakeArgs: '--build . --config Release'
|
||||
|
@ -103,7 +103,7 @@ jobs:
|
|||
|
||||
- job: Windows_flang_clang
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
vmImage: 'windows-2022'
|
||||
steps:
|
||||
- script: |
|
||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||
|
@ -114,8 +114,8 @@ jobs:
|
|||
conda install --yes --quiet ninja flang
|
||||
mkdir build
|
||||
cd build
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
||||
cmake --build . --config Release
|
||||
ctest
|
||||
|
||||
|
|
5
c_check
5
c_check
|
@ -254,7 +254,7 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) {
|
|||
# $tmpf = new File::Temp( UNLINK => 1 );
|
||||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
||||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
|
||||
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
||||
print $fh "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
||||
$args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf";
|
||||
if ($compiler eq "PGI") {
|
||||
$args = " -tp skylake -c -o $tmpf.o $tmpf";
|
||||
|
@ -278,7 +278,7 @@ if ($data =~ /HAVE_C11/) {
|
|||
$c11_atomics = 0;
|
||||
} else {
|
||||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
|
||||
print $tmpf "#include <stdatomic.h>\nint main(void){}\n";
|
||||
print $fh "#include <stdatomic.h>\nint main(void){}\n";
|
||||
$args = " -c -o $tmpf.o $tmpf";
|
||||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
|
||||
system(@cmd) == 0;
|
||||
|
@ -316,6 +316,7 @@ if ($architecture ne $hostarch) {
|
|||
}
|
||||
|
||||
$cross = 1 if ($os ne $hostos);
|
||||
# Android target on a Linux host with TERMUX_APP_PID set is not treated as a
# cross build (presumably a native build inside Termux - confirm).
# The variable is tested with defined/ne rather than the numeric "!=":
# "!=" converts both operands to numbers, so it warns on an unset or
# non-numeric value and treats any non-numeric string as "not set".
$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && defined($ENV{TERMUX_APP_PID}) && ($ENV{TERMUX_APP_PID} ne ""));
|
||||
|
||||
$openmp = "" if $ENV{USE_OPENMP} != 1;
|
||||
|
||||
|
|
|
@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE)
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXA510)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXA710)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXX1)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXX2)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL POWER10)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
|
|
|
@ -67,7 +67,15 @@ if (${F_COMPILER} STREQUAL "GFORTRAN")
|
|||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
|
||||
if (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel")
|
||||
if (WIN32)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} /integer-size:64")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -integer-size 64")
|
||||
endif ()
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
|
||||
endif ()
|
||||
endif ()
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
|
|
|
@ -2610,8 +2610,9 @@
|
|||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\
|
||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)
|
||||
#if !defined(DYNAMIC_ARCH) \
|
||||
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|
||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K))
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG sbgemm_p;
|
||||
|
|
|
@ -45,6 +45,10 @@ size_t length64=sizeof(value64);
|
|||
#define CPU_NEOVERSEN1 11
|
||||
#define CPU_NEOVERSEV1 16
|
||||
#define CPU_NEOVERSEN2 17
|
||||
#define CPU_CORTEXX1 18
|
||||
#define CPU_CORTEXX2 19
|
||||
#define CPU_CORTEXA510 20
|
||||
#define CPU_CORTEXA710 21
|
||||
// Qualcomm
|
||||
#define CPU_FALKOR 6
|
||||
// Cavium
|
||||
|
@ -59,6 +63,8 @@ size_t length64=sizeof(value64);
|
|||
#define CPU_VORTEX 13
|
||||
// Fujitsu
|
||||
#define CPU_A64FX 15
|
||||
// Phytium
|
||||
#define CPU_FT2000 22
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
|
@ -73,12 +79,17 @@ static char *cpuname[] = {
|
|||
"TSV110",
|
||||
"EMAG8180",
|
||||
"NEOVERSEN1",
|
||||
"NEOVERSEV1"
|
||||
"NEOVERSEN2"
|
||||
"THUNDERX3T110",
|
||||
"VORTEX",
|
||||
"CORTEXA55",
|
||||
"A64FX"
|
||||
"A64FX",
|
||||
"NEOVERSEV1",
|
||||
"NEOVERSEN2",
|
||||
"CORTEXX1",
|
||||
"CORTEXX2",
|
||||
"CORTEXA510",
|
||||
"CORTEXA710",
|
||||
"FT2000"
|
||||
};
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
|
@ -94,12 +105,17 @@ static char *cpuname_lower[] = {
|
|||
"tsv110",
|
||||
"emag8180",
|
||||
"neoversen1",
|
||||
"neoversev1",
|
||||
"neoversen2",
|
||||
"thunderx3t110",
|
||||
"vortex",
|
||||
"cortexa55",
|
||||
"a64fx"
|
||||
"a64fx",
|
||||
"neoversev1",
|
||||
"neoversen2",
|
||||
"cortexx1",
|
||||
"cortexx2",
|
||||
"cortexa510",
|
||||
"cortexa710",
|
||||
"ft2000"
|
||||
};
|
||||
|
||||
int get_feature(char *search)
|
||||
|
@ -182,6 +198,14 @@ int detect(void)
|
|||
return CPU_NEOVERSEN2;
|
||||
else if (strstr(cpu_part, "0xd05"))
|
||||
return CPU_CORTEXA55;
|
||||
else if (strstr(cpu_part, "0xd46"))
|
||||
return CPU_CORTEXA510;
|
||||
else if (strstr(cpu_part, "0xd47"))
|
||||
return CPU_CORTEXA710;
|
||||
else if (strstr(cpu_part, "0xd44"))
|
||||
return CPU_CORTEXX1;
|
||||
else if (strstr(cpu_part, "0xd4c"))
|
||||
return CPU_CORTEXX2;
|
||||
}
|
||||
// Qualcomm
|
||||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
|
||||
|
@ -202,6 +226,13 @@ int detect(void)
|
|||
// Fujitsu
|
||||
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
|
||||
return CPU_A64FX;
|
||||
// Apple
|
||||
else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
|
||||
return CPU_VORTEX;
|
||||
// Phytium
|
||||
else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661")
|
||||
|| strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
|
||||
return CPU_FT2000;
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
|
@ -382,7 +413,24 @@ void get_cpuconfig(void)
|
|||
printf("#define DTB_DEFAULT_ENTRIES 48\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
|
||||
case CPU_CORTEXA510:
|
||||
case CPU_CORTEXA710:
|
||||
case CPU_CORTEXX1:
|
||||
case CPU_CORTEXX2:
|
||||
printf("#define ARMV9\n");
|
||||
printf("#define %s\n", cpuname[d]);
|
||||
printf("#define L1_CODE_SIZE 65536\n");
|
||||
printf("#define L1_CODE_LINESIZE 64\n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 4\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 4\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
case CPU_FALKOR:
|
||||
printf("#define FALKOR\n");
|
||||
printf("#define L1_CODE_SIZE 65536\n");
|
||||
|
@ -469,9 +517,9 @@ void get_cpuconfig(void)
|
|||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
#ifdef __APPLE__
|
||||
case CPU_VORTEX:
|
||||
printf("#define VORTEX \n");
|
||||
#ifdef __APPLE__
|
||||
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
|
||||
printf("#define L1_CODE_SIZE %lld \n",value64);
|
||||
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
|
||||
|
@ -480,10 +528,10 @@ void get_cpuconfig(void)
|
|||
printf("#define L1_DATA_SIZE %lld \n",value64);
|
||||
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
|
||||
printf("#define L2_SIZE %lld \n",value64);
|
||||
#endif
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
#endif
|
||||
case CPU_A64FX:
|
||||
printf("#define A64FX\n");
|
||||
printf("#define L1_CODE_SIZE 65535\n");
|
||||
|
@ -494,6 +542,16 @@ void get_cpuconfig(void)
|
|||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
case CPU_FT2000:
|
||||
printf("#define FT2000\n");
|
||||
printf("#define L1_CODE_SIZE 32768\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 33554432\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
}
|
||||
get_cpucount();
|
||||
}
|
||||
|
|
54
cpuid_x86.c
54
cpuid_x86.c
|
@ -1707,8 +1707,18 @@ int get_cpuname(void){
|
|||
if (model == 0xf && stepping < 0xe)
|
||||
return CPUTYPE_NANO;
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 0x7:
|
||||
switch (exmodel) {
|
||||
case 5:
|
||||
if (support_avx2())
|
||||
return CPUTYPE_ZEN;
|
||||
else
|
||||
return CPUTYPE_DUNNINGTON;
|
||||
default:
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
default:
|
||||
if (family >= 0x7)
|
||||
if (family >= 0x8)
|
||||
return CPUTYPE_NEHALEM;
|
||||
else
|
||||
return CPUTYPE_VIAC3;
|
||||
|
@ -1716,7 +1726,20 @@ int get_cpuname(void){
|
|||
}
|
||||
|
||||
if (vendor == VENDOR_ZHAOXIN){
|
||||
return CPUTYPE_NEHALEM;
|
||||
switch (family) {
|
||||
case 0x7:
|
||||
switch (exmodel) {
|
||||
case 5:
|
||||
if (support_avx2())
|
||||
return CPUTYPE_ZEN;
|
||||
else
|
||||
return CPUTYPE_DUNNINGTON;
|
||||
default:
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
default:
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (vendor == VENDOR_RISE){
|
||||
|
@ -2416,8 +2439,18 @@ int get_coretype(void){
|
|||
if (model == 0xf && stepping < 0xe)
|
||||
return CORE_NANO;
|
||||
return CORE_NEHALEM;
|
||||
case 0x7:
|
||||
switch (exmodel) {
|
||||
case 5:
|
||||
if (support_avx2())
|
||||
return CORE_ZEN;
|
||||
else
|
||||
return CORE_DUNNINGTON;
|
||||
default:
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
default:
|
||||
if (family >= 0x7)
|
||||
if (family >= 0x8)
|
||||
return CORE_NEHALEM;
|
||||
else
|
||||
return CORE_VIAC3;
|
||||
|
@ -2425,7 +2458,20 @@ int get_coretype(void){
|
|||
}
|
||||
|
||||
if (vendor == VENDOR_ZHAOXIN) {
|
||||
return CORE_NEHALEM;
|
||||
switch (family) {
|
||||
case 0x7:
|
||||
switch (exmodel) {
|
||||
case 5:
|
||||
if (support_avx2())
|
||||
return CORE_ZEN;
|
||||
else
|
||||
return CORE_DUNNINGTON;
|
||||
default:
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
default:
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
}
|
||||
|
||||
return CORE_UNKNOWN;
|
||||
|
|
|
@ -96,7 +96,7 @@ extern gotoblas_t gotoblas_BARCELONA;
|
|||
#endif
|
||||
#ifdef DYN_ATOM
|
||||
extern gotoblas_t gotoblas_ATOM;
|
||||
elif defined(DYN_NEHALEM)
|
||||
#elif defined(DYN_NEHALEM)
|
||||
#define gotoblas_ATOM gotoblas_NEHALEM
|
||||
#else
|
||||
#define gotoblas_ATOM gotoblas_PRESCOTT
|
||||
|
@ -875,14 +875,37 @@ static gotoblas_t *get_coretype(void){
|
|||
if (model == 0xf && stepping < 0xe)
|
||||
return &gotoblas_NANO;
|
||||
return &gotoblas_NEHALEM;
|
||||
case 0x7:
|
||||
switch (exmodel) {
|
||||
case 5:
|
||||
if (support_avx2())
|
||||
return &gotoblas_ZEN;
|
||||
else
|
||||
return &gotoblas_DUNNINGTON;
|
||||
default:
|
||||
return &gotoblas_NEHALEM;
|
||||
}
|
||||
default:
|
||||
if (family >= 0x7)
|
||||
if (family >= 0x8)
|
||||
return &gotoblas_NEHALEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (vendor == VENDOR_ZHAOXIN) {
|
||||
return &gotoblas_NEHALEM;
|
||||
switch (family) {
|
||||
case 0x7:
|
||||
switch (exmodel) {
|
||||
case 5:
|
||||
if (support_avx2())
|
||||
return &gotoblas_ZEN;
|
||||
else
|
||||
return &gotoblas_DUNNINGTON;
|
||||
default:
|
||||
return &gotoblas_NEHALEM;
|
||||
}
|
||||
default:
|
||||
return &gotoblas_NEHALEM;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
|
|
@ -60,6 +60,9 @@ static char* openblas_config_str=""
|
|||
#ifdef USE_OPENMP
|
||||
"USE_OPENMP "
|
||||
#endif
|
||||
#ifdef USE_TLS
|
||||
"USE_TLS "
|
||||
#endif
|
||||
#ifndef DYNAMIC_ARCH
|
||||
CHAR_CORENAME
|
||||
#endif
|
||||
|
|
94
getarch.c
94
getarch.c
|
@ -94,14 +94,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <sys/sysinfo.h>
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6))
|
||||
#else
|
||||
#ifndef NO_AVX512
|
||||
#define NO_AVX512
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
/* #define FORCE_P2 */
|
||||
/* #define FORCE_KATMAI */
|
||||
/* #define FORCE_COPPERMINE */
|
||||
|
@ -1240,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa53"
|
||||
#define CORENAME "CORTEXA53"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA57
|
||||
|
@ -1256,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa57"
|
||||
#define CORENAME "CORTEXA57"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA72
|
||||
|
@ -1272,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa72"
|
||||
#define CORENAME "CORTEXA72"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA73
|
||||
|
@ -1288,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa73"
|
||||
#define CORENAME "CORTEXA73"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXX1
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXX1"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXX1 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexx1"
|
||||
#define CORENAME "CORTEXX1"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXX2
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXX2"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXX2 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||
#define LIBNAME "cortexx2"
|
||||
#define CORENAME "CORTEXX2"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA510
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXA510"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXA510 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||
#define LIBNAME "cortexa510"
|
||||
#define CORENAME "CORTEXA510"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA710
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXA710"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXA710 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||
#define LIBNAME "cortexa710"
|
||||
#define CORENAME "CORTEXA710"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_NEOVERSEN1
|
||||
|
@ -1305,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-march=armv8.2-a -mtune=neoverse-n1"
|
||||
#define LIBNAME "neoversen1"
|
||||
#define CORENAME "NEOVERSEN1"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_NEOVERSEV1
|
||||
|
@ -1322,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-march=armv8.4-a -mtune=neoverse-v1"
|
||||
#define LIBNAME "neoversev1"
|
||||
#define CORENAME "NEOVERSEV1"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1340,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-march=armv8.5-a -mtune=neoverse-n2"
|
||||
#define LIBNAME "neoversen2"
|
||||
#define CORENAME "NEOVERSEN2"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA55
|
||||
|
@ -1356,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa55"
|
||||
#define CORENAME "CORTEXA55"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_FALKOR
|
||||
|
@ -1372,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "falkor"
|
||||
#define CORENAME "FALKOR"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_THUNDERX
|
||||
|
@ -1387,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "thunderx"
|
||||
#define CORENAME "THUNDERX"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_THUNDERX2T99
|
||||
|
@ -1405,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "thunderx2t99"
|
||||
#define CORENAME "THUNDERX2T99"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_TSV110
|
||||
|
@ -1421,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "tsv110"
|
||||
#define CORENAME "TSV110"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EMAG8180
|
||||
|
@ -1456,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "thunderx3t110"
|
||||
#define CORENAME "THUNDERX3T110"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_VORTEX
|
||||
|
@ -1488,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||
#define LIBNAME "a64fx"
|
||||
#define CORENAME "A64FX"
|
||||
#else
|
||||
#endif
|
||||
|
||||
/* Forced target FT2000 (ARM64): fixes the architecture names and the cache
 * description that is handed on as a list of -D options in ARCHCONFIG. */
#ifdef FORCE_FT2000
#define ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "FT2000"
#define SUBDIRNAME "arm64"
/* Each -D token must be followed by a space, otherwise two adjacent options
 * fuse into a single malformed one. L2_SIZE is 32 MiB (33554432), the same
 * value the CPU detection code prints for CPU_FT2000. */
#define ARCHCONFIG "-DFT2000 " \
       "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
       "-DL2_SIZE=33554432 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "ft2000"
#define CORENAME "FT2000"
#endif
|
||||
|
||||
#ifdef FORCE_ZARCH_GENERIC
|
||||
|
|
|
@ -678,7 +678,7 @@ endif ()
|
|||
set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SBGEMM_SMALL_K_B0_TT)
|
||||
set($SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16")
|
||||
|
|
|
@ -0,0 +1,216 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,216 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
# Dot-product kernel selection.
# DDOT and DSDOT always use the assembly source dot.S; the S/C/Z variants
# below depend on whether the C compiler is PGI.
DDOTKERNEL = dot.S
|
||||
# SDOT: generic C implementation unless building with PGI, in which case
# the assembly dot.S is used.
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
# CDOT/ZDOT: assembly zdot.S unless building with PGI, in which case the
# C implementation ../arm/zdot.c is used (note: the opposite direction of
# the SDOT choice above).
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
# SGEMM/STRMM kernels: SVE assembly sources whose file names embed
# SGEMM_UNROLL_N (GEMM uses the "v2x" variant, TRMM the "v1x" variant).
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
# SGEMM copy routines: INCOPY/ITCOPY use the SVE C sources, ONCOPY/OTCOPY
# use assembly files whose names embed an unroll factor.
# NOTE(review): the ONCOPY/OTCOPY names are built from DGEMM_UNROLL_N, not
# SGEMM_UNROLL_N - presumably the two values are equal for this target;
# confirm before changing either one.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
# CGEMM/CTRMM kernels: SVE assembly sources ("v1x" variant).
# NOTE(review): the file names are parameterized by ZGEMM_UNROLL_N rather
# than a CGEMM-specific unroll variable - presumably C and Z share the same
# N unroll on this target; confirm.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
# CGEMM copy routines: INCOPY/ITCOPY use the cgemm SVE C sources, while
# ONCOPY/OTCOPY point at the generic zgemm_* C copy sources.
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1 @@
|
|||
# This kernel list defines nothing of its own: it pulls in the complete
# kernel selection from KERNEL.CORTEXA57 in $(KERNELDIR).
include $(KERNELDIR)/KERNEL.CORTEXA57
|
|
@ -0,0 +1,216 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
# Dot-product kernel selection.
# DDOT and DSDOT are fixed to the assembly source dot.S; SDOT and the
# complex variants switch implementation when the C compiler is PGI.
DDOTKERNEL = dot.S
|
||||
# SDOT: ../generic/dot.c for every compiler except PGI; PGI builds use dot.S.
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
# CDOT/ZDOT: zdot.S for every compiler except PGI; PGI builds use the C
# source ../arm/zdot.c instead.
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
# SGEMM/STRMM kernels: SVE assembly, file names parameterized by
# SGEMM_UNROLL_N ("v2x" for GEMM, "v1x" for TRMM).
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
# SGEMM copy routines: the INCOPY/ITCOPY pair comes from SVE C sources, the
# ONCOPY/OTCOPY pair from assembly files named after an unroll factor.
# NOTE(review): that factor is DGEMM_UNROLL_N here, not SGEMM_UNROLL_N -
# presumably both are the same value on this target; verify.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
# CGEMM/CTRMM kernels: SVE assembly ("v1x" variant).
# NOTE(review): names are built from ZGEMM_UNROLL_N, not a CGEMM-specific
# variable - presumably the C and Z unroll factors match here; verify.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
# CGEMM copy routines: INCOPY/ITCOPY are the cgemm SVE C sources; the
# ONCOPY/OTCOPY entries reference the generic zgemm_* C copy sources.
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,3 @@
|
|||
# No kernel variables are set in this file directly; everything is taken
# from KERNEL.CORTEXA57 in $(KERNELDIR) via the include below.
include $(KERNELDIR)/KERNEL.CORTEXA57
|
||||
|
||||
|
|
@ -1239,7 +1239,6 @@ static void init_parameter(void) {
|
|||
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
|
||||
|
@ -1824,6 +1823,13 @@ static void init_parameter(void) {
|
|||
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
|
||||
#endif
|
||||
|
||||
#if BUILD_BFLOAT16==1
|
||||
TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
|
||||
((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
|
||||
+ TABLE_NAME.align) & ~TABLE_NAME.align)
|
||||
) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE==1
|
||||
TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
|
||||
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
|
||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "common.h"
|
||||
|
@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
|
||||
#define MASK_STORE_512(M, N) \
|
||||
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
||||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \
|
||||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \
|
||||
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
|
||||
#endif
|
||||
|
||||
|
@ -265,7 +266,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
int mm = M - i;
|
||||
if (!mm) return 0;
|
||||
if (mm > 4 || K < 16) {
|
||||
register __mmask8 mask asm("k1") = (1UL << mm) - 1;
|
||||
register __mmask8 mask = (1UL << mm) - 1;
|
||||
for (j = 0; j < n6; j += 6) {
|
||||
DECLARE_RESULT_512(0, 0);
|
||||
DECLARE_RESULT_512(0, 1);
|
||||
|
@ -588,3 +589,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
#include "../generic/gemm_small_matrix_kernel_nn.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
|
||||
#define MASK_STORE_512(M, N) \
|
||||
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
||||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \
|
||||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \
|
||||
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
|
||||
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
|
||||
__m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \
|
||||
|
@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
}
|
||||
int mm = M - i;
|
||||
if (mm >= 6) {
|
||||
register __mmask16 mask asm("k1") = (1UL << mm) - 1;
|
||||
register __mmask16 mask = (1UL << mm) - 1;
|
||||
for (j = 0; j < n8; j += 8) {
|
||||
DECLARE_RESULT_512(0, 0);
|
||||
DECLARE_RESULT_512(0, 1);
|
||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "common.h"
|
||||
|
@ -320,3 +321,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
#include "../generic/gemm_small_matrix_kernel_tn.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -114,10 +114,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc))
|
||||
|
||||
#define _MASK_STORE_C_2nx16(addr, val0, val1) \
|
||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \
|
||||
asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "k"(mmask)); \
|
||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); \
|
||||
asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "k"(mmask))
|
||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
|
||||
asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "Yk"(mmask)); \
|
||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); \
|
||||
asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "Yk"(mmask))
|
||||
|
||||
#define _REORDER_C_2X(result_0, result_1) { \
|
||||
__m512 tmp0, tmp1; \
|
||||
|
@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
asm("vmovups %0, (%1)": : "v"(val0), "r"(addr));
|
||||
|
||||
#define _MASK_STORE_C_16(addr, val0) \
|
||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \
|
||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask));
|
||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
|
||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask));
|
||||
|
||||
#define N_STORE_4X(A, Bx, By) { \
|
||||
_REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
#define ONE 1.e0f
|
||||
#define ZERO 0.e0f
|
||||
|
||||
#define SHUFFLE_MAGIC_NO (const int) 0x39
|
||||
|
||||
#undef STORE16_COMPLETE_RESULT
|
||||
#undef STORE16_MASK_COMPLETE_RESULT
|
||||
#undef SBGEMM_BLOCK_KERNEL_NN_32x8xK
|
||||
|
@ -356,7 +358,6 @@ void sbgemm_block_kernel_nn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
|||
bfloat16 * B_addr = B;
|
||||
float * C_addr = C;
|
||||
|
||||
int SHUFFLE_MAGIC_NO = 0x39;
|
||||
BLASLONG tag_k_32x = k & (~31);
|
||||
|
||||
#ifndef ONE_ALPHA
|
||||
|
@ -465,7 +466,6 @@ void sbgemm_block_kernel_nn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
|||
bfloat16 * B_addr = B;
|
||||
float * C_addr = C;
|
||||
|
||||
int SHUFFLE_MAGIC_NO = 0x39;
|
||||
BLASLONG tag_k_32x = k & (~31);
|
||||
|
||||
#ifndef ONE_ALPHA
|
||||
|
@ -1192,7 +1192,6 @@ void sbgemm_block_kernel_tn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
|||
bfloat16 * B_addr = B;
|
||||
float * C_addr = C;
|
||||
|
||||
int SHUFFLE_MAGIC_NO = 0x39;
|
||||
BLASLONG tag_k_32x = k & (~31);
|
||||
|
||||
#ifndef ONE_ALPHA
|
||||
|
@ -1291,7 +1290,6 @@ void sbgemm_block_kernel_tn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
|||
bfloat16 * B_addr = B;
|
||||
float * C_addr = C;
|
||||
|
||||
int SHUFFLE_MAGIC_NO = 0x39;
|
||||
BLASLONG tag_k_32x = k & (~31);
|
||||
|
||||
#ifndef ONE_ALPHA
|
||||
|
|
|
@ -135,7 +135,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
|||
0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b,
|
||||
0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
};
|
||||
u_int64_t permute_table2[] = {
|
||||
uint64_t permute_table2[] = {
|
||||
0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3,
|
||||
0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7,
|
||||
};
|
||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "common.h"
|
||||
|
@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
|
||||
#define MASK_STORE_512(M, N) \
|
||||
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
||||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \
|
||||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \
|
||||
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
|
||||
#endif
|
||||
|
||||
|
@ -266,7 +267,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
int mm = M - i;
|
||||
if (!mm) return 0;
|
||||
if (mm > 8 || K < 32) {
|
||||
register __mmask16 mask asm("k1") = (1UL << mm) - 1;
|
||||
register __mmask16 mask = (1UL << mm) - 1;
|
||||
for (j = 0; j < n6; j += 6) {
|
||||
DECLARE_RESULT_512(0, 0);
|
||||
DECLARE_RESULT_512(0, 1);
|
||||
|
@ -610,3 +611,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
#include "../generic/gemm_small_matrix_kernel_nn.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
|
||||
#define MASK_STORE_512(M, N) \
|
||||
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
||||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \
|
||||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \
|
||||
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
|
||||
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
|
||||
__m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \
|
||||
|
@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
}
|
||||
int mm = M - i;
|
||||
if (mm >= 12) {
|
||||
register __mmask16 mask asm("k1") = (1UL << mm) - 1;
|
||||
register __mmask16 mask = (1UL << mm) - 1;
|
||||
for (j = 0; j < n8; j += 8) {
|
||||
DECLARE_RESULT_512(0, 0);
|
||||
DECLARE_RESULT_512(0, 1);
|
||||
|
|
|
@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "common.h"
|
||||
|
@ -314,3 +315,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
#include "../generic/gemm_small_matrix_kernel_tn.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -452,11 +452,6 @@
|
|||
|
||||
MOVDDUP(4 * SIZE, A1, a1)
|
||||
|
||||
movsd 0 * SIZE(YY), yy1
|
||||
movhpd 1 * SIZE(YY), yy1
|
||||
movsd 2 * SIZE(YY), yy2
|
||||
movhpd 3 * SIZE(YY), yy2
|
||||
|
||||
movapd 8 * SIZE(XX), xtemp1
|
||||
movapd 10 * SIZE(XX), xtemp2
|
||||
movapd 12 * SIZE(XX), xtemp3
|
||||
|
@ -475,6 +470,12 @@
|
|||
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
||||
ALIGN_3
|
||||
|
||||
.L12_prep:
|
||||
movsd 0 * SIZE(YY), yy1
|
||||
movhpd 1 * SIZE(YY), yy1
|
||||
movsd 2 * SIZE(YY), yy2
|
||||
movhpd 3 * SIZE(YY), yy2
|
||||
|
||||
.L12:
|
||||
movapd xtemp1, xt1
|
||||
mulpd a1, xt1
|
||||
|
@ -608,8 +609,6 @@
|
|||
|
||||
movlpd yy2, 6 * SIZE(YY)
|
||||
movhpd yy2, 7 * SIZE(YY)
|
||||
movsd 10 * SIZE(YY), yy2
|
||||
movhpd 11 * SIZE(YY), yy2
|
||||
|
||||
movapd xtemp2, xt1
|
||||
movapd 18 * SIZE(XX), xtemp2
|
||||
|
@ -621,8 +620,6 @@
|
|||
|
||||
movlpd yy1, 4 * SIZE(YY)
|
||||
movhpd yy1, 5 * SIZE(YY)
|
||||
movsd 8 * SIZE(YY), yy1
|
||||
movhpd 9 * SIZE(YY), yy1
|
||||
|
||||
subq $-16 * SIZE, XX
|
||||
addq $ 8 * SIZE, YY
|
||||
|
@ -630,7 +627,8 @@
|
|||
addq $ 8 * SIZE, A2
|
||||
|
||||
decq I
|
||||
jg .L12
|
||||
jg .L12_prep
|
||||
jmp .L15
|
||||
ALIGN_3
|
||||
|
||||
.L14:
|
||||
|
@ -641,7 +639,6 @@
|
|||
jle .L16
|
||||
|
||||
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
|
||||
jmp .L15_pastcheck
|
||||
|
||||
.L15:
|
||||
movq M, I
|
||||
|
@ -650,6 +647,11 @@
|
|||
testq $2, I
|
||||
jle .L16
|
||||
|
||||
movsd 0 * SIZE(YY), yy1
|
||||
movhpd 1 * SIZE(YY), yy1
|
||||
movsd 2 * SIZE(YY), yy2
|
||||
movhpd 3 * SIZE(YY), yy2
|
||||
|
||||
.L15_pastcheck:
|
||||
movapd xtemp1, xt1
|
||||
mulpd a1, xt1
|
||||
|
@ -705,8 +707,6 @@
|
|||
|
||||
movlpd yy2, 2 * SIZE(YY)
|
||||
movhpd yy2, 3 * SIZE(YY)
|
||||
movsd 6 * SIZE(YY), yy2
|
||||
movhpd 7 * SIZE(YY), yy2
|
||||
|
||||
movapd xtemp2, xt1
|
||||
movapd 10 * SIZE(XX), xtemp2
|
||||
|
@ -717,8 +717,6 @@
|
|||
|
||||
movlpd yy1, 0 * SIZE(YY)
|
||||
movhpd yy1, 1 * SIZE(YY)
|
||||
movsd 4 * SIZE(YY), yy1
|
||||
movhpd 5 * SIZE(YY), yy1
|
||||
|
||||
addq $4 * SIZE, YY
|
||||
addq $4 * SIZE, A1
|
||||
|
@ -731,6 +729,9 @@
|
|||
|
||||
MOVDDUP(1 * SIZE, A1, a2)
|
||||
|
||||
movsd 0 * SIZE(YY), yy1
|
||||
movhpd 1 * SIZE(YY), yy1
|
||||
|
||||
movapd xtemp1, xt1
|
||||
mulpd a1, xt1
|
||||
mulpd atemp1, a1
|
||||
|
|
|
@ -2,9 +2,9 @@ add_subdirectory(SRC)
|
|||
if(BUILD_TESTING)
|
||||
add_subdirectory(TESTING)
|
||||
endif()
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc @ONLY)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/blas.pc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc
|
||||
DESTINATION ${PKG_CONFIG_DIR}
|
||||
COMPONENT Development
|
||||
)
|
||||
|
|
|
@ -97,10 +97,10 @@ if(BUILD_COMPLEX16)
|
|||
endif()
|
||||
list(REMOVE_DUPLICATES SOURCES)
|
||||
|
||||
add_library(blas ${SOURCES})
|
||||
add_library(${BLASLIB} ${SOURCES})
|
||||
set_target_properties(
|
||||
blas PROPERTIES
|
||||
${BLASLIB} PROPERTIES
|
||||
VERSION ${LAPACK_VERSION}
|
||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||
)
|
||||
lapack_install_library(blas)
|
||||
lapack_install_library(${BLASLIB})
|
||||
|
|
|
@ -2,7 +2,7 @@ macro(add_blas_test name src)
|
|||
get_filename_component(baseNAME ${src} NAME_WE)
|
||||
set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in")
|
||||
add_executable(${name} ${src})
|
||||
target_link_libraries(${name} blas)
|
||||
target_link_libraries(${name} ${BLASLIB})
|
||||
if(EXISTS "${TEST_INPUT}")
|
||||
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
|
||||
-DTEST=$<TARGET_FILE:${name}>
|
||||
|
|
|
@ -5,4 +5,4 @@ Name: BLAS
|
|||
Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms
|
||||
Version: @LAPACK_VERSION@
|
||||
URL: http://www.netlib.org/blas/
|
||||
Libs: -L${libdir} -lblas
|
||||
Libs: -L${libdir} -l@BLASLIB@
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
message(STATUS "CBLAS enable")
|
||||
enable_language(C)
|
||||
|
||||
set(LAPACK_INSTALL_EXPORT_NAME cblas-targets)
|
||||
set(LAPACK_INSTALL_EXPORT_NAME ${CBLASLIB}-targets)
|
||||
|
||||
# Create a header file cblas.h for the routines called in my C programs
|
||||
include(FortranCInterface)
|
||||
|
@ -42,15 +42,15 @@ if(BUILD_TESTING)
|
|||
endif()
|
||||
|
||||
if(NOT BLAS_FOUND)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} blas)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB})
|
||||
endif()
|
||||
|
||||
# Export cblas targets from the
|
||||
# install tree, if any.
|
||||
set(_cblas_config_install_guard_target "")
|
||||
if(ALL_TARGETS)
|
||||
install(EXPORT cblas-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
||||
install(EXPORT ${CBLASLIB}-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
|
||||
COMPONENT Development
|
||||
)
|
||||
# Choose one of the cblas targets to use as a guard for
|
||||
|
@ -61,7 +61,7 @@ endif()
|
|||
# Export cblas targets from the build tree, if any.
|
||||
set(_cblas_config_build_guard_target "")
|
||||
if(ALL_TARGETS)
|
||||
export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake)
|
||||
export(TARGETS ${ALL_TARGETS} FILE ${CBLASLIB}-targets.cmake)
|
||||
|
||||
# Choose one of the cblas targets to use as a guard
|
||||
# for cblas-config.cmake to load targets from the build tree.
|
||||
|
@ -69,26 +69,26 @@ if(ALL_TARGETS)
|
|||
endif()
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in
|
||||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY)
|
||||
${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in
|
||||
${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY)
|
||||
${LAPACK_BINARY_DIR}/${CBLASLIB}-config.cmake @ONLY)
|
||||
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc @ONLY)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cblas.pc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc
|
||||
DESTINATION ${PKG_CONFIG_DIR}
|
||||
)
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake @ONLY)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake
|
||||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake
|
||||
${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
|
||||
)
|
||||
|
||||
#install(EXPORT cblas-targets
|
||||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
||||
#install(EXPORT ${CBLASLIB}-targets
|
||||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
|
||||
# COMPONENT Development
|
||||
# )
|
||||
|
|
|
@ -5,6 +5,6 @@ Name: CBLAS
|
|||
Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms
|
||||
Version: @LAPACK_VERSION@
|
||||
URL: http://www.netlib.org/blas/#_cblas
|
||||
Libs: -L${libdir} -lcblas
|
||||
Libs: -L${libdir} -l@CBLASLIB@
|
||||
Cflags: -I${includedir}
|
||||
Requires.private: blas
|
||||
Requires.private: @BLASLIB@
|
||||
|
|
|
@ -4,11 +4,11 @@ find_package(LAPACK NO_MODULE)
|
|||
|
||||
# Load lapack targets from the build tree, including lapacke targets.
|
||||
if(NOT TARGET lapacke)
|
||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
||||
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
|
||||
endif()
|
||||
|
||||
# Report cblas header search locations from build tree.
|
||||
set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
||||
|
||||
# Report cblas libraries.
|
||||
set(CBLAS_LIBRARIES cblas)
|
||||
set(CBLAS_LIBRARIES @CBLASLIB@)
|
||||
|
|
|
@ -5,19 +5,19 @@ get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
|||
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
||||
|
||||
# Load the LAPACK package with which we were built.
|
||||
set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@")
|
||||
set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@")
|
||||
find_package(LAPACK NO_MODULE)
|
||||
|
||||
# Load cblas targets from the install tree.
|
||||
if(NOT TARGET cblas)
|
||||
include(${_CBLAS_SELF_DIR}/cblas-targets.cmake)
|
||||
if(NOT TARGET @CBLASLIB@)
|
||||
include(${_CBLAS_SELF_DIR}/@CBLASLIB@-targets.cmake)
|
||||
endif()
|
||||
|
||||
# Report cblas header search locations.
|
||||
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include)
|
||||
|
||||
# Report cblas libraries.
|
||||
set(CBLAS_LIBRARIES cblas)
|
||||
set(CBLAS_LIBRARIES @CBLASLIB@)
|
||||
|
||||
unset(_CBLAS_PREFIX)
|
||||
unset(_CBLAS_SELF_DIR)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
add_executable(xexample1_CBLAS cblas_example1.c)
|
||||
add_executable(xexample2_CBLAS cblas_example2.c)
|
||||
|
||||
target_link_libraries(xexample1_CBLAS cblas)
|
||||
target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES})
|
||||
target_link_libraries(xexample1_CBLAS ${CBLASLIB})
|
||||
target_link_libraries(xexample2_CBLAS ${CBLASLIB} ${BLAS_LIBRARIES})
|
||||
|
||||
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS)
|
||||
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS)
|
||||
|
|
|
@ -11,7 +11,7 @@ int main ( )
|
|||
|
||||
double *a, *x, *y;
|
||||
double alpha, beta;
|
||||
int m, n, lda, incx, incy, i;
|
||||
CBLAS_INDEX m, n, lda, incx, incy, i;
|
||||
|
||||
Layout = CblasColMajor;
|
||||
transa = CblasNoTrans;
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
int main (int argc, char **argv )
|
||||
{
|
||||
int rout=-1,info=0,m,n,k,lda,ldb,ldc;
|
||||
CBLAS_INDEX rout=-1,info=0,m,n,k,lda,ldb,ldc;
|
||||
double A[2] = {0.0,0.0},
|
||||
B[2] = {0.0,0.0},
|
||||
C[2] = {0.0,0.0},
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -11,9 +12,9 @@ extern "C" { /* Assume C declarations for C++ */
|
|||
* Enumerated and derived types
|
||||
*/
|
||||
#ifdef WeirdNEC
|
||||
#define CBLAS_INDEX long
|
||||
#define CBLAS_INDEX int64_t
|
||||
#else
|
||||
#define CBLAS_INDEX int
|
||||
#define CBLAS_INDEX int32_t
|
||||
#endif
|
||||
|
||||
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT;
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
#ifndef CBLAS_F77_H
|
||||
#define CBLAS_F77_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef CRAY
|
||||
#include <fortran.h>
|
||||
#define F77_CHAR _fcd
|
||||
|
@ -17,8 +19,12 @@
|
|||
#define F77_STRLEN(a) (_fcdlen)
|
||||
#endif
|
||||
|
||||
#ifndef F77_INT
|
||||
#ifdef WeirdNEC
|
||||
#define F77_INT long
|
||||
#define F77_INT int64_t
|
||||
#else
|
||||
#define F77_INT int32_t
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef F77_CHAR
|
||||
|
|
|
@ -113,16 +113,16 @@ if(BUILD_COMPLEX16)
|
|||
endif()
|
||||
list(REMOVE_DUPLICATES SOURCES)
|
||||
|
||||
add_library(cblas ${SOURCES})
|
||||
add_library(${CBLASLIB} ${SOURCES})
|
||||
set_target_properties(
|
||||
cblas PROPERTIES
|
||||
${CBLASLIB} PROPERTIES
|
||||
LINKER_LANGUAGE C
|
||||
VERSION ${LAPACK_VERSION}
|
||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||
)
|
||||
target_include_directories(cblas PUBLIC
|
||||
target_include_directories(${CBLASLIB} PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
|
||||
$<INSTALL_INTERFACE:include>
|
||||
)
|
||||
target_link_libraries(cblas PRIVATE ${BLAS_LIBRARIES})
|
||||
lapack_install_library(cblas)
|
||||
target_link_libraries(${CBLASLIB} PRIVATE ${BLAS_LIBRARIES})
|
||||
lapack_install_library(${CBLASLIB})
|
||||
|
|
|
@ -52,9 +52,9 @@ if(BUILD_SINGLE)
|
|||
add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
|
||||
target_link_libraries(xscblat1 cblas)
|
||||
target_link_libraries(xscblat2 cblas)
|
||||
target_link_libraries(xscblat3 cblas)
|
||||
target_link_libraries(xscblat1 ${CBLASLIB})
|
||||
target_link_libraries(xscblat2 ${CBLASLIB})
|
||||
target_link_libraries(xscblat3 ${CBLASLIB})
|
||||
|
||||
add_cblas_test(stest1.out "" xscblat1)
|
||||
add_cblas_test(stest2.out sin2 xscblat2)
|
||||
|
@ -66,9 +66,9 @@ if(BUILD_DOUBLE)
|
|||
add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
|
||||
target_link_libraries(xdcblat1 cblas)
|
||||
target_link_libraries(xdcblat2 cblas)
|
||||
target_link_libraries(xdcblat3 cblas)
|
||||
target_link_libraries(xdcblat1 ${CBLASLIB})
|
||||
target_link_libraries(xdcblat2 ${CBLASLIB})
|
||||
target_link_libraries(xdcblat3 ${CBLASLIB})
|
||||
|
||||
add_cblas_test(dtest1.out "" xdcblat1)
|
||||
add_cblas_test(dtest2.out din2 xdcblat2)
|
||||
|
@ -80,9 +80,9 @@ if(BUILD_COMPLEX)
|
|||
add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
|
||||
target_link_libraries(xccblat1 cblas ${BLAS_LIBRARIES})
|
||||
target_link_libraries(xccblat2 cblas)
|
||||
target_link_libraries(xccblat3 cblas)
|
||||
target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES})
|
||||
target_link_libraries(xccblat2 ${CBLASLIB})
|
||||
target_link_libraries(xccblat3 ${CBLASLIB})
|
||||
|
||||
add_cblas_test(ctest1.out "" xccblat1)
|
||||
add_cblas_test(ctest2.out cin2 xccblat2)
|
||||
|
@ -94,9 +94,9 @@ if(BUILD_COMPLEX16)
|
|||
add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
|
||||
|
||||
target_link_libraries(xzcblat1 cblas)
|
||||
target_link_libraries(xzcblat2 cblas)
|
||||
target_link_libraries(xzcblat3 cblas)
|
||||
target_link_libraries(xzcblat1 ${CBLASLIB})
|
||||
target_link_libraries(xzcblat2 ${CBLASLIB})
|
||||
target_link_libraries(xzcblat3 ${CBLASLIB})
|
||||
|
||||
add_cblas_test(ztest1.out "" xzcblat1)
|
||||
add_cblas_test(ztest2.out zin2 xzcblat2)
|
||||
|
|
|
@ -14,6 +14,19 @@ macro( CheckLAPACKCompilerFlags )
|
|||
|
||||
set( FPE_EXIT FALSE )
|
||||
|
||||
# FORTRAN ILP default
|
||||
if ( FORTRAN_ILP )
|
||||
if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" )
|
||||
if ( WIN32 )
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} /integer-size:64")
|
||||
else ()
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -integer-size 64")
|
||||
endif()
|
||||
else()
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# GNU Fortran
|
||||
if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" )
|
||||
if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]")
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Load lapack targets from the build tree if necessary.
|
||||
set(_LAPACK_TARGET "@_lapack_config_build_guard_target@")
|
||||
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
||||
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
|
||||
endif()
|
||||
unset(_LAPACK_TARGET)
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ get_filename_component(_LAPACK_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
|||
# Load lapack targets from the install tree if necessary.
|
||||
set(_LAPACK_TARGET "@_lapack_config_install_guard_target@")
|
||||
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
||||
include("${_LAPACK_SELF_DIR}/lapack-targets.cmake")
|
||||
include("${_LAPACK_SELF_DIR}/@LAPACKLIB@-targets.cmake")
|
||||
endif()
|
||||
unset(_LAPACK_TARGET)
|
||||
|
||||
|
|
|
@ -44,6 +44,24 @@ endif()
|
|||
# By default static library
|
||||
option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
|
||||
|
||||
# By default build index32 library
|
||||
option(BUILD_INDEX64 "Build Index-64 API libraries" OFF)
|
||||
if(BUILD_INDEX64)
|
||||
set(BLASLIB "blas64")
|
||||
set(CBLASLIB "cblas64")
|
||||
set(LAPACKLIB "lapack64")
|
||||
set(LAPACKELIB "lapacke64")
|
||||
set(TMGLIB "tmglib64")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWeirdNEC -DLAPACK_ILP64 -DHAVE_LAPACK_CONFIG_H")
|
||||
set(FORTRAN_ILP TRUE)
|
||||
else()
|
||||
set(BLASLIB "blas")
|
||||
set(CBLASLIB "cblas")
|
||||
set(LAPACKLIB "lapack")
|
||||
set(LAPACKELIB "lapacke")
|
||||
set(TMGLIB "tmglib")
|
||||
endif()
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# Updated OSX RPATH settings
|
||||
|
@ -73,10 +91,10 @@ include(PreventInBuildInstalls)
|
|||
|
||||
if(UNIX)
|
||||
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel)
|
||||
list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict")
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict")
|
||||
endif()
|
||||
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL)
|
||||
list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none")
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none")
|
||||
endif()
|
||||
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler.
|
||||
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin
|
||||
|
@ -112,7 +130,7 @@ endif()
|
|||
|
||||
|
||||
# --------------------------------------------------
|
||||
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets)
|
||||
set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets)
|
||||
|
||||
macro(lapack_install_library lib)
|
||||
install(TARGETS ${lib}
|
||||
|
@ -220,7 +238,7 @@ endif()
|
|||
if(NOT BLAS_FOUND)
|
||||
message(STATUS "Using supplied NETLIB BLAS implementation")
|
||||
add_subdirectory(BLAS)
|
||||
set(BLAS_LIBRARIES blas)
|
||||
set(BLAS_LIBRARIES ${BLASLIB})
|
||||
else()
|
||||
set(CMAKE_EXE_LINKER_FLAGS
|
||||
"${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}"
|
||||
|
@ -279,7 +297,7 @@ endif()
|
|||
# Neither user specified or optimized LAPACK libraries can be used
|
||||
if(NOT LATESTLAPACK_FOUND)
|
||||
message(STATUS "Using supplied NETLIB LAPACK implementation")
|
||||
set(LAPACK_LIBRARIES lapack)
|
||||
set(LAPACK_LIBRARIES ${LAPACKLIB})
|
||||
add_subdirectory(SRC)
|
||||
else()
|
||||
set(CMAKE_EXE_LINKER_FLAGS
|
||||
|
@ -371,23 +389,23 @@ include(CPack)
|
|||
# --------------------------------------------------
|
||||
|
||||
if(NOT BLAS_FOUND)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} blas)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB})
|
||||
endif()
|
||||
|
||||
if(NOT LATESTLAPACK_FOUND)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} lapack)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKLIB})
|
||||
endif()
|
||||
|
||||
if(BUILD_TESTING OR LAPACKE_WITH_TMG)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} tmglib)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} ${TMGLIB})
|
||||
endif()
|
||||
|
||||
# Export lapack targets, not including lapacke, from the
|
||||
# install tree, if any.
|
||||
set(_lapack_config_install_guard_target "")
|
||||
if(ALL_TARGETS)
|
||||
install(EXPORT lapack-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
|
||||
install(EXPORT ${LAPACKLIB}-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION}
|
||||
COMPONENT Development
|
||||
)
|
||||
|
||||
|
@ -398,18 +416,18 @@ endif()
|
|||
|
||||
# Include cblas in targets exported from the build tree.
|
||||
if(CBLAS)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} cblas)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} ${CBLASLIB})
|
||||
endif()
|
||||
|
||||
# Include lapacke in targets exported from the build tree.
|
||||
if(LAPACKE)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} lapacke)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKELIB})
|
||||
endif()
|
||||
|
||||
# Export lapack and lapacke targets from the build tree, if any.
|
||||
set(_lapack_config_build_guard_target "")
|
||||
if(ALL_TARGETS)
|
||||
export(TARGETS ${ALL_TARGETS} FILE lapack-targets.cmake)
|
||||
export(TARGETS ${ALL_TARGETS} FILE ${LAPACKLIB}-targets.cmake)
|
||||
|
||||
# Choose one of the lapack or lapacke targets to use as a guard
|
||||
# for lapack-config.cmake to load targets from the build tree.
|
||||
|
@ -417,30 +435,30 @@ if(ALL_TARGETS)
|
|||
endif()
|
||||
|
||||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in
|
||||
${LAPACK_BINARY_DIR}/lapack-config.cmake @ONLY)
|
||||
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config.cmake @ONLY)
|
||||
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc @ONLY)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc
|
||||
DESTINATION ${PKG_CONFIG_DIR}
|
||||
COMPONENT Development
|
||||
)
|
||||
|
||||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in
|
||||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake @ONLY)
|
||||
${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake @ONLY)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake
|
||||
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake
|
||||
VERSION ${LAPACK_VERSION}
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
install(FILES
|
||||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake
|
||||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
|
||||
${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake
|
||||
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION}
|
||||
COMPONENT Development
|
||||
)
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
message(STATUS "LAPACKE enable")
|
||||
enable_language(C)
|
||||
|
||||
set(LAPACK_INSTALL_EXPORT_NAME lapacke-targets)
|
||||
set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKELIB}-targets)
|
||||
|
||||
# Create a header file lapacke_mangling.h for the routines called in my C programs
|
||||
include(FortranCInterface)
|
||||
|
@ -72,28 +72,28 @@ if(LAPACKE_WITH_TMG)
|
|||
endif()
|
||||
list(APPEND SOURCES ${UTILS})
|
||||
|
||||
add_library(lapacke ${SOURCES})
|
||||
add_library(${LAPACKELIB} ${SOURCES})
|
||||
set_target_properties(
|
||||
lapacke PROPERTIES
|
||||
${LAPACKELIB} PROPERTIES
|
||||
LINKER_LANGUAGE C
|
||||
VERSION ${LAPACK_VERSION}
|
||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||
)
|
||||
target_include_directories(lapacke PUBLIC
|
||||
target_include_directories(${LAPACKELIB} PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:include>
|
||||
)
|
||||
if(WIN32 AND NOT UNIX)
|
||||
target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE)
|
||||
target_compile_definitions(${LAPACKELIB} PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE)
|
||||
message(STATUS "Windows BUILD")
|
||||
endif()
|
||||
|
||||
if(LAPACKE_WITH_TMG)
|
||||
target_link_libraries(lapacke PRIVATE tmglib)
|
||||
target_link_libraries(${LAPACKELIB} PRIVATE ${TMGLIB})
|
||||
endif()
|
||||
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES})
|
||||
target_link_libraries(${LAPACKELIB} PRIVATE ${LAPACK_LIBRARIES})
|
||||
|
||||
lapack_install_library(lapacke)
|
||||
lapack_install_library(${LAPACKELIB})
|
||||
install(
|
||||
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
|
@ -105,28 +105,28 @@ if(BUILD_TESTING)
|
|||
endif()
|
||||
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc @ONLY)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc
|
||||
DESTINATION ${PKG_CONFIG_DIR}
|
||||
COMPONENT Development
|
||||
)
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in
|
||||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake @ONLY)
|
||||
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in
|
||||
${LAPACK_BINARY_DIR}/lapacke-config.cmake @ONLY)
|
||||
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config.cmake @ONLY)
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake @ONLY)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake @ONLY)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake
|
||||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake
|
||||
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION}
|
||||
COMPONENT Development
|
||||
)
|
||||
|
||||
install(EXPORT lapacke-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
|
||||
install(EXPORT ${LAPACKELIB}-targets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION}
|
||||
COMPONENT Development
|
||||
)
|
||||
|
|
|
@ -3,8 +3,8 @@ set(LAPACK_DIR "@LAPACK_BINARY_DIR@")
|
|||
find_package(LAPACK NO_MODULE)
|
||||
|
||||
# Load lapack targets from the build tree, including lapacke targets.
|
||||
if(NOT TARGET lapacke)
|
||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
||||
if(NOT TARGET @LAPACKELIB@)
|
||||
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
|
||||
endif()
|
||||
|
||||
# Hint for project building against lapack
|
||||
|
@ -14,4 +14,4 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
|
|||
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
||||
|
||||
# Report lapacke libraries.
|
||||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})
|
||||
set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES})
|
||||
|
|
|
@ -5,12 +5,12 @@ get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH)
|
|||
get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH)
|
||||
|
||||
# Load the LAPACK package with which we were built.
|
||||
set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@")
|
||||
# Point at the LAPACK package config directory. It is installed under
# cmake/<LAPACKLIB>-<version> (lapack or lapack64), so substitute @LAPACKLIB@;
# an unset placeholder would expand to an empty string under @ONLY.
set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@")
|
||||
find_package(LAPACK NO_MODULE)
|
||||
|
||||
# Load lapacke targets from the install tree.
|
||||
if(NOT TARGET lapacke)
|
||||
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake)
|
||||
if(NOT TARGET @LAPACKELIB@)
|
||||
include(${_LAPACKE_SELF_DIR}/@LAPACKELIB@-targets.cmake)
|
||||
endif()
|
||||
|
||||
# Hint for project building against lapack
|
||||
|
@ -20,7 +20,7 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
|
|||
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include)
|
||||
|
||||
# Report lapacke libraries.
|
||||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})
|
||||
set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES})
|
||||
|
||||
unset(_LAPACKE_PREFIX)
|
||||
unset(_LAPACKE_SELF_DIR)
|
||||
|
|
|
@ -3,10 +3,10 @@ add_executable(xexample_DGESV_colmajor example_DGESV_colmajor.c lapacke_example_
|
|||
add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h)
|
||||
add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h)
|
||||
|
||||
target_link_libraries(xexample_DGESV_rowmajor lapacke)
|
||||
target_link_libraries(xexample_DGESV_colmajor lapacke)
|
||||
target_link_libraries(xexample_DGELS_rowmajor lapacke)
|
||||
target_link_libraries(xexample_DGELS_colmajor lapacke)
|
||||
target_link_libraries(xexample_DGESV_rowmajor ${LAPACKELIB})
|
||||
target_link_libraries(xexample_DGESV_colmajor ${LAPACKELIB})
|
||||
target_link_libraries(xexample_DGELS_rowmajor ${LAPACKELIB})
|
||||
target_link_libraries(xexample_DGELS_colmajor ${LAPACKELIB})
|
||||
|
||||
add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor)
|
||||
add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor)
|
||||
|
|
|
@ -49,12 +49,13 @@ extern "C" {
|
|||
#endif /* __cplusplus */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifndef lapack_int
|
||||
#if defined(LAPACK_ILP64)
|
||||
#define lapack_int long
|
||||
#define lapack_int int64_t
|
||||
#else
|
||||
#define lapack_int int
|
||||
#define lapack_int int32_t
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -67,7 +67,11 @@ extern "C" {
|
|||
void LAPACKE_xerbla( const char *name, lapack_int info );
|
||||
|
||||
/* Compare two chars (case-insensitive) */
|
||||
lapack_logical LAPACKE_lsame( char ca, char cb );
|
||||
lapack_logical LAPACKE_lsame( char ca, char cb )
|
||||
#if defined __GNUC__
|
||||
__attribute__((const))
|
||||
#endif
|
||||
;
|
||||
|
||||
/* Functions to convert column-major to row-major 2d arrays and vice versa. */
|
||||
void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n,
|
||||
|
|
|
@ -5,6 +5,6 @@ Name: LAPACKE
|
|||
Description: C Standard Interface to LAPACK Linear Algebra PACKage
|
||||
Version: @LAPACK_VERSION@
|
||||
URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
|
||||
Libs: -L${libdir} -llapacke
|
||||
Libs: -L${libdir} -l@LAPACKELIB@
|
||||
Cflags: -I${includedir}
|
||||
Requires.private: lapack
|
||||
Requires.private: @LAPACKLIB@
|
||||
|
|
|
@ -500,21 +500,21 @@ if(BUILD_COMPLEX16)
|
|||
endif()
|
||||
list(REMOVE_DUPLICATES SOURCES)
|
||||
|
||||
add_library(lapack ${SOURCES})
|
||||
add_library(${LAPACKLIB} ${SOURCES})
|
||||
set_target_properties(
|
||||
lapack PROPERTIES
|
||||
${LAPACKLIB} PROPERTIES
|
||||
VERSION ${LAPACK_VERSION}
|
||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||
)
|
||||
|
||||
if(USE_XBLAS)
|
||||
target_link_libraries(lapack PRIVATE ${XBLAS_LIBRARY})
|
||||
target_link_libraries(${LAPACKLIB} PRIVATE ${XBLAS_LIBRARY})
|
||||
endif()
|
||||
target_link_libraries(lapack PRIVATE ${BLAS_LIBRARIES})
|
||||
target_link_libraries(${LAPACKLIB} PRIVATE ${BLAS_LIBRARIES})
|
||||
|
||||
if(_is_coverage_build)
|
||||
target_link_libraries(lapack PRIVATE gcov)
|
||||
add_coverage(lapack)
|
||||
target_link_libraries(${LAPACKLIB} PRIVATE gcov)
|
||||
add_coverage(${LAPACKLIB})
|
||||
endif()
|
||||
|
||||
lapack_install_library(lapack)
|
||||
lapack_install_library(${LAPACKLIB})
|
||||
|
|
|
@ -47,6 +47,6 @@ if(BUILD_COMPLEX16)
|
|||
endif()
|
||||
list(REMOVE_DUPLICATES SOURCES)
|
||||
|
||||
add_library(tmglib ${SOURCES})
|
||||
target_link_libraries(tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
|
||||
lapack_install_library(tmglib)
|
||||
add_library(${TMGLIB} ${SOURCES})
|
||||
target_link_libraries(${TMGLIB} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
|
||||
lapack_install_library(${TMGLIB})
|
||||
|
|
12
param.h
12
param.h
|
@ -3128,9 +3128,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define SYMV_P 16
|
||||
|
||||
#if defined(CORTEXA57) || \
|
||||
#if defined(CORTEXA57) || defined(CORTEXX1) || \
|
||||
defined(CORTEXA72) || defined(CORTEXA73) || \
|
||||
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
||||
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -3147,7 +3147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
||||
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
||||
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
||||
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
||||
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_P 256
|
||||
#define CGEMM_DEFAULT_P 256
|
||||
|
@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#elif defined(ARMV8SVE) || defined(A64FX)
|
||||
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2)
|
||||
|
||||
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||
|
@ -3423,8 +3423,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
|
|||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
|
|
|
@ -115,7 +115,7 @@
|
|||
#define INCLUDE_CTGSYL INCLUDE_XTGSYL
|
||||
#define INCLUDE_ZTGSYL INCLUDE_XTGSYL
|
||||
|
||||
#define INCLUDE_XGEMMT 0
|
||||
#define INCLUDE_XGEMMT 1
|
||||
#define INCLUDE_SGEMMT INCLUDE_XGEMMT
|
||||
#define INCLUDE_DGEMMT INCLUDE_XGEMMT
|
||||
#define INCLUDE_CGEMMT INCLUDE_XGEMMT
|
||||
|
|
|
@ -566,7 +566,8 @@ void LAPACK(sgemmt)(
|
|||
const float *B, const blasint *ldB,
|
||||
const float *beta, float *C, const blasint *ldC
|
||||
) {
|
||||
RELAPACK_sgemmt(uplo, n, A, ldA, info);
|
||||
blasint info;
|
||||
RELAPACK_sgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -578,7 +579,8 @@ void LAPACK(dgemmt)(
|
|||
const double *B, const blasint *ldB,
|
||||
const double *beta, double *C, const blasint *ldC
|
||||
) {
|
||||
RELAPACK_dgemmt(uplo, n, A, ldA, info);
|
||||
blasint info;
|
||||
RELAPACK_dgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -590,7 +592,8 @@ void LAPACK(cgemmt)(
|
|||
const float *B, const blasint *ldB,
|
||||
const float *beta, float *C, const blasint *ldC
|
||||
) {
|
||||
RELAPACK_cgemmt(uplo, n, A, ldA, info);
|
||||
blasint info;
|
||||
RELAPACK_cgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -602,6 +605,7 @@ void LAPACK(zgemmt)(
|
|||
const double *B, const blasint *ldB,
|
||||
const double *beta, double *C, const blasint *ldC
|
||||
) {
|
||||
RELAPACK_zgemmt(uplo, n, A, ldA, info);
|
||||
blasint info;
|
||||
RELAPACK_zgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -30,6 +30,10 @@ if(WIN32)
|
|||
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1
|
||||
"if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n"
|
||||
"$ErrorActionPreference = \"Stop\"\n"
|
||||
"If ((Get-Content $args[1] | & file - | %{$_ -match \"BOM\"}) -contains $true) {\n"
|
||||
"echo 'Skipped due to wrong input encoding'\n"
|
||||
"exit 0\n"
|
||||
"}\n"
|
||||
"Get-Content $args[1] | & $args[0]\n"
|
||||
"If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n"
|
||||
"echo Error\n"
|
||||
|
|
Loading…
Reference in New Issue