Merge branch 'develop' into clapack

This commit is contained in:
Martin Kroeker 2022-03-30 18:01:38 +02:00 committed by GitHub
commit e3250e2362
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
63 changed files with 1217 additions and 221 deletions

View File

@ -23,9 +23,9 @@ if(MSVC AND NOT DEFINED NOFORTRAN)
endif() endif()
####### #######
if(MSVC) option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
endif() option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF) option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
@ -320,7 +320,9 @@ if (NOT NOFORTRAN)
if(NOT NO_CBLAS) if(NOT NO_CBLAS)
add_subdirectory(ctest) add_subdirectory(ctest)
endif() endif()
if (BUILD_TESTING)
add_subdirectory(lapack-netlib/TESTING) add_subdirectory(lapack-netlib/TESTING)
endif()
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test) add_subdirectory(cpp_thread_test)
endif() endif()

View File

@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif endif
endif endif
# FT2000 (listed under Phytium in the CPU detection code): build for the
# ARMv8-A baseline with Cortex-A72 tuning.
ifeq ($(CORE), FT2000)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
# The Fortran flags are left untouched when the Fortran compiler is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
# Use a72 tunings because Neoverse-N1 is only available # Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9 # in GCC>=9
ifeq ($(CORE), NEOVERSEN1) ifeq ($(CORE), NEOVERSEN1)
@ -229,6 +236,43 @@ endif
endif endif
endif endif
# Cortex-X1, guarded by the GCCVERSIONGTEQ11 / ISCLANG check: build for
# ARMv8.2-A and borrow the Cortex-A72 tuning model.
# The tuning name has to be spelled "cortex-a72", exactly as in the other
# -mtune settings of this file; "cortexa72" is not a CPU name the compiler
# accepts and makes every compilation for this target fail.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX1)
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
# The Fortran flags are left untouched when the Fortran compiler is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
endif
# Cortex-X2, guarded by the GCCVERSIONGTEQ11 / ISCLANG check: build for
# ARMv8.4-A with the SVE extension. No -mtune value is set for this core.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX2)
CCOMMON_OPT += -march=armv8.4-a+sve
# The Fortran flags are left untouched when the Fortran compiler is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
# Cortex-A510: build for ARMv8.4-A with the SVE extension. No -mtune value is
# set for this core. The commented-out line below is a variant of the guard
# that checks ISCLANG only; the active guard also accepts GCCVERSIONGTEQ11.
#ifeq (1, $(filter 1,$(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA510)
CCOMMON_OPT += -march=armv8.4-a+sve
# The Fortran flags are left untouched when the Fortran compiler is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
# Cortex-A710, guarded by the GCCVERSIONGTEQ11 / ISCLANG check: build for
# ARMv8.4-A with the SVE extension. No -mtune value is set for this core.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA710)
CCOMMON_OPT += -march=armv8.4-a+sve
# The Fortran flags are left untouched when the Fortran compiler is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
endif endif
endif endif

View File

@ -71,7 +71,8 @@ endif
getarch : getarch.c cpuid.S dummy $(CPUIDEMU) getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU) avx512=$$(perl c_check - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
getarch_2nd : getarch_2nd.c config.h dummy getarch_2nd : getarch_2nd.c config.h dummy
ifndef TARGET_CORE ifndef TARGET_CORE

View File

@ -92,6 +92,10 @@ CORTEXA53
CORTEXA57 CORTEXA57
CORTEXA72 CORTEXA72
CORTEXA73 CORTEXA73
CORTEXA510
CORTEXA710
CORTEXX1
CORTEXX2
NEOVERSEN1 NEOVERSEN1
NEOVERSEV1 NEOVERSEV1
NEOVERSEN2 NEOVERSEN2
@ -103,6 +107,9 @@ THUNDERX2T99
TSV110 TSV110
THUNDERX3T110 THUNDERX3T110
VORTEX VORTEX
A64FX
ARMV8SVE
FT2000
9.System Z: 9.System Z:
ZARCH_GENERIC ZARCH_GENERIC

View File

@ -65,7 +65,7 @@ jobs:
- task: CMake@1 - task: CMake@1
inputs: inputs:
workingDirectory: 'build' # Optional workingDirectory: 'build' # Optional
cmakeArgs: '-G "Visual Studio 16 2019" ..' cmakeArgs: '-G "Visual Studio 17 2022" ..'
- task: CMake@1 - task: CMake@1
inputs: inputs:
cmakeArgs: '--build . --config Release' cmakeArgs: '--build . --config Release'
@ -103,7 +103,7 @@ jobs:
- job: Windows_flang_clang - job: Windows_flang_clang
pool: pool:
vmImage: 'windows-latest' vmImage: 'windows-2022'
steps: steps:
- script: | - script: |
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%" set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
@ -114,8 +114,8 @@ jobs:
conda install --yes --quiet ninja flang conda install --yes --quiet ninja flang
mkdir build mkdir build
cd build cd build
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
cmake --build . --config Release cmake --build . --config Release
ctest ctest

View File

@ -254,7 +254,7 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) {
# $tmpf = new File::Temp( UNLINK => 1 ); # $tmpf = new File::Temp( UNLINK => 1 );
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; $code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; print $fh "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
$args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf"; $args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf";
if ($compiler eq "PGI") { if ($compiler eq "PGI") {
$args = " -tp skylake -c -o $tmpf.o $tmpf"; $args = " -tp skylake -c -o $tmpf.o $tmpf";
@ -278,7 +278,7 @@ if ($data =~ /HAVE_C11/) {
$c11_atomics = 0; $c11_atomics = 0;
} else { } else {
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
print $tmpf "#include <stdatomic.h>\nint main(void){}\n"; print $fh "#include <stdatomic.h>\nint main(void){}\n";
$args = " -c -o $tmpf.o $tmpf"; $args = " -c -o $tmpf.o $tmpf";
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
system(@cmd) == 0; system(@cmd) == 0;
@ -316,6 +316,7 @@ if ($architecture ne $hostarch) {
} }
$cross = 1 if ($os ne $hostos); $cross = 1 if ($os ne $hostos);
# An Android target on a Linux host with TERMUX_APP_PID set in the environment
# (presumably a build running inside Termux itself) is not a cross build, so
# the flag set just above is cleared again.
# The variable is tested with defined() and the string operator "ne": the
# numeric "!=" would coerce both the value and "" to numbers, which warns
# under "use warnings" and treats any non-numeric value as unset.
$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && defined($ENV{TERMUX_APP_PID}) && ($ENV{TERMUX_APP_PID} ne ""));
$openmp = "" if $ENV{USE_OPENMP} != 1; $openmp = "" if $ENV{USE_OPENMP} != 1;

View File

@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE)
endif () endif ()
endif () endif ()
# Cortex-A510: unless DYNAMIC_ARCH is set, append the ARMv8-A + SVE
# architecture flag to the common C compiler options.
if (${CORE} STREQUAL CORTEXA510)
if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif ()
endif ()
# Cortex-A710: unless DYNAMIC_ARCH is set, append the ARMv8-A + SVE
# architecture flag to the common C compiler options.
if (${CORE} STREQUAL CORTEXA710)
if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif ()
endif ()
# Cortex-X1: unless DYNAMIC_ARCH is set, append the ARMv8.2-A architecture
# flag (without SVE) to the common C compiler options.
if (${CORE} STREQUAL CORTEXX1)
if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
endif ()
endif ()
# Cortex-X2: unless DYNAMIC_ARCH is set, append the ARMv8-A + SVE
# architecture flag to the common C compiler options.
if (${CORE} STREQUAL CORTEXX2)
if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif ()
endif ()
if (${CORE} STREQUAL POWER10) if (${CORE} STREQUAL POWER10)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)

View File

@ -67,8 +67,16 @@ if (${F_COMPILER} STREQUAL "GFORTRAN")
if (BINARY64) if (BINARY64)
set(FCOMMON_OPT "${FCOMMON_OPT} -m64") set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
if (INTERFACE64) if (INTERFACE64)
if (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel")
if (WIN32)
set(FCOMMON_OPT "${FCOMMON_OPT} /integer-size:64")
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -integer-size 64")
endif ()
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
endif () endif ()
endif ()
else () else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -m32") set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
endif () endif ()

View File

@ -2610,8 +2610,9 @@
#endif #endif
#ifndef ASSEMBLER #ifndef ASSEMBLER
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\ #if !defined(DYNAMIC_ARCH) \
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K))
extern BLASLONG gemm_offset_a; extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b; extern BLASLONG gemm_offset_b;
extern BLASLONG sbgemm_p; extern BLASLONG sbgemm_p;

View File

@ -45,6 +45,10 @@ size_t length64=sizeof(value64);
#define CPU_NEOVERSEN1 11 #define CPU_NEOVERSEN1 11
#define CPU_NEOVERSEV1 16 #define CPU_NEOVERSEV1 16
#define CPU_NEOVERSEN2 17 #define CPU_NEOVERSEN2 17
#define CPU_CORTEXX1 18
#define CPU_CORTEXX2 19
#define CPU_CORTEXA510 20
#define CPU_CORTEXA710 21
// Qualcomm // Qualcomm
#define CPU_FALKOR 6 #define CPU_FALKOR 6
// Cavium // Cavium
@ -59,6 +63,8 @@ size_t length64=sizeof(value64);
#define CPU_VORTEX 13 #define CPU_VORTEX 13
// Fujitsu // Fujitsu
#define CPU_A64FX 15 #define CPU_A64FX 15
// Phytium
#define CPU_FT2000 22
static char *cpuname[] = { static char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
@ -73,12 +79,17 @@ static char *cpuname[] = {
"TSV110", "TSV110",
"EMAG8180", "EMAG8180",
"NEOVERSEN1", "NEOVERSEN1",
"NEOVERSEV1"
"NEOVERSEN2"
"THUNDERX3T110", "THUNDERX3T110",
"VORTEX", "VORTEX",
"CORTEXA55", "CORTEXA55",
"A64FX" "A64FX",
"NEOVERSEV1",
"NEOVERSEN2",
"CORTEXX1",
"CORTEXX2",
"CORTEXA510",
"CORTEXA710",
"FT2000"
}; };
static char *cpuname_lower[] = { static char *cpuname_lower[] = {
@ -94,12 +105,17 @@ static char *cpuname_lower[] = {
"tsv110", "tsv110",
"emag8180", "emag8180",
"neoversen1", "neoversen1",
"neoversev1",
"neoversen2",
"thunderx3t110", "thunderx3t110",
"vortex", "vortex",
"cortexa55", "cortexa55",
"a64fx" "a64fx",
"neoversev1",
"neoversen2",
"cortexx1",
"cortexx2",
"cortexa510",
"cortexa710",
"ft2000"
}; };
int get_feature(char *search) int get_feature(char *search)
@ -182,6 +198,14 @@ int detect(void)
return CPU_NEOVERSEN2; return CPU_NEOVERSEN2;
else if (strstr(cpu_part, "0xd05")) else if (strstr(cpu_part, "0xd05"))
return CPU_CORTEXA55; return CPU_CORTEXA55;
else if (strstr(cpu_part, "0xd46"))
return CPU_CORTEXA510;
else if (strstr(cpu_part, "0xd47"))
return CPU_CORTEXA710;
else if (strstr(cpu_part, "0xd44"))
return CPU_CORTEXX1;
else if (strstr(cpu_part, "0xd4c"))
return CPU_CORTEXX2;
} }
// Qualcomm // Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
@ -202,6 +226,13 @@ int detect(void)
// Fujitsu // Fujitsu
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
return CPU_A64FX; return CPU_A64FX;
// Apple
else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
return CPU_VORTEX;
// Phytium
else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661")
|| strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
return CPU_FT2000;
} }
p = (char *) NULL ; p = (char *) NULL ;
@ -382,7 +413,24 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 48\n"); printf("#define DTB_DEFAULT_ENTRIES 48\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break; break;
// Cortex-A510 / A710 / X1 / X2 share one configuration: ARMV9 is defined,
// then the core's own name as stored in cpuname[], followed by fixed cache
// and TLB parameters.
// NOTE(review): CORTEXX1 receives ARMV9 here, while its forced configuration
// and compiler flags in this change use -DARMV8 / armv8.2-a without SVE;
// confirm that sharing this case is intended.
case CPU_CORTEXA510:
case CPU_CORTEXA710:
case CPU_CORTEXX1:
case CPU_CORTEXX2:
printf("#define ARMV9\n");
printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 4\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 4\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 8\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
case CPU_FALKOR: case CPU_FALKOR:
printf("#define FALKOR\n"); printf("#define FALKOR\n");
printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_SIZE 65536\n");
@ -469,9 +517,9 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n"); printf("#define DTB_SIZE 4096 \n");
break; break;
#ifdef __APPLE__
case CPU_VORTEX: case CPU_VORTEX:
printf("#define VORTEX \n"); printf("#define VORTEX \n");
#ifdef __APPLE__
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
printf("#define L1_CODE_SIZE %lld \n",value64); printf("#define L1_CODE_SIZE %lld \n",value64);
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
@ -480,10 +528,10 @@ void get_cpuconfig(void)
printf("#define L1_DATA_SIZE %lld \n",value64); printf("#define L1_DATA_SIZE %lld \n",value64);
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
printf("#define L2_SIZE %lld \n",value64); printf("#define L2_SIZE %lld \n",value64);
#endif
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n"); printf("#define DTB_SIZE 4096 \n");
break; break;
#endif
case CPU_A64FX: case CPU_A64FX:
printf("#define A64FX\n"); printf("#define A64FX\n");
printf("#define L1_CODE_SIZE 65535\n"); printf("#define L1_CODE_SIZE 65535\n");
@ -494,6 +542,16 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break; break;
// FT2000: emits the core name plus fixed cache and TLB parameters. No L1 code
// line size and no associativity values are printed for this core.
case CPU_FT2000:
printf("#define FT2000\n");
printf("#define L1_CODE_SIZE 32768\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L2_SIZE 33554432\n");
printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
} }
get_cpucount(); get_cpucount();
} }

View File

@ -1707,8 +1707,18 @@ int get_cpuname(void){
if (model == 0xf && stepping < 0xe) if (model == 0xf && stepping < 0xe)
return CPUTYPE_NANO; return CPUTYPE_NANO;
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 0x7:
switch (exmodel) {
case 5:
if (support_avx2())
return CPUTYPE_ZEN;
else
return CPUTYPE_DUNNINGTON;
default: default:
if (family >= 0x7) return CPUTYPE_NEHALEM;
}
default:
if (family >= 0x8)
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
else else
return CPUTYPE_VIAC3; return CPUTYPE_VIAC3;
@ -1716,8 +1726,21 @@ int get_cpuname(void){
} }
if (vendor == VENDOR_ZHAOXIN){ if (vendor == VENDOR_ZHAOXIN){
switch (family) {
case 0x7:
switch (exmodel) {
case 5:
if (support_avx2())
return CPUTYPE_ZEN;
else
return CPUTYPE_DUNNINGTON;
default:
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
} }
default:
return CPUTYPE_NEHALEM;
}
}
if (vendor == VENDOR_RISE){ if (vendor == VENDOR_RISE){
switch (family) { switch (family) {
@ -2416,8 +2439,18 @@ int get_coretype(void){
if (model == 0xf && stepping < 0xe) if (model == 0xf && stepping < 0xe)
return CORE_NANO; return CORE_NANO;
return CORE_NEHALEM; return CORE_NEHALEM;
case 0x7:
switch (exmodel) {
case 5:
if (support_avx2())
return CORE_ZEN;
else
return CORE_DUNNINGTON;
default: default:
if (family >= 0x7) return CORE_NEHALEM;
}
default:
if (family >= 0x8)
return CORE_NEHALEM; return CORE_NEHALEM;
else else
return CORE_VIAC3; return CORE_VIAC3;
@ -2425,8 +2458,21 @@ int get_coretype(void){
} }
if (vendor == VENDOR_ZHAOXIN) { if (vendor == VENDOR_ZHAOXIN) {
switch (family) {
case 0x7:
switch (exmodel) {
case 5:
if (support_avx2())
return CORE_ZEN;
else
return CORE_DUNNINGTON;
default:
return CORE_NEHALEM; return CORE_NEHALEM;
} }
default:
return CORE_NEHALEM;
}
}
return CORE_UNKNOWN; return CORE_UNKNOWN;
} }

View File

@ -96,7 +96,7 @@ extern gotoblas_t gotoblas_BARCELONA;
#endif #endif
#ifdef DYN_ATOM #ifdef DYN_ATOM
extern gotoblas_t gotoblas_ATOM; extern gotoblas_t gotoblas_ATOM;
elif defined(DYN_NEHALEM) #elif defined(DYN_NEHALEM)
#define gotoblas_ATOM gotoblas_NEHALEM #define gotoblas_ATOM gotoblas_NEHALEM
#else #else
#define gotoblas_ATOM gotoblas_PRESCOTT #define gotoblas_ATOM gotoblas_PRESCOTT
@ -875,15 +875,38 @@ static gotoblas_t *get_coretype(void){
if (model == 0xf && stepping < 0xe) if (model == 0xf && stepping < 0xe)
return &gotoblas_NANO; return &gotoblas_NANO;
return &gotoblas_NEHALEM; return &gotoblas_NEHALEM;
case 0x7:
switch (exmodel) {
case 5:
if (support_avx2())
return &gotoblas_ZEN;
else
return &gotoblas_DUNNINGTON;
default: default:
if (family >= 0x7) return &gotoblas_NEHALEM;
}
default:
if (family >= 0x8)
return &gotoblas_NEHALEM; return &gotoblas_NEHALEM;
} }
} }
if (vendor == VENDOR_ZHAOXIN) { if (vendor == VENDOR_ZHAOXIN) {
switch (family) {
case 0x7:
switch (exmodel) {
case 5:
if (support_avx2())
return &gotoblas_ZEN;
else
return &gotoblas_DUNNINGTON;
default:
return &gotoblas_NEHALEM; return &gotoblas_NEHALEM;
} }
default:
return &gotoblas_NEHALEM;
}
}
return NULL; return NULL;
} }

View File

@ -60,6 +60,9 @@ static char* openblas_config_str=""
#ifdef USE_OPENMP #ifdef USE_OPENMP
"USE_OPENMP " "USE_OPENMP "
#endif #endif
#ifdef USE_TLS
"USE_TLS "
#endif
#ifndef DYNAMIC_ARCH #ifndef DYNAMIC_ARCH
CHAR_CORENAME CHAR_CORENAME
#endif #endif

View File

@ -94,14 +94,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/sysinfo.h> #include <sys/sysinfo.h>
#endif #endif
#if defined(__x86_64__) || defined(_M_X64)
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6))
#else
#ifndef NO_AVX512
#define NO_AVX512
#endif
#endif
#endif
/* #define FORCE_P2 */ /* #define FORCE_P2 */
/* #define FORCE_KATMAI */ /* #define FORCE_KATMAI */
/* #define FORCE_COPPERMINE */ /* #define FORCE_COPPERMINE */
@ -1240,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa53" #define LIBNAME "cortexa53"
#define CORENAME "CORTEXA53" #define CORENAME "CORTEXA53"
#else
#endif #endif
#ifdef FORCE_CORTEXA57 #ifdef FORCE_CORTEXA57
@ -1256,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa57" #define LIBNAME "cortexa57"
#define CORENAME "CORTEXA57" #define CORENAME "CORTEXA57"
#else
#endif #endif
#ifdef FORCE_CORTEXA72 #ifdef FORCE_CORTEXA72
@ -1272,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa72" #define LIBNAME "cortexa72"
#define CORENAME "CORTEXA72" #define CORENAME "CORTEXA72"
#else
#endif #endif
#ifdef FORCE_CORTEXA73 #ifdef FORCE_CORTEXA73
@ -1288,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa73" #define LIBNAME "cortexa73"
#define CORENAME "CORTEXA73" #define CORENAME "CORTEXA73"
#else #endif
/* Forced target Cortex-X1 (ARM64): cache/TLB parameters and the VFP/NEON
   feature macros, with -DARMV8 only (no SVE, no ARMV9). */
/* NOTE(review): L1_DATA_SIZE / L2_SIZE here (32768 / 262144) differ from the
   65536 / 1048576 printed for this core by the CPU detection code in this
   change; confirm which values are intended. */
#ifdef FORCE_CORTEXX1
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXX1"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXX1 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexx1"
#define CORENAME "CORTEXX1"
#endif
/* Forced target Cortex-X2 (ARM64): cache/TLB parameters plus the VFP/NEON/SVE
   feature macros, with both -DARMV8 and -DARMV9. */
#ifdef FORCE_CORTEXX2
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXX2"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXX2 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
#define LIBNAME "cortexx2"
#define CORENAME "CORTEXX2"
#endif
/* Forced target Cortex-A510 (ARM64): cache/TLB parameters plus the
   VFP/NEON/SVE feature macros, with both -DARMV8 and -DARMV9. */
#ifdef FORCE_CORTEXA510
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA510"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA510 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
#define LIBNAME "cortexa510"
#define CORENAME "CORTEXA510"
#endif
/* Forced target Cortex-A710 (ARM64): cache/TLB parameters plus the
   VFP/NEON/SVE feature macros, with both -DARMV8 and -DARMV9. */
#ifdef FORCE_CORTEXA710
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA710"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA710 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
#define LIBNAME "cortexa710"
#define CORENAME "CORTEXA710"
#endif #endif
#ifdef FORCE_NEOVERSEN1 #ifdef FORCE_NEOVERSEN1
@ -1305,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-march=armv8.2-a -mtune=neoverse-n1" "-march=armv8.2-a -mtune=neoverse-n1"
#define LIBNAME "neoversen1" #define LIBNAME "neoversen1"
#define CORENAME "NEOVERSEN1" #define CORENAME "NEOVERSEN1"
#else
#endif #endif
#ifdef FORCE_NEOVERSEV1 #ifdef FORCE_NEOVERSEV1
@ -1322,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-march=armv8.4-a -mtune=neoverse-v1" "-march=armv8.4-a -mtune=neoverse-v1"
#define LIBNAME "neoversev1" #define LIBNAME "neoversev1"
#define CORENAME "NEOVERSEV1" #define CORENAME "NEOVERSEV1"
#else
#endif #endif
@ -1340,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-march=armv8.5-a -mtune=neoverse-n2" "-march=armv8.5-a -mtune=neoverse-n2"
#define LIBNAME "neoversen2" #define LIBNAME "neoversen2"
#define CORENAME "NEOVERSEN2" #define CORENAME "NEOVERSEN2"
#else
#endif #endif
#ifdef FORCE_CORTEXA55 #ifdef FORCE_CORTEXA55
@ -1356,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa55" #define LIBNAME "cortexa55"
#define CORENAME "CORTEXA55" #define CORENAME "CORTEXA55"
#else
#endif #endif
#ifdef FORCE_FALKOR #ifdef FORCE_FALKOR
@ -1372,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "falkor" #define LIBNAME "falkor"
#define CORENAME "FALKOR" #define CORENAME "FALKOR"
#else
#endif #endif
#ifdef FORCE_THUNDERX #ifdef FORCE_THUNDERX
@ -1387,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx" #define LIBNAME "thunderx"
#define CORENAME "THUNDERX" #define CORENAME "THUNDERX"
#else
#endif #endif
#ifdef FORCE_THUNDERX2T99 #ifdef FORCE_THUNDERX2T99
@ -1405,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx2t99" #define LIBNAME "thunderx2t99"
#define CORENAME "THUNDERX2T99" #define CORENAME "THUNDERX2T99"
#else
#endif #endif
#ifdef FORCE_TSV110 #ifdef FORCE_TSV110
@ -1421,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "tsv110" #define LIBNAME "tsv110"
#define CORENAME "TSV110" #define CORENAME "TSV110"
#else
#endif #endif
#ifdef FORCE_EMAG8180 #ifdef FORCE_EMAG8180
@ -1456,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx3t110" #define LIBNAME "thunderx3t110"
#define CORENAME "THUNDERX3T110" #define CORENAME "THUNDERX3T110"
#else
#endif #endif
#ifdef FORCE_VORTEX #ifdef FORCE_VORTEX
@ -1488,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
#define LIBNAME "a64fx" #define LIBNAME "a64fx"
#define CORENAME "A64FX" #define CORENAME "A64FX"
#else #endif
/* Forced target FT2000 (ARM64): cache/TLB parameters and the VFP/NEON feature
   macros, with -DARMV8.
   Each -D option in ARCHCONFIG must be followed by a space so that it stays a
   separate definition; without the space after the L2_SIZE value,
   "-DL2_LINESIZE=64" is glued onto it and L2_LINESIZE is never defined on its
   own.
   L2_SIZE is 33554432 bytes (32 MiB), the same value the CPU detection code
   prints for this core. */
#ifdef FORCE_FT2000
#define ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "FT2000"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DFT2000 " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
"-DL2_SIZE=33554432 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "ft2000"
#define CORENAME "FT2000"
#endif #endif
#ifdef FORCE_ZARCH_GENERIC #ifdef FORCE_ZARCH_GENERIC

View File

@ -678,7 +678,7 @@ endif ()
set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif () endif ()
if (NOT DEFINED SBGEMM_SMALL_K_B0_TT) if (NOT DEFINED SBGEMM_SMALL_K_B0_TT)
set($SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif () endif ()
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16") GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16")
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16") GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16")

View File

@ -0,0 +1,216 @@
# Kernel source selection: each variable names the source file that implements
# one BLAS kernel for this target.

# amin / max / min and their index-returning variants: C sources from ../arm.
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
# TRSM kernels for the S and D prefixes all use the same four *_sve.c
# sources, one per LN/LT/RN/RT variant.
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
# TRSM copy routines (M side), unprefixed variables.
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
# The C and Z prefixes reuse the same TRSM kernel sources as S and D.
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
# TRSM copy routines (M side), Z-prefixed variables with their own sources.
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
# Vector and matrix-vector kernels implemented as assembly (.S) sources.
# The S and D variables share one file per operation; C and Z mostly share a
# "z"-prefixed file.
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
# asum is the exception: C and Z use separate files (casum.S / zasum.S).
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
# copy and swap use a single file for all four prefixes.
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
# Dot products. DDOT always uses the assembly source; the S, C and Z choices
# depend on whether the C compiler is PGI.
DDOTKERNEL = dot.S
# SDOT: generic C source unless the C compiler is PGI, assembly for PGI.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOT/ZDOT: assembly unless the C compiler is PGI, C source for PGI.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
# GEMM beta-scaling kernels.
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
# Single-precision GEMM/TRMM: SVE kernels whose file name is completed with
# SGEMM_UNROLL_N, plus the copy routines and the object names they build to.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
# NOTE(review): the two O-copy sources below are selected with DGEMM_UNROLL_N
# rather than SGEMM_UNROLL_N; presumably both values are equal for this
# target, confirm against the unroll parameters.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM and SYMM copy routines (M side) for the S prefix.
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
# Double-precision GEMM/TRMM: same layout, file names completed with
# DGEMM_UNROLL_N.
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM and SYMM copy routines (M side) for the D prefix; same sources as S.
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
# Complex single-precision GEMM/TRMM: SVE kernels whose file name is completed
# with ZGEMM_UNROLL_N. The I-copy routines are SVE C sources; the O-copy
# routines come from ../generic.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM, HEMM and SYMM copy routines (M side) for the C prefix; all use
# "z"-prefixed sources.
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
# Complex double-precision GEMM/TRMM: same layout as the C prefix above.
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM, HEMM and SYMM copy routines (M side) for the Z prefix.
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

View File

@ -0,0 +1,216 @@
# Min/max family kernels: every entry in this group points at a C source under
# ../arm rather than a file in this kernel directory.
# Absolute-value minimum (real variants share amin.c, complex share zamin.c).
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
# Plain max / min for the real precisions.
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
# Index-returning (I*) counterparts of the kernels above.
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
# TRSM kernels. All four precisions (S, D, C, Z) are mapped to the same four
# trsm_kernel_{LN,LT,RN,RT}_sve.c sources.
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
# TRSM copy routines without a precision prefix.
# NOTE(review): presumably these serve the real precisions, with the ZTRSMCOPY*
# entries further down serving the complex ones -- confirm in the kernel Makefile.
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
# TRSM copy routines with the Z prefix, backed by the ztrsm_*copy_sve.c sources.
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
# Vector and matrix-vector kernels backed by assembly (.S) sources that are
# named without a directory prefix. Unless noted, the real precisions share one
# source and the complex precisions share a z-prefixed one.
# Absolute-value maximum.
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
# AXPY.
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
# ROT.
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
# SCAL.
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
# GEMV, non-transposed (N) and transposed (T).
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
# ASUM: unlike the other groups, C and Z use separate sources (casum.S / zasum.S).
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
# COPY and SWAP: all four precisions share a single source each.
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
# Index of absolute-value maximum.
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
# NRM2.
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
# DOT kernels. DDOT and DSDOT always use dot.S; the other precisions depend on
# whether C_COMPILER is PGI.
DDOTKERNEL = dot.S
# SDOT: generic C source for every compiler except PGI, assembly for PGI.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOT/ZDOT: the choice is the other way round -- assembly for every compiler
# except PGI, the C source under ../arm for PGI.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
# GEMM beta-scaling kernels for the real precisions.
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
# Single-precision GEMM/TRMM kernel selection; the kernel file names are
# expanded from SGEMM_UNROLL_N.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
# Outer (ON/OT) copy routines. These are keyed on SGEMM_UNROLL_N so that they
# always follow the unroll factor used to pick SGEMMKERNEL above; they were
# previously expanded from DGEMM_UNROLL_N, which selects the same files only
# while the single- and double-precision unroll factors happen to be equal.
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
# Object file names for the four copy routines above.
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/SYMM copy routine sources (file names carry no precision prefix).
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
# Double-precision GEMM/TRMM kernel selection; the kernel and outer-copy file
# names are expanded from DGEMM_UNROLL_N.
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines are C sources; outer (ON/OT) ones are assembly.
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
# Object file names for the four copy routines above.
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/SYMM copy routine sources (file names carry no precision prefix).
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
# Single-precision complex GEMM/TRMM kernel selection.
# NOTE(review): the kernel and outer-copy file names below are expanded from
# ZGEMM_UNROLL_N, not CGEMM_UNROLL_N; presumably this relies on both unroll
# factors being equal -- confirm against the target's parameter definitions.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines come from the cgemm_*_sve_v1.c sources; the outer
# (ON/OT) copy routines come from the C sources under ../generic.
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
# Object file names for the four copy routines above.
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/HEMM/SYMM copy routines: the single-precision complex variables point at
# the same z*-named sources that the double-precision complex section uses.
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
# Double-precision complex GEMM/TRMM kernel selection; the kernel and outer-copy
# file names are expanded from ZGEMM_UNROLL_N.
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines come from the zgemm_*_sve_v1.c sources; the outer
# (ON/OT) copy routines come from the C sources under ../generic.
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
# Object file names for the four copy routines above.
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/HEMM/SYMM copy routine sources for double-precision complex.
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

View File

@ -0,0 +1 @@
# This kernel file defines nothing itself; it pulls in the complete kernel
# selection from KERNEL.CORTEXA57 in $(KERNELDIR).
include $(KERNELDIR)/KERNEL.CORTEXA57

View File

@ -0,0 +1,216 @@
# Min/max family kernels: every entry in this group points at a C source under
# ../arm rather than a file in this kernel directory.
# Absolute-value minimum (real variants share amin.c, complex share zamin.c).
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
# Plain max / min for the real precisions.
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
# Index-returning (I*) counterparts of the kernels above.
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
# TRSM kernels. All four precisions (S, D, C, Z) are mapped to the same four
# trsm_kernel_{LN,LT,RN,RT}_sve.c sources.
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
# TRSM copy routines without a precision prefix.
# NOTE(review): presumably these serve the real precisions, with the ZTRSMCOPY*
# entries further down serving the complex ones -- confirm in the kernel Makefile.
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
# TRSM copy routines with the Z prefix, backed by the ztrsm_*copy_sve.c sources.
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
# Vector and matrix-vector kernels backed by assembly (.S) sources that are
# named without a directory prefix. Unless noted, the real precisions share one
# source and the complex precisions share a z-prefixed one.
# Absolute-value maximum.
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
# AXPY.
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
# ROT.
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
# SCAL.
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
# GEMV, non-transposed (N) and transposed (T).
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
# ASUM: unlike the other groups, C and Z use separate sources (casum.S / zasum.S).
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
# COPY and SWAP: all four precisions share a single source each.
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
# Index of absolute-value maximum.
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
# NRM2.
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
# DOT kernels. DDOT and DSDOT always use dot.S; the other precisions depend on
# whether C_COMPILER is PGI.
DDOTKERNEL = dot.S
# SDOT: generic C source for every compiler except PGI, assembly for PGI.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOT/ZDOT: the choice is the other way round -- assembly for every compiler
# except PGI, the C source under ../arm for PGI.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
# GEMM beta-scaling kernels for the real precisions.
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
# Single-precision GEMM/TRMM kernel selection; the kernel file names are
# expanded from SGEMM_UNROLL_N.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
# Outer (ON/OT) copy routines. These are keyed on SGEMM_UNROLL_N so that they
# always follow the unroll factor used to pick SGEMMKERNEL above; they were
# previously expanded from DGEMM_UNROLL_N, which selects the same files only
# while the single- and double-precision unroll factors happen to be equal.
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
# Object file names for the four copy routines above.
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/SYMM copy routine sources (file names carry no precision prefix).
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
# Double-precision GEMM/TRMM kernel selection; the kernel and outer-copy file
# names are expanded from DGEMM_UNROLL_N.
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines are C sources; outer (ON/OT) ones are assembly.
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
# Object file names for the four copy routines above.
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/SYMM copy routine sources (file names carry no precision prefix).
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
# Single-precision complex GEMM/TRMM kernel selection.
# NOTE(review): the kernel and outer-copy file names below are expanded from
# ZGEMM_UNROLL_N, not CGEMM_UNROLL_N; presumably this relies on both unroll
# factors being equal -- confirm against the target's parameter definitions.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines come from the cgemm_*_sve_v1.c sources; the outer
# (ON/OT) copy routines come from the C sources under ../generic.
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
# Object file names for the four copy routines above.
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/HEMM/SYMM copy routines: the single-precision complex variables point at
# the same z*-named sources that the double-precision complex section uses.
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
# Double-precision complex GEMM/TRMM kernel selection; the kernel and outer-copy
# file names are expanded from ZGEMM_UNROLL_N.
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
# Inner (IN/IT) copy routines come from the zgemm_*_sve_v1.c sources; the outer
# (ON/OT) copy routines come from the C sources under ../generic.
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
# Object file names for the four copy routines above.
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TRMM/HEMM/SYMM copy routine sources for double-precision complex.
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

View File

@ -0,0 +1,3 @@
# This kernel file defines nothing itself; it pulls in the complete kernel
# selection from KERNEL.CORTEXA57 in $(KERNELDIR).
include $(KERNELDIR)/KERNEL.CORTEXA57

View File

@ -1239,7 +1239,6 @@ static void init_parameter(void) {
#ifdef BUILD_BFLOAT16 #ifdef BUILD_BFLOAT16
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
@ -1824,6 +1823,13 @@ static void init_parameter(void) {
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
#endif #endif
#if BUILD_BFLOAT16==1
TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
#endif
#if BUILD_SINGLE==1 #if BUILD_SINGLE==1
TABLE_NAME.sgemm_r = (((BUFFER_SIZE - TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA

View File

@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
#include <immintrin.h> #include <immintrin.h>
#include "common.h" #include "common.h"
@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
#define MASK_STORE_512(M, N) \ #define MASK_STORE_512(M, N) \
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
#endif #endif
@ -265,7 +266,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
int mm = M - i; int mm = M - i;
if (!mm) return 0; if (!mm) return 0;
if (mm > 4 || K < 16) { if (mm > 4 || K < 16) {
register __mmask8 mask asm("k1") = (1UL << mm) - 1; register __mmask8 mask = (1UL << mm) - 1;
for (j = 0; j < n6; j += 6) { for (j = 0; j < n6; j += 6) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(0, 0);
DECLARE_RESULT_512(0, 1); DECLARE_RESULT_512(0, 1);
@ -588,3 +589,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
} }
return 0; return 0;
} }
#else
#include "../generic/gemm_small_matrix_kernel_nn.c"
#endif

View File

@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N)
#define MASK_STORE_512(M, N) \ #define MASK_STORE_512(M, N) \
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N)
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \
__m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \ __m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \
@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
} }
int mm = M - i; int mm = M - i;
if (mm >= 6) { if (mm >= 6) {
register __mmask16 mask asm("k1") = (1UL << mm) - 1; register __mmask16 mask = (1UL << mm) - 1;
for (j = 0; j < n8; j += 8) { for (j = 0; j < n8; j += 8) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(0, 0);
DECLARE_RESULT_512(0, 1); DECLARE_RESULT_512(0, 1);

View File

@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
#include <immintrin.h> #include <immintrin.h>
#include "common.h" #include "common.h"
@ -320,3 +321,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
} }
return 0; return 0;
} }
#else
#include "../generic/gemm_small_matrix_kernel_tn.c"
#endif

View File

@ -114,10 +114,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc)) asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc))
#define _MASK_STORE_C_2nx16(addr, val0, val1) \ #define _MASK_STORE_C_2nx16(addr, val0, val1) \
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \ asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "k"(mmask)); \ asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "Yk"(mmask)); \
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); \ asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); \
asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "k"(mmask)) asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "Yk"(mmask))
#define _REORDER_C_2X(result_0, result_1) { \ #define _REORDER_C_2X(result_0, result_1) { \
__m512 tmp0, tmp1; \ __m512 tmp0, tmp1; \
@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
asm("vmovups %0, (%1)": : "v"(val0), "r"(addr)); asm("vmovups %0, (%1)": : "v"(val0), "r"(addr));
#define _MASK_STORE_C_16(addr, val0) \ #define _MASK_STORE_C_16(addr, val0) \
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \ asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask));
#define N_STORE_4X(A, Bx, By) { \ #define N_STORE_4X(A, Bx, By) { \
_REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \ _REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \

View File

@ -13,6 +13,8 @@
#define ONE 1.e0f #define ONE 1.e0f
#define ZERO 0.e0f #define ZERO 0.e0f
#define SHUFFLE_MAGIC_NO (const int) 0x39
#undef STORE16_COMPLETE_RESULT #undef STORE16_COMPLETE_RESULT
#undef STORE16_MASK_COMPLETE_RESULT #undef STORE16_MASK_COMPLETE_RESULT
#undef SBGEMM_BLOCK_KERNEL_NN_32x8xK #undef SBGEMM_BLOCK_KERNEL_NN_32x8xK
@ -356,7 +358,6 @@ void sbgemm_block_kernel_nn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
bfloat16 * B_addr = B; bfloat16 * B_addr = B;
float * C_addr = C; float * C_addr = C;
int SHUFFLE_MAGIC_NO = 0x39;
BLASLONG tag_k_32x = k & (~31); BLASLONG tag_k_32x = k & (~31);
#ifndef ONE_ALPHA #ifndef ONE_ALPHA
@ -465,7 +466,6 @@ void sbgemm_block_kernel_nn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
bfloat16 * B_addr = B; bfloat16 * B_addr = B;
float * C_addr = C; float * C_addr = C;
int SHUFFLE_MAGIC_NO = 0x39;
BLASLONG tag_k_32x = k & (~31); BLASLONG tag_k_32x = k & (~31);
#ifndef ONE_ALPHA #ifndef ONE_ALPHA
@ -1192,7 +1192,6 @@ void sbgemm_block_kernel_tn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
bfloat16 * B_addr = B; bfloat16 * B_addr = B;
float * C_addr = C; float * C_addr = C;
int SHUFFLE_MAGIC_NO = 0x39;
BLASLONG tag_k_32x = k & (~31); BLASLONG tag_k_32x = k & (~31);
#ifndef ONE_ALPHA #ifndef ONE_ALPHA
@ -1291,7 +1290,6 @@ void sbgemm_block_kernel_tn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
bfloat16 * B_addr = B; bfloat16 * B_addr = B;
float * C_addr = C; float * C_addr = C;
int SHUFFLE_MAGIC_NO = 0x39;
BLASLONG tag_k_32x = k & (~31); BLASLONG tag_k_32x = k & (~31);
#ifndef ONE_ALPHA #ifndef ONE_ALPHA

View File

@ -135,7 +135,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b, 0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b,
0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f, 0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f,
}; };
u_int64_t permute_table2[] = { uint64_t permute_table2[] = {
0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3, 0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3,
0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7, 0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7,
}; };

View File

@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
#include <immintrin.h> #include <immintrin.h>
#include "common.h" #include "common.h"
@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
#define MASK_STORE_512(M, N) \ #define MASK_STORE_512(M, N) \
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
#endif #endif
@ -266,7 +267,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
int mm = M - i; int mm = M - i;
if (!mm) return 0; if (!mm) return 0;
if (mm > 8 || K < 32) { if (mm > 8 || K < 32) {
register __mmask16 mask asm("k1") = (1UL << mm) - 1; register __mmask16 mask = (1UL << mm) - 1;
for (j = 0; j < n6; j += 6) { for (j = 0; j < n6; j += 6) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(0, 0);
DECLARE_RESULT_512(0, 1); DECLARE_RESULT_512(0, 1);
@ -610,3 +611,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
} }
return 0; return 0;
} }
#else
#include "../generic/gemm_small_matrix_kernel_nn.c"
#endif

View File

@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N)
#define MASK_STORE_512(M, N) \ #define MASK_STORE_512(M, N) \
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N)
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \
__m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \ __m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \
@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
} }
int mm = M - i; int mm = M - i;
if (mm >= 12) { if (mm >= 12) {
register __mmask16 mask asm("k1") = (1UL << mm) - 1; register __mmask16 mask = (1UL << mm) - 1;
for (j = 0; j < n8; j += 8) { for (j = 0; j < n8; j += 8) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(0, 0);
DECLARE_RESULT_512(0, 1); DECLARE_RESULT_512(0, 1);

View File

@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
#include <immintrin.h> #include <immintrin.h>
#include "common.h" #include "common.h"
@ -314,3 +315,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
} }
return 0; return 0;
} }
#else
#include "../generic/gemm_small_matrix_kernel_tn.c"
#endif

View File

@ -452,11 +452,6 @@
MOVDDUP(4 * SIZE, A1, a1) MOVDDUP(4 * SIZE, A1, a1)
movsd 0 * SIZE(YY), yy1
movhpd 1 * SIZE(YY), yy1
movsd 2 * SIZE(YY), yy2
movhpd 3 * SIZE(YY), yy2
movapd 8 * SIZE(XX), xtemp1 movapd 8 * SIZE(XX), xtemp1
movapd 10 * SIZE(XX), xtemp2 movapd 10 * SIZE(XX), xtemp2
movapd 12 * SIZE(XX), xtemp3 movapd 12 * SIZE(XX), xtemp3
@ -475,6 +470,12 @@
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
ALIGN_3 ALIGN_3
.L12_prep:
movsd 0 * SIZE(YY), yy1
movhpd 1 * SIZE(YY), yy1
movsd 2 * SIZE(YY), yy2
movhpd 3 * SIZE(YY), yy2
.L12: .L12:
movapd xtemp1, xt1 movapd xtemp1, xt1
mulpd a1, xt1 mulpd a1, xt1
@ -608,8 +609,6 @@
movlpd yy2, 6 * SIZE(YY) movlpd yy2, 6 * SIZE(YY)
movhpd yy2, 7 * SIZE(YY) movhpd yy2, 7 * SIZE(YY)
movsd 10 * SIZE(YY), yy2
movhpd 11 * SIZE(YY), yy2
movapd xtemp2, xt1 movapd xtemp2, xt1
movapd 18 * SIZE(XX), xtemp2 movapd 18 * SIZE(XX), xtemp2
@ -621,8 +620,6 @@
movlpd yy1, 4 * SIZE(YY) movlpd yy1, 4 * SIZE(YY)
movhpd yy1, 5 * SIZE(YY) movhpd yy1, 5 * SIZE(YY)
movsd 8 * SIZE(YY), yy1
movhpd 9 * SIZE(YY), yy1
subq $-16 * SIZE, XX subq $-16 * SIZE, XX
addq $ 8 * SIZE, YY addq $ 8 * SIZE, YY
@ -630,7 +627,8 @@
addq $ 8 * SIZE, A2 addq $ 8 * SIZE, A2
decq I decq I
jg .L12 jg .L12_prep
jmp .L15
ALIGN_3 ALIGN_3
.L14: .L14:
@ -641,7 +639,6 @@
jle .L16 jle .L16
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2)
jmp .L15_pastcheck
.L15: .L15:
movq M, I movq M, I
@ -650,6 +647,11 @@
testq $2, I testq $2, I
jle .L16 jle .L16
movsd 0 * SIZE(YY), yy1
movhpd 1 * SIZE(YY), yy1
movsd 2 * SIZE(YY), yy2
movhpd 3 * SIZE(YY), yy2
.L15_pastcheck: .L15_pastcheck:
movapd xtemp1, xt1 movapd xtemp1, xt1
mulpd a1, xt1 mulpd a1, xt1
@ -705,8 +707,6 @@
movlpd yy2, 2 * SIZE(YY) movlpd yy2, 2 * SIZE(YY)
movhpd yy2, 3 * SIZE(YY) movhpd yy2, 3 * SIZE(YY)
movsd 6 * SIZE(YY), yy2
movhpd 7 * SIZE(YY), yy2
movapd xtemp2, xt1 movapd xtemp2, xt1
movapd 10 * SIZE(XX), xtemp2 movapd 10 * SIZE(XX), xtemp2
@ -717,8 +717,6 @@
movlpd yy1, 0 * SIZE(YY) movlpd yy1, 0 * SIZE(YY)
movhpd yy1, 1 * SIZE(YY) movhpd yy1, 1 * SIZE(YY)
movsd 4 * SIZE(YY), yy1
movhpd 5 * SIZE(YY), yy1
addq $4 * SIZE, YY addq $4 * SIZE, YY
addq $4 * SIZE, A1 addq $4 * SIZE, A1
@ -731,6 +729,9 @@
MOVDDUP(1 * SIZE, A1, a2) MOVDDUP(1 * SIZE, A1, a2)
movsd 0 * SIZE(YY), yy1
movhpd 1 * SIZE(YY), yy1
movapd xtemp1, xt1 movapd xtemp1, xt1
mulpd a1, xt1 mulpd a1, xt1
mulpd atemp1, a1 mulpd atemp1, a1

View File

@ -2,9 +2,9 @@ add_subdirectory(SRC)
if(BUILD_TESTING) if(BUILD_TESTING)
add_subdirectory(TESTING) add_subdirectory(TESTING)
endif() endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc @ONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc @ONLY)
install(FILES install(FILES
${CMAKE_CURRENT_BINARY_DIR}/blas.pc ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc
DESTINATION ${PKG_CONFIG_DIR} DESTINATION ${PKG_CONFIG_DIR}
COMPONENT Development COMPONENT Development
) )

View File

@ -97,10 +97,10 @@ if(BUILD_COMPLEX16)
endif() endif()
list(REMOVE_DUPLICATES SOURCES) list(REMOVE_DUPLICATES SOURCES)
add_library(blas ${SOURCES}) add_library(${BLASLIB} ${SOURCES})
set_target_properties( set_target_properties(
blas PROPERTIES ${BLASLIB} PROPERTIES
VERSION ${LAPACK_VERSION} VERSION ${LAPACK_VERSION}
SOVERSION ${LAPACK_MAJOR_VERSION} SOVERSION ${LAPACK_MAJOR_VERSION}
) )
lapack_install_library(blas) lapack_install_library(${BLASLIB})

View File

@ -2,7 +2,7 @@ macro(add_blas_test name src)
get_filename_component(baseNAME ${src} NAME_WE) get_filename_component(baseNAME ${src} NAME_WE)
set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in") set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in")
add_executable(${name} ${src}) add_executable(${name} ${src})
target_link_libraries(${name} blas) target_link_libraries(${name} ${BLASLIB})
if(EXISTS "${TEST_INPUT}") if(EXISTS "${TEST_INPUT}")
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}" add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
-DTEST=$<TARGET_FILE:${name}> -DTEST=$<TARGET_FILE:${name}>

View File

@ -5,4 +5,4 @@ Name: BLAS
Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms
Version: @LAPACK_VERSION@ Version: @LAPACK_VERSION@
URL: http://www.netlib.org/blas/ URL: http://www.netlib.org/blas/
Libs: -L${libdir} -lblas Libs: -L${libdir} -l@BLASLIB@

View File

@ -1,7 +1,7 @@
message(STATUS "CBLAS enable") message(STATUS "CBLAS enable")
enable_language(C) enable_language(C)
set(LAPACK_INSTALL_EXPORT_NAME cblas-targets) set(LAPACK_INSTALL_EXPORT_NAME ${CBLASLIB}-targets)
# Create a header file cblas.h for the routines called in my C programs # Create a header file cblas.h for the routines called in my C programs
include(FortranCInterface) include(FortranCInterface)
@ -42,15 +42,15 @@ if(BUILD_TESTING)
endif() endif()
if(NOT BLAS_FOUND) if(NOT BLAS_FOUND)
set(ALL_TARGETS ${ALL_TARGETS} blas) set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB})
endif() endif()
# Export cblas targets from the # Export cblas targets from the
# install tree, if any. # install tree, if any.
set(_cblas_config_install_guard_target "") set(_cblas_config_install_guard_target "")
if(ALL_TARGETS) if(ALL_TARGETS)
install(EXPORT cblas-targets install(EXPORT ${CBLASLIB}-targets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
COMPONENT Development COMPONENT Development
) )
# Choose one of the cblas targets to use as a guard for # Choose one of the cblas targets to use as a guard for
@ -61,7 +61,7 @@ endif()
# Export cblas targets from the build tree, if any. # Export cblas targets from the build tree, if any.
set(_cblas_config_build_guard_target "") set(_cblas_config_build_guard_target "")
if(ALL_TARGETS) if(ALL_TARGETS)
export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake) export(TARGETS ${ALL_TARGETS} FILE ${CBLASLIB}-targets.cmake)
# Choose one of the cblas targets to use as a guard # Choose one of the cblas targets to use as a guard
# for cblas-config.cmake to load targets from the build tree. # for cblas-config.cmake to load targets from the build tree.
@ -69,26 +69,26 @@ if(ALL_TARGETS)
endif() endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in
${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY) ${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in
${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY) ${LAPACK_BINARY_DIR}/${CBLASLIB}-config.cmake @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc @ONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc @ONLY)
install(FILES install(FILES
${CMAKE_CURRENT_BINARY_DIR}/cblas.pc ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc
DESTINATION ${PKG_CONFIG_DIR} DESTINATION ${PKG_CONFIG_DIR}
) )
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY) ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake @ONLY)
install(FILES install(FILES
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake
${LAPACK_BINARY_DIR}/cblas-config-version.cmake ${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
) )
#install(EXPORT cblas-targets #install(EXPORT ${CBLASLIB}-targets
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION}
# COMPONENT Development # COMPONENT Development
# ) # )

View File

@ -5,6 +5,6 @@ Name: CBLAS
Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms
Version: @LAPACK_VERSION@ Version: @LAPACK_VERSION@
URL: http://www.netlib.org/blas/#_cblas URL: http://www.netlib.org/blas/#_cblas
Libs: -L${libdir} -lcblas Libs: -L${libdir} -l@CBLASLIB@
Cflags: -I${includedir} Cflags: -I${includedir}
Requires.private: blas Requires.private: @BLASLIB@

View File

@ -4,11 +4,11 @@ find_package(LAPACK NO_MODULE)
# Load lapack targets from the build tree, including lapacke targets. # Load lapack targets from the build tree, including lapacke targets.
if(NOT TARGET lapacke) if(NOT TARGET lapacke)
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
endif() endif()
# Report cblas header search locations from build tree. # Report cblas header search locations from build tree.
set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
# Report cblas libraries. # Report cblas libraries.
set(CBLAS_LIBRARIES cblas) set(CBLAS_LIBRARIES @CBLASLIB@)

View File

@ -5,19 +5,19 @@ get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
# Load the LAPACK package with which we were built. # Load the LAPACK package with which we were built.
set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@")
find_package(LAPACK NO_MODULE) find_package(LAPACK NO_MODULE)
# Load cblas targets from the install tree. # Load cblas targets from the install tree.
if(NOT TARGET cblas) if(NOT TARGET @CBLASLIB@)
include(${_CBLAS_SELF_DIR}/cblas-targets.cmake) include(${_CBLAS_SELF_DIR}/@CBLASLIB@-targets.cmake)
endif() endif()
# Report cblas header search locations. # Report cblas header search locations.
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include) set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include)
# Report cblas libraries. # Report cblas libraries.
set(CBLAS_LIBRARIES cblas) set(CBLAS_LIBRARIES @CBLASLIB@)
unset(_CBLAS_PREFIX) unset(_CBLAS_PREFIX)
unset(_CBLAS_SELF_DIR) unset(_CBLAS_SELF_DIR)

View File

@ -1,8 +1,8 @@
add_executable(xexample1_CBLAS cblas_example1.c) add_executable(xexample1_CBLAS cblas_example1.c)
add_executable(xexample2_CBLAS cblas_example2.c) add_executable(xexample2_CBLAS cblas_example2.c)
target_link_libraries(xexample1_CBLAS cblas) target_link_libraries(xexample1_CBLAS ${CBLASLIB})
target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES}) target_link_libraries(xexample2_CBLAS ${CBLASLIB} ${BLAS_LIBRARIES})
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS) add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS)
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS) add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS)

View File

@ -11,7 +11,7 @@ int main ( )
double *a, *x, *y; double *a, *x, *y;
double alpha, beta; double alpha, beta;
int m, n, lda, incx, incy, i; CBLAS_INDEX m, n, lda, incx, incy, i;
Layout = CblasColMajor; Layout = CblasColMajor;
transa = CblasNoTrans; transa = CblasNoTrans;

View File

@ -9,7 +9,7 @@
int main (int argc, char **argv ) int main (int argc, char **argv )
{ {
int rout=-1,info=0,m,n,k,lda,ldb,ldc; CBLAS_INDEX rout=-1,info=0,m,n,k,lda,ldb,ldc;
double A[2] = {0.0,0.0}, double A[2] = {0.0,0.0},
B[2] = {0.0,0.0}, B[2] = {0.0,0.0},
C[2] = {0.0,0.0}, C[2] = {0.0,0.0},

View File

@ -1,6 +1,7 @@
#ifndef CBLAS_H #ifndef CBLAS_H
#define CBLAS_H #define CBLAS_H
#include <stddef.h> #include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus #ifdef __cplusplus
@ -11,9 +12,9 @@ extern "C" { /* Assume C declarations for C++ */
* Enumerated and derived types * Enumerated and derived types
*/ */
#ifdef WeirdNEC #ifdef WeirdNEC
#define CBLAS_INDEX long #define CBLAS_INDEX int64_t
#else #else
#define CBLAS_INDEX int #define CBLAS_INDEX int32_t
#endif #endif
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT;

View File

@ -9,6 +9,8 @@
#ifndef CBLAS_F77_H #ifndef CBLAS_F77_H
#define CBLAS_F77_H #define CBLAS_F77_H
#include <stdint.h>
#ifdef CRAY #ifdef CRAY
#include <fortran.h> #include <fortran.h>
#define F77_CHAR _fcd #define F77_CHAR _fcd
@ -17,8 +19,12 @@
#define F77_STRLEN(a) (_fcdlen) #define F77_STRLEN(a) (_fcdlen)
#endif #endif
#ifndef F77_INT
#ifdef WeirdNEC #ifdef WeirdNEC
#define F77_INT long #define F77_INT int64_t
#else
#define F77_INT int32_t
#endif
#endif #endif
#ifdef F77_CHAR #ifdef F77_CHAR

View File

@ -113,16 +113,16 @@ if(BUILD_COMPLEX16)
endif() endif()
list(REMOVE_DUPLICATES SOURCES) list(REMOVE_DUPLICATES SOURCES)
add_library(cblas ${SOURCES}) add_library(${CBLASLIB} ${SOURCES})
set_target_properties( set_target_properties(
cblas PROPERTIES ${CBLASLIB} PROPERTIES
LINKER_LANGUAGE C LINKER_LANGUAGE C
VERSION ${LAPACK_VERSION} VERSION ${LAPACK_VERSION}
SOVERSION ${LAPACK_MAJOR_VERSION} SOVERSION ${LAPACK_MAJOR_VERSION}
) )
target_include_directories(cblas PUBLIC target_include_directories(${CBLASLIB} PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
$<INSTALL_INTERFACE:include> $<INSTALL_INTERFACE:include>
) )
target_link_libraries(cblas PRIVATE ${BLAS_LIBRARIES}) target_link_libraries(${CBLASLIB} PRIVATE ${BLAS_LIBRARIES})
lapack_install_library(cblas) lapack_install_library(${CBLASLIB})

View File

@ -52,9 +52,9 @@ if(BUILD_SINGLE)
add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
target_link_libraries(xscblat1 cblas) target_link_libraries(xscblat1 ${CBLASLIB})
target_link_libraries(xscblat2 cblas) target_link_libraries(xscblat2 ${CBLASLIB})
target_link_libraries(xscblat3 cblas) target_link_libraries(xscblat3 ${CBLASLIB})
add_cblas_test(stest1.out "" xscblat1) add_cblas_test(stest1.out "" xscblat1)
add_cblas_test(stest2.out sin2 xscblat2) add_cblas_test(stest2.out sin2 xscblat2)
@ -66,9 +66,9 @@ if(BUILD_DOUBLE)
add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
target_link_libraries(xdcblat1 cblas) target_link_libraries(xdcblat1 ${CBLASLIB})
target_link_libraries(xdcblat2 cblas) target_link_libraries(xdcblat2 ${CBLASLIB})
target_link_libraries(xdcblat3 cblas) target_link_libraries(xdcblat3 ${CBLASLIB})
add_cblas_test(dtest1.out "" xdcblat1) add_cblas_test(dtest1.out "" xdcblat1)
add_cblas_test(dtest2.out din2 xdcblat2) add_cblas_test(dtest2.out din2 xdcblat2)
@ -80,9 +80,9 @@ if(BUILD_COMPLEX)
add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
target_link_libraries(xccblat1 cblas ${BLAS_LIBRARIES}) target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES})
target_link_libraries(xccblat2 cblas) target_link_libraries(xccblat2 ${CBLASLIB})
target_link_libraries(xccblat3 cblas) target_link_libraries(xccblat3 ${CBLASLIB})
add_cblas_test(ctest1.out "" xccblat1) add_cblas_test(ctest1.out "" xccblat1)
add_cblas_test(ctest2.out cin2 xccblat2) add_cblas_test(ctest2.out cin2 xccblat2)
@ -94,9 +94,9 @@ if(BUILD_COMPLEX16)
add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h)
target_link_libraries(xzcblat1 cblas) target_link_libraries(xzcblat1 ${CBLASLIB})
target_link_libraries(xzcblat2 cblas) target_link_libraries(xzcblat2 ${CBLASLIB})
target_link_libraries(xzcblat3 cblas) target_link_libraries(xzcblat3 ${CBLASLIB})
add_cblas_test(ztest1.out "" xzcblat1) add_cblas_test(ztest1.out "" xzcblat1)
add_cblas_test(ztest2.out zin2 xzcblat2) add_cblas_test(ztest2.out zin2 xzcblat2)

View File

@ -14,6 +14,19 @@ macro( CheckLAPACKCompilerFlags )
set( FPE_EXIT FALSE ) set( FPE_EXIT FALSE )
# FORTRAN ILP default
if ( FORTRAN_ILP )
if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" )
if ( WIN32 )
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} /integer-size:64")
else ()
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -integer-size 64")
endif()
else()
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8")
endif()
endif()
# GNU Fortran # GNU Fortran
if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" ) if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" )
if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]") if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]")

View File

@ -1,7 +1,7 @@
# Load lapack targets from the build tree if necessary. # Load lapack targets from the build tree if necessary.
set(_LAPACK_TARGET "@_lapack_config_build_guard_target@") set(_LAPACK_TARGET "@_lapack_config_build_guard_target@")
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
endif() endif()
unset(_LAPACK_TARGET) unset(_LAPACK_TARGET)

View File

@ -4,7 +4,7 @@ get_filename_component(_LAPACK_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
# Load lapack targets from the install tree if necessary. # Load lapack targets from the install tree if necessary.
set(_LAPACK_TARGET "@_lapack_config_install_guard_target@") set(_LAPACK_TARGET "@_lapack_config_install_guard_target@")
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
include("${_LAPACK_SELF_DIR}/lapack-targets.cmake") include("${_LAPACK_SELF_DIR}/@LAPACKLIB@-targets.cmake")
endif() endif()
unset(_LAPACK_TARGET) unset(_LAPACK_TARGET)

View File

@ -44,6 +44,24 @@ endif()
# By default static library # By default static library
option(BUILD_SHARED_LIBS "Build shared libraries" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
# By default build index32 library
option(BUILD_INDEX64 "Build Index-64 API libraries" OFF)
if(BUILD_INDEX64)
set(BLASLIB "blas64")
set(CBLASLIB "cblas64")
set(LAPACKLIB "lapack64")
set(LAPACKELIB "lapacke64")
set(TMGLIB "tmglib64")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWeirdNEC -DLAPACK_ILP64 -DHAVE_LAPACK_CONFIG_H")
set(FORTRAN_ILP TRUE)
else()
set(BLASLIB "blas")
set(CBLASLIB "cblas")
set(LAPACKLIB "lapack")
set(LAPACKELIB "lapacke")
set(TMGLIB "tmglib")
endif()
include(GNUInstallDirs) include(GNUInstallDirs)
# Updated OSX RPATH settings # Updated OSX RPATH settings
@ -73,10 +91,10 @@ include(PreventInBuildInstalls)
if(UNIX) if(UNIX)
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel)
list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict")
endif() endif()
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) if(CMAKE_Fortran_COMPILER_ID STREQUAL XL)
list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none")
endif() endif()
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler.
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin
@ -112,7 +130,7 @@ endif()
# -------------------------------------------------- # --------------------------------------------------
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets)
macro(lapack_install_library lib) macro(lapack_install_library lib)
install(TARGETS ${lib} install(TARGETS ${lib}
@ -220,7 +238,7 @@ endif()
if(NOT BLAS_FOUND) if(NOT BLAS_FOUND)
message(STATUS "Using supplied NETLIB BLAS implementation") message(STATUS "Using supplied NETLIB BLAS implementation")
add_subdirectory(BLAS) add_subdirectory(BLAS)
set(BLAS_LIBRARIES blas) set(BLAS_LIBRARIES ${BLASLIB})
else() else()
set(CMAKE_EXE_LINKER_FLAGS set(CMAKE_EXE_LINKER_FLAGS
"${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}" "${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}"
@ -279,7 +297,7 @@ endif()
# Neither user specified or optimized LAPACK libraries can be used # Neither user specified or optimized LAPACK libraries can be used
if(NOT LATESTLAPACK_FOUND) if(NOT LATESTLAPACK_FOUND)
message(STATUS "Using supplied NETLIB LAPACK implementation") message(STATUS "Using supplied NETLIB LAPACK implementation")
set(LAPACK_LIBRARIES lapack) set(LAPACK_LIBRARIES ${LAPACKLIB})
add_subdirectory(SRC) add_subdirectory(SRC)
else() else()
set(CMAKE_EXE_LINKER_FLAGS set(CMAKE_EXE_LINKER_FLAGS
@ -371,23 +389,23 @@ include(CPack)
# -------------------------------------------------- # --------------------------------------------------
if(NOT BLAS_FOUND) if(NOT BLAS_FOUND)
set(ALL_TARGETS ${ALL_TARGETS} blas) set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB})
endif() endif()
if(NOT LATESTLAPACK_FOUND) if(NOT LATESTLAPACK_FOUND)
set(ALL_TARGETS ${ALL_TARGETS} lapack) set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKLIB})
endif() endif()
if(BUILD_TESTING OR LAPACKE_WITH_TMG) if(BUILD_TESTING OR LAPACKE_WITH_TMG)
set(ALL_TARGETS ${ALL_TARGETS} tmglib) set(ALL_TARGETS ${ALL_TARGETS} ${TMGLIB})
endif() endif()
# Export lapack targets, not including lapacke, from the # Export lapack targets, not including lapacke, from the
# install tree, if any. # install tree, if any.
set(_lapack_config_install_guard_target "") set(_lapack_config_install_guard_target "")
if(ALL_TARGETS) if(ALL_TARGETS)
install(EXPORT lapack-targets install(EXPORT ${LAPACKLIB}-targets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION}
COMPONENT Development COMPONENT Development
) )
@ -398,18 +416,18 @@ endif()
# Include cblas in targets exported from the build tree. # Include cblas in targets exported from the build tree.
if(CBLAS) if(CBLAS)
set(ALL_TARGETS ${ALL_TARGETS} cblas) set(ALL_TARGETS ${ALL_TARGETS} ${CBLASLIB})
endif() endif()
# Include lapacke in targets exported from the build tree. # Include lapacke in targets exported from the build tree.
if(LAPACKE) if(LAPACKE)
set(ALL_TARGETS ${ALL_TARGETS} lapacke) set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKELIB})
endif() endif()
# Export lapack and lapacke targets from the build tree, if any. # Export lapack and lapacke targets from the build tree, if any.
set(_lapack_config_build_guard_target "") set(_lapack_config_build_guard_target "")
if(ALL_TARGETS) if(ALL_TARGETS)
export(TARGETS ${ALL_TARGETS} FILE lapack-targets.cmake) export(TARGETS ${ALL_TARGETS} FILE ${LAPACKLIB}-targets.cmake)
# Choose one of the lapack or lapacke targets to use as a guard # Choose one of the lapack or lapacke targets to use as a guard
# for lapack-config.cmake to load targets from the build tree. # for lapack-config.cmake to load targets from the build tree.
@ -417,30 +435,30 @@ if(ALL_TARGETS)
endif() endif()
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in
${LAPACK_BINARY_DIR}/lapack-config.cmake @ONLY) ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config.cmake @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc @ONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc @ONLY)
install(FILES install(FILES
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc
DESTINATION ${PKG_CONFIG_DIR} DESTINATION ${PKG_CONFIG_DIR}
COMPONENT Development COMPONENT Development
) )
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake @ONLY) ${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake @ONLY)
include(CMakePackageConfigHelpers) include(CMakePackageConfigHelpers)
write_basic_package_version_file( write_basic_package_version_file(
${LAPACK_BINARY_DIR}/lapack-config-version.cmake ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake
VERSION ${LAPACK_VERSION} VERSION ${LAPACK_VERSION}
COMPATIBILITY SameMajorVersion COMPATIBILITY SameMajorVersion
) )
install(FILES install(FILES
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake ${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake
${LAPACK_BINARY_DIR}/lapack-config-version.cmake ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION}
COMPONENT Development COMPONENT Development
) )

View File

@ -1,7 +1,7 @@
message(STATUS "LAPACKE enable") message(STATUS "LAPACKE enable")
enable_language(C) enable_language(C)
set(LAPACK_INSTALL_EXPORT_NAME lapacke-targets) set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKELIB}-targets)
# Create a header file lapacke_mangling.h for the routines called in my C programs # Create a header file lapacke_mangling.h for the routines called in my C programs
include(FortranCInterface) include(FortranCInterface)
@ -72,28 +72,28 @@ if(LAPACKE_WITH_TMG)
endif() endif()
list(APPEND SOURCES ${UTILS}) list(APPEND SOURCES ${UTILS})
add_library(lapacke ${SOURCES}) add_library(${LAPACKELIB} ${SOURCES})
set_target_properties( set_target_properties(
lapacke PROPERTIES ${LAPACKELIB} PROPERTIES
LINKER_LANGUAGE C LINKER_LANGUAGE C
VERSION ${LAPACK_VERSION} VERSION ${LAPACK_VERSION}
SOVERSION ${LAPACK_MAJOR_VERSION} SOVERSION ${LAPACK_MAJOR_VERSION}
) )
target_include_directories(lapacke PUBLIC target_include_directories(${LAPACKELIB} PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include> $<INSTALL_INTERFACE:include>
) )
if(WIN32 AND NOT UNIX) if(WIN32 AND NOT UNIX)
target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) target_compile_definitions(${LAPACKELIB} PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE)
message(STATUS "Windows BUILD") message(STATUS "Windows BUILD")
endif() endif()
if(LAPACKE_WITH_TMG) if(LAPACKE_WITH_TMG)
target_link_libraries(lapacke PRIVATE tmglib) target_link_libraries(${LAPACKELIB} PRIVATE ${TMGLIB})
endif() endif()
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) target_link_libraries(${LAPACKELIB} PRIVATE ${LAPACK_LIBRARIES})
lapack_install_library(lapacke) lapack_install_library(${LAPACKELIB})
install( install(
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
@ -105,28 +105,28 @@ if(BUILD_TESTING)
endif() endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc @ONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc @ONLY)
install(FILES install(FILES
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc
DESTINATION ${PKG_CONFIG_DIR} DESTINATION ${PKG_CONFIG_DIR}
COMPONENT Development COMPONENT Development
) )
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake @ONLY) ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in
${LAPACK_BINARY_DIR}/lapacke-config.cmake @ONLY) ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config.cmake @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake @ONLY) ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake @ONLY)
install(FILES install(FILES
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION}
COMPONENT Development COMPONENT Development
) )
install(EXPORT lapacke-targets install(EXPORT ${LAPACKELIB}-targets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION}
COMPONENT Development COMPONENT Development
) )

View File

@ -3,8 +3,8 @@ set(LAPACK_DIR "@LAPACK_BINARY_DIR@")
find_package(LAPACK NO_MODULE) find_package(LAPACK NO_MODULE)
# Load lapack targets from the build tree, including lapacke targets. # Load lapack targets from the build tree, including lapacke targets.
if(NOT TARGET lapacke) if(NOT TARGET @LAPACKELIB@)
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake")
endif() endif()
# Hint for project building against lapack # Hint for project building against lapack
@ -14,4 +14,4 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
# Report lapacke libraries. # Report lapacke libraries.
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES})

View File

@ -5,12 +5,12 @@ get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH)
get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH)
# Load the LAPACK package with which we were built. # Load the LAPACK package with which we were built.
set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@")
find_package(LAPACK NO_MODULE) find_package(LAPACK NO_MODULE)
# Load lapacke targets from the install tree. # Load lapacke targets from the install tree.
if(NOT TARGET lapacke) if(NOT TARGET @LAPACKELIB@)
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) include(${_LAPACKE_SELF_DIR}/@LAPACKELIB@-targets.cmake)
endif() endif()
# Hint for project building against lapack # Hint for project building against lapack
@ -20,7 +20,7 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include)
# Report lapacke libraries. # Report lapacke libraries.
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES})
unset(_LAPACKE_PREFIX) unset(_LAPACKE_PREFIX)
unset(_LAPACKE_SELF_DIR) unset(_LAPACKE_SELF_DIR)

View File

@ -3,10 +3,10 @@ add_executable(xexample_DGESV_colmajor example_DGESV_colmajor.c lapacke_example_
add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h) add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h)
add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h) add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h)
target_link_libraries(xexample_DGESV_rowmajor lapacke) target_link_libraries(xexample_DGESV_rowmajor ${LAPACKELIB})
target_link_libraries(xexample_DGESV_colmajor lapacke) target_link_libraries(xexample_DGESV_colmajor ${LAPACKELIB})
target_link_libraries(xexample_DGELS_rowmajor lapacke) target_link_libraries(xexample_DGELS_rowmajor ${LAPACKELIB})
target_link_libraries(xexample_DGELS_colmajor lapacke) target_link_libraries(xexample_DGELS_colmajor ${LAPACKELIB})
add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor) add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor)
add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor) add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor)

View File

@ -49,12 +49,13 @@ extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h>
#ifndef lapack_int #ifndef lapack_int
#if defined(LAPACK_ILP64) #if defined(LAPACK_ILP64)
#define lapack_int long #define lapack_int int64_t
#else #else
#define lapack_int int #define lapack_int int32_t
#endif #endif
#endif #endif

View File

@ -67,7 +67,11 @@ extern "C" {
void LAPACKE_xerbla( const char *name, lapack_int info ); void LAPACKE_xerbla( const char *name, lapack_int info );
/* Compare two chars (case-insensitive) */ /* Compare two chars (case-insensitive) */
lapack_logical LAPACKE_lsame( char ca, char cb ); lapack_logical LAPACKE_lsame( char ca, char cb )
#if defined __GNUC__
__attribute__((const))
#endif
;
/* Functions to convert column-major to row-major 2d arrays and vice versa. */ /* Functions to convert column-major to row-major 2d arrays and vice versa. */
void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n, void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n,

View File

@ -5,6 +5,6 @@ Name: LAPACKE
Description: C Standard Interface to LAPACK Linear Algebra PACKage Description: C Standard Interface to LAPACK Linear Algebra PACKage
Version: @LAPACK_VERSION@ Version: @LAPACK_VERSION@
URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
Libs: -L${libdir} -llapacke Libs: -L${libdir} -l@LAPACKELIB@
Cflags: -I${includedir} Cflags: -I${includedir}
Requires.private: lapack Requires.private: @LAPACKLIB@

View File

@ -500,21 +500,21 @@ if(BUILD_COMPLEX16)
endif() endif()
list(REMOVE_DUPLICATES SOURCES) list(REMOVE_DUPLICATES SOURCES)
add_library(lapack ${SOURCES}) add_library(${LAPACKLIB} ${SOURCES})
set_target_properties( set_target_properties(
lapack PROPERTIES ${LAPACKLIB} PROPERTIES
VERSION ${LAPACK_VERSION} VERSION ${LAPACK_VERSION}
SOVERSION ${LAPACK_MAJOR_VERSION} SOVERSION ${LAPACK_MAJOR_VERSION}
) )
if(USE_XBLAS) if(USE_XBLAS)
target_link_libraries(lapack PRIVATE ${XBLAS_LIBRARY}) target_link_libraries(${LAPACKLIB} PRIVATE ${XBLAS_LIBRARY})
endif() endif()
target_link_libraries(lapack PRIVATE ${BLAS_LIBRARIES}) target_link_libraries(${LAPACKLIB} PRIVATE ${BLAS_LIBRARIES})
if(_is_coverage_build) if(_is_coverage_build)
target_link_libraries(lapack PRIVATE gcov) target_link_libraries(${LAPACKLIB} PRIVATE gcov)
add_coverage(lapack) add_coverage(${LAPACKLIB})
endif() endif()
lapack_install_library(lapack) lapack_install_library(${LAPACKLIB})

View File

@ -47,6 +47,6 @@ if(BUILD_COMPLEX16)
endif() endif()
list(REMOVE_DUPLICATES SOURCES) list(REMOVE_DUPLICATES SOURCES)
add_library(tmglib ${SOURCES}) add_library(${TMGLIB} ${SOURCES})
target_link_libraries(tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) target_link_libraries(${TMGLIB} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
lapack_install_library(tmglib) lapack_install_library(${TMGLIB})

12
param.h
View File

@ -3128,9 +3128,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16 #define SYMV_P 16
#if defined(CORTEXA57) || \ #if defined(CORTEXA57) || defined(CORTEXX1) || \
defined(CORTEXA72) || defined(CORTEXA73) || \ defined(CORTEXA72) || defined(CORTEXA73) || \
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4
@ -3147,7 +3147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*FIXME: this should be using the cache size, but there is currently no easy way to /*FIXME: this should be using the cache size, but there is currently no easy way to
query that on ARM. So if getarch counted more than 8 cores we simply assume the host query that on ARM. So if getarch counted more than 8 cores we simply assume the host
is a big desktop or server with abundant cache rather than a phone or embedded device */ is a big desktop or server with abundant cache rather than a phone or embedded device */
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
#define SGEMM_DEFAULT_P 512 #define SGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_P 256 #define DGEMM_DEFAULT_P 256
#define CGEMM_DEFAULT_P 256 #define CGEMM_DEFAULT_P 256
@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096
#elif defined(ARMV8SVE) || defined(A64FX) #elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2)
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
@ -3423,8 +3423,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4 #define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 8 #define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 4

View File

@ -115,7 +115,7 @@
#define INCLUDE_CTGSYL INCLUDE_XTGSYL #define INCLUDE_CTGSYL INCLUDE_XTGSYL
#define INCLUDE_ZTGSYL INCLUDE_XTGSYL #define INCLUDE_ZTGSYL INCLUDE_XTGSYL
#define INCLUDE_XGEMMT 0 #define INCLUDE_XGEMMT 1
#define INCLUDE_SGEMMT INCLUDE_XGEMMT #define INCLUDE_SGEMMT INCLUDE_XGEMMT
#define INCLUDE_DGEMMT INCLUDE_XGEMMT #define INCLUDE_DGEMMT INCLUDE_XGEMMT
#define INCLUDE_CGEMMT INCLUDE_XGEMMT #define INCLUDE_CGEMMT INCLUDE_XGEMMT

View File

@ -566,7 +566,8 @@ void LAPACK(sgemmt)(
const float *B, const blasint *ldB, const float *B, const blasint *ldB,
const float *beta, float *C, const blasint *ldC const float *beta, float *C, const blasint *ldC
) { ) {
RELAPACK_sgemmt(uplo, n, A, ldA, info); blasint info;
RELAPACK_sgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
} }
#endif #endif
@ -578,7 +579,8 @@ void LAPACK(dgemmt)(
const double *B, const blasint *ldB, const double *B, const blasint *ldB,
const double *beta, double *C, const blasint *ldC const double *beta, double *C, const blasint *ldC
) { ) {
RELAPACK_dgemmt(uplo, n, A, ldA, info); blasint info;
RELAPACK_dgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
} }
#endif #endif
@ -590,7 +592,8 @@ void LAPACK(cgemmt)(
const float *B, const blasint *ldB, const float *B, const blasint *ldB,
const float *beta, float *C, const blasint *ldC const float *beta, float *C, const blasint *ldC
) { ) {
RELAPACK_cgemmt(uplo, n, A, ldA, info); blasint info;
RELAPACK_cgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
} }
#endif #endif
@ -602,6 +605,7 @@ void LAPACK(zgemmt)(
const double *B, const blasint *ldB, const double *B, const blasint *ldB,
const double *beta, double *C, const blasint *ldC const double *beta, double *C, const blasint *ldC
) { ) {
RELAPACK_zgemmt(uplo, n, A, ldA, info); blasint info;
RELAPACK_zgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info);
} }
#endif #endif

View File

@ -30,6 +30,10 @@ if(WIN32)
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1
"if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n" "if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n"
"$ErrorActionPreference = \"Stop\"\n" "$ErrorActionPreference = \"Stop\"\n"
"If ((Get-Content $args[1] | & file - | %{$_ -match \"BOM\"}) -contains $true) {\n"
"echo 'Skipped due to wrong input encoding'\n"
"exit 0\n"
"}\n"
"Get-Content $args[1] | & $args[0]\n" "Get-Content $args[1] | & $args[0]\n"
"If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n" "If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n"
"echo Error\n" "echo Error\n"