Merge branch 'develop' into actionsflang
This commit is contained in:
commit
8916c4ae2c
|
@ -233,6 +233,7 @@ if (NOT MSVC AND NOT NOFORTRAN)
|
|||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
|
|
|
@ -1,4 +1,77 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.10
|
||||
14-Jun-2020
|
||||
|
||||
common:
|
||||
* Improved thread locking behaviour in blas_server and parallel getrf
|
||||
* Imported bugfix 394 from LAPACK (spurious reference to "XERBL"
|
||||
due to overlong lines)
|
||||
* Imported bugfix 403 from LAPACK (compile option "recursive" required
|
||||
for correctness with Intel and PGI)
|
||||
* Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB)
|
||||
* Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP)
|
||||
* Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that
|
||||
could lead to crashes at large matrix sizes
|
||||
* Restored internal soname in dynamic libraries on FreeBSD and Dragonfly
|
||||
* Added API (openblas_setaffinity) to set the thread affinity on Linux
|
||||
* Added initial infrastructure for half-precision floating point
|
||||
(bfloat16) support with a generic implementation of SHGEMM
|
||||
* Added CMAKE build system support for building the cblas_Xgemm3m
|
||||
functions
|
||||
* Fixed CMAKE support for building in a path with embedded spaces
|
||||
* Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC
|
||||
* Fixed GCC version detection in the Makefiles
|
||||
* Allowed overriding the names of AR, AS and LD in Makefile builds
|
||||
|
||||
POWER:
|
||||
* Fixed big-endian POWER8 ELFv2 builds on FreeBSD
|
||||
* Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9
|
||||
* Fixed CMAKE build support for POWER9
|
||||
* Fixed a potential race condition in the thread buffer allocation
|
||||
* Worked around LAPACK test failures on PPC G4
|
||||
|
||||
MIPS:
|
||||
* Fixed a potential race condition in the thread buffer allocation
|
||||
* Added support for MIPS 24K/24KE family based on P5600 kernels
|
||||
|
||||
MIPS64:
|
||||
* Fixed a potential race condition in the thread buffer allocation
|
||||
* Added TARGET=GENERIC
|
||||
|
||||
ARMV7:
|
||||
* Fixed a race condition in the thread buffer allocation
|
||||
|
||||
ARMV8:
|
||||
* Fixed a race condition in the thread buffer allocation
|
||||
* Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA
|
||||
* Improved performance of the ThunderX2 DAXPY kernel
|
||||
* Added an optimized SGEMM kernel for Cortex A53
|
||||
* Fixed Makefile support for INTERFACE64 (8-byte integer)
|
||||
|
||||
x86_64:
|
||||
* Fixed a syntax error in the CMAKE setup for SkylakeX
|
||||
* Improved performance of STRSM on Haswell, SkylakeX and Ryzen
|
||||
* Improved SGEMM performance for workloads with ldc a
|
||||
multiple of 1024
|
||||
* Improved DGEMM performance on SkylakeX
|
||||
* Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH
|
||||
builds created on SkylakeX
|
||||
* Removed data alignment requirement in the SSE2 copy kernels
|
||||
that could cause spurious crashes
|
||||
* Added a workaround for an optimizer bug in AppleClang 11.0.3
|
||||
* Fixed LAPACK test failures due to wrong options for Intel Fortran
|
||||
* Fixed compilation and LAPACK test results with recent Flang
|
||||
and AMD AOCC
|
||||
* Fixed DYNAMIC_ARCH builds with CMAKE on OS X
|
||||
* Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max,
|
||||
cblas_?sum, cblas_?gemm3m in the shared library on OS
|
||||
* Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes
|
||||
show the name of an older generation chip supported by the same kernels)
|
||||
|
||||
IBM Z:
|
||||
* Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14
|
||||
|
||||
====================================================================
|
||||
Version 0.3.9
|
||||
1-Mar-2020
|
||||
|
|
|
@ -9,6 +9,16 @@ else
|
|||
USE_OPENMP = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER10)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
else
|
||||
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER9)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.9.dev
|
||||
VERSION = 0.3.10.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
|
|
@ -263,10 +263,10 @@ endif
|
|||
|
||||
ARFLAGS =
|
||||
CPP = $(COMPILER) -E
|
||||
AR = $(CROSS_SUFFIX)ar
|
||||
AS = $(CROSS_SUFFIX)as
|
||||
LD = $(CROSS_SUFFIX)ld
|
||||
RANLIB = $(CROSS_SUFFIX)ranlib
|
||||
AR ?= $(CROSS_SUFFIX)ar
|
||||
AS ?= $(CROSS_SUFFIX)as
|
||||
LD ?= $(CROSS_SUFFIX)ld
|
||||
RANLIB ?= $(CROSS_SUFFIX)ranlib
|
||||
NM = $(CROSS_SUFFIX)nm
|
||||
DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
|
@ -282,10 +282,21 @@ endif
|
|||
ifeq ($(C_COMPILER), GCC)
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCVERSIONEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` = 5)
|
||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
||||
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
|
||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
|
||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
|
||||
# Note that the behavior of -dumpversion is compile-time-configurable for
|
||||
# gcc-7.x and newer. Use -dumpfullversion there
|
||||
ifeq ($(GCCVERSIONGTEQ7),1)
|
||||
GCCDUMPVERSION_PARAM := -dumpfullversion
|
||||
else
|
||||
GCCDUMPVERSION_PARAM := -dumpversion
|
||||
endif
|
||||
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
|
||||
endif
|
||||
|
||||
#
|
||||
|
@ -570,20 +581,27 @@ ifeq ($(ARCH), zarch)
|
|||
DYNAMIC_CORE = ZARCH_GENERIC
|
||||
|
||||
# Z13 is supported since gcc-5.2, gcc-6, and in RHEL 7.3 and newer
|
||||
GCC_GE_52 := $(subst 0,,$(shell expr `$(CC) -dumpversion` \>= "5.2"))
|
||||
|
||||
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
|
||||
RHEL_WITH_Z13 := $(subst 0,,$(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"))
|
||||
ifeq ($(GCCVERSIONGT5), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
else ifeq ($(GCCVERSIONEQ5), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(or $(GCC_GE_52),$(RHEL_WITH_Z13)), 1)
|
||||
ifeq ($(wildcard /etc/redhat-release), /etc/redhat-release)
|
||||
ifeq ($(shell source /etc/os-release ; expr $$VERSION_ID \>= "7.3"), 1)
|
||||
ZARCH_SUPPORT_Z13 := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ZARCH_SUPPORT_Z13), 1)
|
||||
DYNAMIC_CORE += Z13
|
||||
else
|
||||
$(info OpenBLAS: Not building Z13 kernels because gcc is older than 5.2 or 6.x)
|
||||
endif
|
||||
|
||||
GCC_MAJOR_GE_7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
|
||||
ifeq ($(GCC_MAJOR_GE_7), 1)
|
||||
ifeq ($(GCCVERSIONGTEQ7), 1)
|
||||
DYNAMIC_CORE += Z14
|
||||
else
|
||||
$(info OpenBLAS: Not building Z14 kernels because gcc is older than 7.x)
|
||||
|
@ -595,14 +613,23 @@ DYNAMIC_CORE = POWER6
|
|||
DYNAMIC_CORE += POWER8
|
||||
ifneq ($(C_COMPILER), GCC)
|
||||
DYNAMIC_CORE += POWER9
|
||||
DYNAMIC_CORE += POWER10
|
||||
endif
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
||||
ifeq ($(GCCVERSIONGT5), 1)
|
||||
DYNAMIC_CORE += POWER9
|
||||
else
|
||||
# Tell the user why POWER9 is left out of DYNAMIC_CORE. The function name must
# be followed by whitespace, not a comma: "$(info, text)" is parsed by GNU make
# as a reference to an (empty) variable and prints nothing.
$(info OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
|
||||
endif
|
||||
ifeq ($(GCCVERSIONGTEQ11), 1)
|
||||
DYNAMIC_CORE += POWER10
|
||||
# gcc 10.x case: the >= 11 case is handled by the branch above, so testing
# GCCVERSIONGTEQ10 here selects exactly major version 10. (GCCVERSIONEQ10 is
# not assigned by the version checks earlier in this file, so the previous
# test could never be true.)
else ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
|
||||
DYNAMIC_CORE += POWER10
|
||||
endif
|
||||
else
|
||||
# Tell the user why POWER10 is left out of DYNAMIC_CORE. The function name must
# be followed by whitespace, not a comma: "$(info, text)" is parsed by GNU make
# as a reference to an (empty) variable and prints nothing.
$(info OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -783,7 +810,15 @@ endif
|
|||
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FLANG
|
||||
FCOMMON_OPT += -frecursive
|
||||
FCOMMON_OPT += -Mrecursive -Kieee
|
||||
ifeq ($(OSNAME), Linux)
|
||||
ifeq ($(ARCH), x86_64)
|
||||
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
|
||||
ifeq ($(FLANG_VENDOR),AOCC)
|
||||
FCOMMON_OPT += -fno-unroll-loops
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
|
@ -797,11 +832,6 @@ endif
|
|||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
ifeq ($(OSNAME), Linux)
|
||||
ifeq ($(ARCH), x86_64)
|
||||
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), G77)
|
||||
|
@ -1276,11 +1306,7 @@ endif
|
|||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
override FFLAGS += $(filter-out -O2 -O3,$(COMMON_OPT)) -O1 $(FCOMMON_OPT)
|
||||
else
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
endif
|
||||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
|
|
|
@ -31,14 +31,24 @@ ifeq ($(CORE), HASWELL)
|
|||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
||||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@ POWER6
|
|||
POWER7
|
||||
POWER8
|
||||
POWER9
|
||||
POWER10
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
|
|
|
@ -49,7 +49,7 @@ if (DYNAMIC_ARCH)
|
|||
endif ()
|
||||
|
||||
if (POWER)
|
||||
set(DYNAMIC_CORE POWER6 POWER8 POWER9)
|
||||
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
|
|
|
@ -16,6 +16,7 @@ if (${F_COMPILER} STREQUAL "FLANG")
|
|||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Mrecursive -Kieee")
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "G77")
|
||||
|
|
|
@ -420,7 +420,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
|||
set(ZGEMM_UNROLL_M 8)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
set(SYMV_P 8)
|
||||
elseif ("${TCORE}" STREQUAL "POWER9")
|
||||
elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE 32768\n"
|
||||
"#define L1_DATA_LINESIZE 128\n"
|
||||
|
|
|
@ -417,6 +417,14 @@ if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows
|
|||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
|
||||
endif ()
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
if ("${F_COMPILER}" STREQUAL "FLANG")
|
||||
if (${CMAKE_Fortran_COMPILER_VERSION} VERSION_LESS_EQUAL 3)
|
||||
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -fno-unroll-loops")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SUFFIX)
|
||||
set(SUFFIX o)
|
||||
endif ()
|
||||
|
|
7
common.h
7
common.h
|
@ -360,13 +360,8 @@ typedef int blasint;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER8
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER9
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
|
|
|
@ -68,7 +68,7 @@
|
|||
#endif
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#define MB __asm__ __volatile__ ("eieio":::"memory")
|
||||
#define WMB __asm__ __volatile__ ("eieio":::"memory")
|
||||
#define RMB __asm__ __volatile__ ("eieio":::"memory")
|
||||
|
@ -272,7 +272,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
|||
#define HAVE_PREFETCH
|
||||
#endif
|
||||
|
||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || defined(PPC970)
|
||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || defined(POWER10) || defined(PPC970)
|
||||
#define DCBT_ARG 0
|
||||
#else
|
||||
#define DCBT_ARG 8
|
||||
|
@ -294,7 +294,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
|||
#define L1_PREFETCH dcbtst
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#define L1_DUALFETCH
|
||||
#define L1_PREFETCHSIZE (16 + 128 * 100)
|
||||
#define L1_PREFETCH dcbtst
|
||||
|
@ -843,7 +843,7 @@ Lmcount$lazy_ptr:
|
|||
#define BUFFER_SIZE ( 2 << 20)
|
||||
#elif defined(PPC440FP2)
|
||||
#define BUFFER_SIZE ( 16 << 20)
|
||||
#elif defined(POWER8) || defined(POWER9)
|
||||
#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#define BUFFER_SIZE ( 64 << 20)
|
||||
#else
|
||||
#define BUFFER_SIZE ( 16 << 20)
|
||||
|
|
|
@ -132,18 +132,18 @@ extern int blas_server_avail;
|
|||
static __inline int num_cpu_avail(int level) {
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
int openmp_nthreads=0;
|
||||
int openmp_nthreads=omp_get_max_threads();
|
||||
#endif
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
if (blas_cpu_number == 1
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
|| omp_in_parallel()
|
||||
#endif
|
||||
) return 1;
|
||||
#ifdef USE_OPENMP
|
||||
if (openmp_nthreads == 1 || omp_in_parallel()
|
||||
#endif
|
||||
) return 1;
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
openmp_nthreads=omp_get_max_threads();
|
||||
if (blas_cpu_number != openmp_nthreads) {
|
||||
goto_set_num_threads(openmp_nthreads);
|
||||
}
|
||||
|
|
|
@ -5,6 +5,14 @@ inline void pauser(){
|
|||
std::getline(std::cin, dummy);
|
||||
}
|
||||
|
||||
void FailIfThreadsAreZero(uint32_t numConcurrentThreads) {
|
||||
if(numConcurrentThreads == 0) {
|
||||
std::cout<<"ERROR: Invalid parameter 0 for number of concurrent calls into OpenBLAS!"<<std::endl;
|
||||
std::cout<<"CBLAS DGEMV thread safety test FAILED!"<<std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
void FillMatrices(std::vector<std::vector<double>>& matBlock, std::mt19937_64& PRNG, std::uniform_real_distribution<double>& rngdist, const blasint randomMatSize, const uint32_t numConcurrentThreads, const uint32_t numMat){
|
||||
for(uint32_t i=0; i<numMat; i++){
|
||||
for(uint32_t j = 0; j < static_cast<uint32_t>(randomMatSize*randomMatSize); j++){
|
||||
|
|
|
@ -46,6 +46,8 @@ int main(int argc, char* argv[]){
|
|||
std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
|
||||
std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
|
||||
std::cout<<"This test will need "<<(static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*3*8)/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
|
||||
|
||||
FailIfThreadsAreZero(numConcurrentThreads);
|
||||
|
||||
std::cout<<"Initializing random number generator..."<<std::flush;
|
||||
std::mt19937_64 PRNG = InitPRNG();
|
||||
|
|
|
@ -18,7 +18,7 @@ int main(int argc, char* argv[]){
|
|||
uint32_t maxHwThreads = omp_get_max_threads();
|
||||
|
||||
if (maxHwThreads < 52)
|
||||
numConcurrentThreads = maxHwThreads -4;
|
||||
numConcurrentThreads = maxHwThreads;
|
||||
|
||||
if (argc > 4){
|
||||
std::cout<<"ERROR: too many arguments for thread safety tester"<<std::endl;
|
||||
|
@ -47,6 +47,8 @@ int main(int argc, char* argv[]){
|
|||
std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
|
||||
std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
|
||||
std::cout<<"This test will need "<<((static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*8)+(static_cast<uint64_t>(randomMatSize)*numConcurrentThreads*8*2))/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
|
||||
|
||||
FailIfThreadsAreZero(numConcurrentThreads);
|
||||
|
||||
std::cout<<"Initializing random number generator..."<<std::flush;
|
||||
std::mt19937_64 PRNG = InitPRNG();
|
||||
|
|
|
@ -57,6 +57,7 @@
|
|||
#define CPUTYPE_PPCG4 7
|
||||
#define CPUTYPE_POWER8 8
|
||||
#define CPUTYPE_POWER9 9
|
||||
#define CPUTYPE_POWER10 10
|
||||
|
||||
char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
|
@ -68,7 +69,8 @@ char *cpuname[] = {
|
|||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
"POWER9"
|
||||
"POWER9",
|
||||
"POWER10"
|
||||
};
|
||||
|
||||
char *lowercpuname[] = {
|
||||
|
@ -81,7 +83,8 @@ char *lowercpuname[] = {
|
|||
"cell",
|
||||
"ppcg4",
|
||||
"power8",
|
||||
"power9"
|
||||
"power9",
|
||||
"power10"
|
||||
};
|
||||
|
||||
char *corename[] = {
|
||||
|
@ -94,7 +97,8 @@ char *corename[] = {
|
|||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
"POWER9"
|
||||
"POWER9",
|
||||
"POWER10"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
|
@ -125,6 +129,7 @@ int detect(void){
|
|||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
|
||||
if (!strncasecmp(p, "POWER10", 7)) return CPUTYPE_POWER10;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
|
||||
|
@ -157,6 +162,7 @@ int detect(void){
|
|||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
|
||||
if (!strncasecmp(p, "POWER10", 7)) return CPUTYPE_POWER10;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
return CPUTYPE_POWER5;
|
||||
|
@ -179,6 +185,9 @@ int detect(void){
|
|||
int id;
|
||||
__asm __volatile("mfpvr %0" : "=r"(id));
|
||||
switch ( id >> 16 ) {
|
||||
case 0x80: // POWER10
|
||||
return CPUTYPE_POWER10;
|
||||
break;
|
||||
case 0x4e: // POWER9
|
||||
return CPUTYPE_POWER9;
|
||||
break;
|
||||
|
|
24
cpuid_x86.c
24
cpuid_x86.c
|
@ -1406,6 +1406,17 @@ int get_cpuname(void){
|
|||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
case 10: //family 6 exmodel 10
|
||||
switch (model) {
|
||||
case 5: // Comet Lake H and S
|
||||
case 6: // Comet Lake U
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1955,6 +1966,19 @@ int get_coretype(void){
|
|||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 10:
|
||||
switch (model) {
|
||||
case 5: // Comet Lake H and S
|
||||
case 6: // Comet Lake U
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
case 5:
|
||||
switch (model) {
|
||||
case 6:
|
||||
|
|
|
@ -332,7 +332,7 @@ int support_avx512(){
|
|||
if((ebx & (1<<7)) == 0){
|
||||
ret=0; //OS does not even support AVX2
|
||||
}
|
||||
if((ebx & (1<<31)) != 0){
|
||||
if((ebx & (1u<<31)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
if((eax & 0xe0) == 0xe0)
|
||||
ret=1; //OS supports AVX512VL
|
||||
|
@ -618,6 +618,18 @@ static gotoblas_t *get_coretype(void){
|
|||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
case 10:
|
||||
if (model == 5 || model == 6) {
|
||||
if(support_avx2())
|
||||
return &gotoblas_HASWELL;
|
||||
if(support_avx()) {
|
||||
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
} else {
|
||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
case 0xf:
|
||||
|
@ -632,7 +644,7 @@ static gotoblas_t *get_coretype(void){
|
|||
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
|
||||
if ( (eax & 0xffff) >= 0x01) {
|
||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||
if ((edx & (1 << 30)) == 0 || (edx & (1 << 31)) == 0)
|
||||
if ((edx & (1 << 30)) == 0 || (edx & (1u << 31)) == 0)
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
|
@ -764,18 +776,53 @@ char *gotoblas_corename(void) {
|
|||
if (gotoblas == &gotoblas_NORTHWOOD) return corename[ 3];
|
||||
if (gotoblas == &gotoblas_PRESCOTT) return corename[ 4];
|
||||
if (gotoblas == &gotoblas_BANIAS) return corename[ 5];
|
||||
if (gotoblas == &gotoblas_ATOM) return corename[ 6];
|
||||
if (gotoblas == &gotoblas_ATOM)
|
||||
#ifdef DYNAMIC_OLDER
|
||||
return corename[ 6];
|
||||
#else
|
||||
return corename[10];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_CORE2) return corename[ 7];
|
||||
if (gotoblas == &gotoblas_PENRYN) return corename[ 8];
|
||||
if (gotoblas == &gotoblas_DUNNINGTON) return corename[ 9];
|
||||
if (gotoblas == &gotoblas_PENRYN)
|
||||
#ifdef DYNAMIC_OLDER
|
||||
return corename[ 8];
|
||||
#else
|
||||
return corename[7];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_DUNNINGTON)
|
||||
#ifdef DYNAMIC_OLDER
|
||||
return corename[ 9];
|
||||
#else
|
||||
return corename[7];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_NEHALEM) return corename[10];
|
||||
if (gotoblas == &gotoblas_ATHLON) return corename[11];
|
||||
if (gotoblas == &gotoblas_OPTERON_SSE3) return corename[12];
|
||||
if (gotoblas == &gotoblas_OPTERON) return corename[13];
|
||||
if (gotoblas == &gotoblas_OPTERON_SSE3)
|
||||
#ifdef DYNAMIC_OLDER
|
||||
return corename[12];
|
||||
#else
|
||||
return corename[7];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_OPTERON)
|
||||
#ifdef DYNAMIC_OLDER
|
||||
return corename[13];
|
||||
#else
|
||||
return corename[7];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_BARCELONA) return corename[14];
|
||||
if (gotoblas == &gotoblas_NANO) return corename[15];
|
||||
if (gotoblas == &gotoblas_NANO)
|
||||
#ifdef DYNAMIC_OLDER
|
||||
return corename[15];
|
||||
#else
|
||||
return corename[10];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
|
||||
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
|
||||
if (gotoblas == &gotoblas_BOBCAT)
|
||||
#ifdef DYNAMIC_OLDER
|
||||
return corename[17];
|
||||
#else
|
||||
return corename[7];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
|
||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
|
||||
if (gotoblas == &gotoblas_HASWELL) return corename[20];
|
||||
|
@ -787,6 +834,7 @@ char *gotoblas_corename(void) {
|
|||
}
|
||||
|
||||
|
||||
|
||||
static gotoblas_t *force_coretype(char *coretype){
|
||||
|
||||
int i ;
|
||||
|
|
|
@ -6,6 +6,13 @@ extern gotoblas_t gotoblas_POWER8;
|
|||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
extern gotoblas_t gotoblas_POWER9;
|
||||
#endif
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 11) \
|
||||
|| (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
|
||||
#define HAVE_P10_SUPPORT 1
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
extern gotoblas_t gotoblas_POWER10;
|
||||
#endif
|
||||
|
||||
extern void openblas_warning(int verbose, const char *msg);
|
||||
|
||||
|
@ -13,7 +20,8 @@ static char *corename[] = {
|
|||
"unknown",
|
||||
"POWER6",
|
||||
"POWER8",
|
||||
"POWER9"
|
||||
"POWER9",
|
||||
"POWER10"
|
||||
};
|
||||
|
||||
/* Number of entries in corename[] ("unknown", POWER6, POWER8, POWER9, POWER10).
 * NOTE(review): presumably bounds the name lookup in force_coretype(), whose
 * switch handles index 4 (POWER10) - confirm against the loop that uses it. */
#define NUM_CORETYPES 5
|
||||
|
@ -23,6 +31,9 @@ char *gotoblas_corename(void) {
|
|||
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
if (gotoblas == &gotoblas_POWER10) return corename[4];
|
||||
#endif
|
||||
return corename[0];
|
||||
}
|
||||
|
@ -36,6 +47,10 @@ static gotoblas_t *get_coretype(void) {
|
|||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
if (__builtin_cpu_is("power9"))
|
||||
return &gotoblas_POWER9;
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
|
||||
return &gotoblas_POWER10;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
@ -61,6 +76,9 @@ static gotoblas_t *force_coretype(char * coretype) {
|
|||
case 2: return (&gotoblas_POWER8);
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
case 3: return (&gotoblas_POWER9);
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
case 4: return (&gotoblas_POWER10);
|
||||
#endif
|
||||
default: return NULL;
|
||||
}
|
||||
|
|
|
@ -155,8 +155,12 @@ ifeq ($(F_COMPILER), INTEL)
|
|||
-Wl,--whole-archive $< -Wl,--no-whole-archive \
|
||||
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
else ifeq ($(F_COMPILER), FLANG)
|
||||
$(FC) $(FFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive $< -Wl,--no-whole-archive \
|
||||
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
|
||||
else
|
||||
|
||||
ifneq ($(C_COMPILER), LSB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
|
||||
-Wl,--whole-archive $< -Wl,--no-whole-archive \
|
||||
|
|
|
@ -40,15 +40,10 @@
|
|||
ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv,
|
||||
xerbla,
|
||||
saxpby,daxpby,caxpby,zaxpby,
|
||||
somatcopy, domatcopy, comatcopy, zomatcopy,
|
||||
simatcopy, dimatcopy, cimatcopy, zimatcopy,
|
||||
sgeadd,dgeadd,cgeadd,zgeadd,
|
||||
somatcopy,
|
||||
simatcopy,
|
||||
domatcopy,
|
||||
dimatcopy,
|
||||
comatcopy,
|
||||
cimatcopy,
|
||||
zomatcopy,
|
||||
zimatcopy,
|
||||
ssum, dsum, scsum, dzsum
|
||||
);
|
||||
|
||||
@halfblasobjs = (shgemm);
|
||||
|
@ -81,7 +76,12 @@
|
|||
cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby,
|
||||
cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy,
|
||||
cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy,
|
||||
cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd
|
||||
cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd,
|
||||
cblas_isamin, cblas_idamin, cblas_icamin, cblas_izamin,
|
||||
cblas_ismin, cblas_idmin, cblas_icmin, cblas_izmin,
|
||||
cblas_ismax, cblas_idmax, cblas_icmax, cblas_izmax,
|
||||
cblas_ssum, cblas_dsum, cblas_scsum, cblas_dzsum,
|
||||
cblas_xerbla
|
||||
);
|
||||
|
||||
@halfcblasobjs = (cblas_shgemm);
|
||||
|
@ -3501,9 +3501,12 @@ if ($ARGV[1] eq "x86") { @underscore_objs = (@underscore_objs, @gemm3mobjs);
|
|||
if ($ARGV[1] eq "ia64") { @underscore_objs = (@underscore_objs, @gemm3mobjs); };
|
||||
if ($ARGV[1] eq "MIPS") { @underscore_objs = (@underscore_objs, @gemm3mobjs); };
|
||||
|
||||
|
||||
if ($ARGV[4] == 0) {
|
||||
@no_underscore_objs = (@cblasobjs, @misc_no_underscore_objs);
|
||||
if ($ARGV[1] eq "x86_64") { @no_underscore_objs = (@no_underscore_objs, @cblasgemm3mobjs); };
|
||||
if ($ARGV[1] eq "x86") { @no_underscore_objs = (@no_underscore_objs, @cblasgemm3mobjs); };
|
||||
if ($ARGV[1] eq "ia64") { @no_underscore_objs = (@no_underscore_objs, @cblasgemm3mobjs); };
|
||||
if ($ARGV[1] eq "MIPS") { @no_underscore_objs = (@no_underscore_objs, @cblasgemm3mobjs); };
|
||||
}else{
|
||||
#NO_CBLAS=1
|
||||
@no_underscore_objs = (@misc_no_underscore_objs);
|
||||
|
|
13
getarch.c
13
getarch.c
|
@ -650,6 +650,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "POWER9"
|
||||
#endif
|
||||
|
||||
#if defined(FORCE_POWER10)
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "POWER"
|
||||
#define SUBARCHITECTURE "POWER10"
|
||||
#define SUBDIRNAME "power"
|
||||
#define ARCHCONFIG "-DPOWER10 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "power10"
|
||||
#define CORENAME "POWER10"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_PPCG4
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "POWER"
|
||||
|
|
|
@ -130,7 +130,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) )
|
||||
set(USE_TRMM true)
|
||||
endif ()
|
||||
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9))
|
||||
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9) OR (TARGET_CORE MATCHES POWER10))
|
||||
set(USE_TRMM true)
|
||||
endif ()
|
||||
|
||||
|
|
|
@ -51,6 +51,10 @@ ifeq ($(CORE), POWER9)
|
|||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER10)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), zarch)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
@ -479,7 +483,7 @@ $(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
|||
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
|
||||
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s
|
||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmotcopy.s
|
||||
m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@
|
||||
rm shgemmotcopy.s shgemmotcopy_nomacros.s
|
||||
|
@ -494,7 +498,7 @@ $(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY)
|
|||
|
||||
$(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmitcopy.s
|
||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmitcopy.s
|
||||
m4 shgemmitcopy.s > shgemmitcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@
|
||||
rm shgemmitcopy.s shgemmitcopy_nomacros.s
|
||||
|
@ -510,7 +514,7 @@ $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY)
|
|||
|
||||
$(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmotcopy.s
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s
|
||||
m4 sgemmotcopy.s > sgemmotcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@
|
||||
rm sgemmotcopy.s sgemmotcopy_nomacros.s
|
||||
|
@ -526,7 +530,7 @@ $(KDIR)$(SGEMMINCOPYOBJ) : $(KERNELDIR)/$(SGEMMINCOPY)
|
|||
|
||||
$(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemmitcopy.s
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s
|
||||
m4 sgemmitcopy.s > sgemmitcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@
|
||||
rm sgemmitcopy.s sgemmitcopy_nomacros.s
|
||||
|
@ -538,7 +542,7 @@ endif
|
|||
|
||||
$(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_ncopy.s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s
|
||||
m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@
|
||||
rm dgemm_ncopy.s dgemm_ncopy_nomacros.s
|
||||
|
@ -556,7 +560,7 @@ $(KDIR)$(DGEMMINCOPYOBJ) : $(KERNELDIR)/$(DGEMMINCOPY)
|
|||
|
||||
$(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_itcopy.s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s
|
||||
m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@
|
||||
rm dgemm_itcopy.s dgemm_itcopy_nomacros.s
|
||||
|
@ -599,7 +603,7 @@ $(KDIR)$(CGEMMINCOPYOBJ) : $(KERNELDIR)/$(CGEMMINCOPY)
|
|||
|
||||
$(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -E $< -o cgemm_itcopy.s
|
||||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s
|
||||
m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@
|
||||
rm cgemm_itcopy.s cgemm_itcopy_nomacros.s
|
||||
|
@ -622,7 +626,7 @@ $(KDIR)$(ZGEMMINCOPYOBJ) : $(KERNELDIR)/$(ZGEMMINCOPY)
|
|||
|
||||
$(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o zgemm_itcopy.s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s
|
||||
m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@
|
||||
rm zgemm_itcopy.s zgemm_itcopy_nomacros.s
|
||||
|
@ -654,7 +658,7 @@ endif
|
|||
|
||||
$(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -UDOUBLE -UCOMPLEX $< -o sgemm_kernel$(TSUFFIX).s
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s
|
||||
m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm sgemm_kernel$(TSUFFIX).s sgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
|
@ -666,7 +670,7 @@ ifeq ($(BUILD_HALF), 1)
|
|||
|
||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s
|
||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemm_kernel$(TSUFFIX).s
|
||||
m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
|
@ -677,7 +681,7 @@ endif
|
|||
|
||||
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -UCOMPLEX $< -o dgemm_kernel$(TSUFFIX).s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s
|
||||
m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
|
@ -690,7 +694,7 @@ $(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEP
|
|||
|
||||
$(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNN $< -o cgemm_kernel_n.s
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s
|
||||
m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@
|
||||
rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s
|
||||
|
@ -700,7 +704,7 @@ endif
|
|||
|
||||
$(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCN $< -o cgemm_kernel_l.s
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s
|
||||
m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@
|
||||
rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s
|
||||
|
@ -710,7 +714,7 @@ endif
|
|||
|
||||
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
|
||||
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
|
||||
rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s
|
||||
|
@ -720,7 +724,7 @@ endif
|
|||
|
||||
$(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -UDOUBLE -DCOMPLEX -DCC $< -o cgemm_kernel_b.s
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s
|
||||
m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@
|
||||
rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s
|
||||
|
@ -730,7 +734,7 @@ endif
|
|||
|
||||
$(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNN $< -o zgemm_kernel_n.s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s
|
||||
m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@
|
||||
rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s
|
||||
|
@ -740,7 +744,7 @@ endif
|
|||
|
||||
$(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCN $< -o zgemm_kernel_l.s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s
|
||||
m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@
|
||||
rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s
|
||||
|
@ -750,7 +754,7 @@ endif
|
|||
|
||||
$(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DNC $< -o zgemm_kernel_r.s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s
|
||||
m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@
|
||||
rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s
|
||||
|
@ -760,7 +764,7 @@ endif
|
|||
|
||||
$(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DDOUBLE -DCOMPLEX -DCC $< -o zgemm_kernel_b.s
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s
|
||||
m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@
|
||||
rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s
|
||||
|
@ -784,7 +788,7 @@ $(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD
|
|||
ifdef USE_TRMM
|
||||
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o strmmkernel_ln.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s
|
||||
m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@
|
||||
rm strmmkernel_ln.s strmmkernel_ln_nomacros.s
|
||||
|
@ -794,7 +798,7 @@ endif
|
|||
|
||||
$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o strmmkernel_lt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s
|
||||
m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@
|
||||
rm strmmkernel_lt.s strmmkernel_lt_nomacros.s
|
||||
|
@ -804,7 +808,7 @@ endif
|
|||
|
||||
$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o strmmkernel_rn.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s
|
||||
m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@
|
||||
rm strmmkernel_rn.s strmmkernel_rn_nomacros.s
|
||||
|
@ -814,7 +818,7 @@ endif
|
|||
|
||||
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
|
||||
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
|
||||
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
|
||||
|
@ -824,7 +828,7 @@ endif
|
|||
|
||||
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o dtrmm_kernel_ln.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s
|
||||
m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@
|
||||
rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s
|
||||
|
@ -834,7 +838,7 @@ endif
|
|||
|
||||
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o dtrmm_kernel_lt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s
|
||||
m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@
|
||||
rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s
|
||||
|
@ -844,7 +848,7 @@ endif
|
|||
|
||||
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o dtrmm_kernel_rn.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s
|
||||
m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@
|
||||
rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s
|
||||
|
@ -854,7 +858,7 @@ endif
|
|||
|
||||
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o dtrmm_kernel_rt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s
|
||||
m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@
|
||||
rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s
|
||||
|
@ -876,7 +880,7 @@ $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
|||
|
||||
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_ln.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s
|
||||
m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@
|
||||
rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s
|
||||
|
@ -886,7 +890,7 @@ endif
|
|||
|
||||
$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_lt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s
|
||||
m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@
|
||||
rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s
|
||||
|
@ -896,7 +900,7 @@ endif
|
|||
|
||||
$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lr.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s
|
||||
m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@
|
||||
rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s
|
||||
|
@ -906,7 +910,7 @@ endif
|
|||
|
||||
$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ctrmm_kernel_lc.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s
|
||||
m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@
|
||||
rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s
|
||||
|
@ -916,7 +920,7 @@ endif
|
|||
|
||||
$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rn.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s
|
||||
m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@
|
||||
rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s
|
||||
|
@ -926,7 +930,7 @@ endif
|
|||
|
||||
$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ctrmm_kernel_rt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s
|
||||
m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@
|
||||
rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s
|
||||
|
@ -936,7 +940,7 @@ endif
|
|||
|
||||
$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ctrmm_kernel_rr.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s
|
||||
m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@
|
||||
rm ctrmm_kernel_rr.s ctrmm_kernel_rr_nomacros.s
|
||||
|
@ -946,7 +950,7 @@ endif
|
|||
|
||||
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ctrmm_kernel_RC.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s
|
||||
m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@
|
||||
rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s
|
||||
|
@ -956,7 +960,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_ln.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s
|
||||
m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@
|
||||
rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s
|
||||
|
@ -966,7 +970,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_lt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s
|
||||
m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@
|
||||
rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s
|
||||
|
@ -976,7 +980,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lr.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s
|
||||
m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@
|
||||
rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s
|
||||
|
@ -986,7 +990,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o ztrmm_kernel_lc.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s
|
||||
m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@
|
||||
rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s
|
||||
|
@ -996,7 +1000,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rn.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s
|
||||
m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s
|
||||
|
@ -1006,7 +1010,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o ztrmm_kernel_rt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s
|
||||
m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s
|
||||
|
@ -1016,7 +1020,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rr.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s
|
||||
m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s
|
||||
|
@ -1026,7 +1030,7 @@ endif
|
|||
|
||||
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o ztrmm_kernel_rc.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s
|
||||
m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s
|
||||
|
@ -1046,7 +1050,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
|||
|
||||
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
|
||||
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
|
||||
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
|
||||
|
@ -1180,7 +1184,7 @@ $(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DT
|
|||
|
||||
$(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o dtrsm_kernel_lt.s
|
||||
$(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s
|
||||
m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s
|
||||
$(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@
|
||||
rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s
|
||||
|
@ -2456,7 +2460,7 @@ $(KDIR)cgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMM
|
|||
|
||||
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(PFLAGS) -E -UDOUBLE -DCOMPLEX -DNC $< -o cgemm_kernel_r.s
|
||||
$(CC) $(PFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
|
||||
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
|
||||
rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s
|
||||
|
@ -2502,7 +2506,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
|||
|
||||
# $(PSUFFIX) variant of the STRMM "RT" kernel, built from the SGEMM kernel source.
# AIX path: emit a .s file, expand it with m4, compile the expanded file, then
# delete both temporaries. The m4 step must read the strmm_kernel_rt.s produced
# by the step above it (it previously read strmmkernel_rn.s, which this rule
# never creates).
# NOTE(review): the .s generation uses $(CFLAGS) while the final compile uses
# $(PFLAGS); the cgemm_kernel_r $(PSUFFIX) rule uses $(PFLAGS) for both - confirm.
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -E -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o strmm_kernel_rt.s
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
|
||||
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
|
||||
$(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
|
||||
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
|
||||
|
|
|
@ -681,12 +681,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro INIT8x4
|
||||
fmov s16, wzr
|
||||
fmov s17, wzr
|
||||
fmov s18, wzr
|
||||
fmov s19, s16
|
||||
fmov s20, wzr
|
||||
fmov s21, s16
|
||||
fmov s24, wzr
|
||||
fmov s25, s16
|
||||
fmov s28, wzr
|
||||
fmov s29, s16
|
||||
fmov s22, wzr
|
||||
fmov s23, s16
|
||||
.endm
|
||||
|
||||
.macro KERNEL8x4_I
|
||||
|
@ -765,14 +765,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
fmla v21.4s, v3.4s, v6.s[2]
|
||||
fmla v22.4s, v2.4s, v6.s[3]
|
||||
fmla v23.4s, v3.4s, v6.s[3]
|
||||
fmla v24.4s, v2.4s, v7.s[0]
|
||||
fmla v25.4s, v3.4s, v7.s[0]
|
||||
fmla v26.4s, v2.4s, v7.s[1]
|
||||
fmla v27.4s, v3.4s, v7.s[1]
|
||||
fmla v28.4s, v2.4s, v7.s[2]
|
||||
fmla v29.4s, v3.4s, v7.s[2]
|
||||
fmla v30.4s, v2.4s, v7.s[3]
|
||||
fmla v31.4s, v3.4s, v7.s[3]
|
||||
.endm
|
||||
|
||||
.macro KERNEL8x4_SUB
|
||||
|
|
|
@ -39,24 +39,24 @@
|
|||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
|
||||
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||
BLASLONG i, j;
|
||||
|
||||
FLOAT *aoffset;
|
||||
FLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
|
||||
FLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
|
||||
FLOAT *aoffset9, *aoffset10, *aoffset11, *aoffset12;
|
||||
FLOAT *aoffset13, *aoffset14, *aoffset15, *aoffset16;
|
||||
IFLOAT *aoffset;
|
||||
IFLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
|
||||
IFLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
|
||||
IFLOAT *aoffset9, *aoffset10, *aoffset11, *aoffset12;
|
||||
IFLOAT *aoffset13, *aoffset14, *aoffset15, *aoffset16;
|
||||
|
||||
FLOAT *boffset;
|
||||
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
IFLOAT *boffset;
|
||||
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
|
||||
aoffset = a;
|
||||
boffset = b;
|
||||
|
|
|
@ -39,30 +39,30 @@
|
|||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
|
||||
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||
BLASLONG i, j;
|
||||
|
||||
FLOAT *aoffset;
|
||||
FLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
|
||||
FLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
|
||||
IFLOAT *aoffset;
|
||||
IFLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
|
||||
IFLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
|
||||
|
||||
FLOAT *boffset;
|
||||
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
|
||||
FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
|
||||
FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
|
||||
FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
|
||||
FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
|
||||
FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
|
||||
FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
|
||||
FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
|
||||
IFLOAT *boffset;
|
||||
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
IFLOAT ctemp33, ctemp34, ctemp35, ctemp36;
|
||||
IFLOAT ctemp37, ctemp38, ctemp39, ctemp40;
|
||||
IFLOAT ctemp41, ctemp42, ctemp43, ctemp44;
|
||||
IFLOAT ctemp45, ctemp46, ctemp47, ctemp48;
|
||||
IFLOAT ctemp49, ctemp50, ctemp51, ctemp52;
|
||||
IFLOAT ctemp53, ctemp54, ctemp55, ctemp56;
|
||||
IFLOAT ctemp57, ctemp58, ctemp59, ctemp60;
|
||||
IFLOAT ctemp61, ctemp62, ctemp63, ctemp64;
|
||||
|
||||
|
||||
aoffset = a;
|
||||
|
|
|
@ -39,22 +39,22 @@
|
|||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
|
||||
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||
|
||||
BLASLONG i, j;
|
||||
|
||||
FLOAT *aoffset;
|
||||
FLOAT *aoffset1, *aoffset2;
|
||||
FLOAT *boffset;
|
||||
IFLOAT *aoffset;
|
||||
IFLOAT *aoffset1, *aoffset2;
|
||||
IFLOAT *boffset;
|
||||
|
||||
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
|
||||
aoffset = a;
|
||||
boffset = b;
|
||||
|
|
|
@ -39,32 +39,32 @@
|
|||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
|
||||
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||
|
||||
BLASLONG i, j;
|
||||
|
||||
FLOAT *aoffset;
|
||||
FLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
|
||||
FLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
|
||||
IFLOAT *aoffset;
|
||||
IFLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
|
||||
IFLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
|
||||
|
||||
FLOAT *boffset, *boffset1, *boffset2, *boffset3, *boffset4;
|
||||
IFLOAT *boffset, *boffset1, *boffset2, *boffset3, *boffset4;
|
||||
|
||||
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
|
||||
FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
|
||||
FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
|
||||
FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
|
||||
FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
|
||||
FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
|
||||
FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
|
||||
FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
|
||||
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
|
||||
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
IFLOAT ctemp33, ctemp34, ctemp35, ctemp36;
|
||||
IFLOAT ctemp37, ctemp38, ctemp39, ctemp40;
|
||||
IFLOAT ctemp41, ctemp42, ctemp43, ctemp44;
|
||||
IFLOAT ctemp45, ctemp46, ctemp47, ctemp48;
|
||||
IFLOAT ctemp49, ctemp50, ctemp51, ctemp52;
|
||||
IFLOAT ctemp53, ctemp54, ctemp55, ctemp56;
|
||||
IFLOAT ctemp57, ctemp58, ctemp59, ctemp60;
|
||||
IFLOAT ctemp61, ctemp62, ctemp63, ctemp64;
|
||||
|
||||
aoffset = a;
|
||||
boffset = b;
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
# Big-endian builds reuse the POWER8 kernel selection unchanged; every
# POWER10-specific assignment that follows lives in the else branch and
# therefore only applies when the byte order is not big-endian.
ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
||||
include $(KERNELDIR)/KERNEL.POWER8
|
||||
else
|
||||
|
||||
#SGEMM_BETA = ../generic/gemm_beta.c
|
||||
#DGEMM_BETA = ../generic/gemm_beta.c
|
||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
#ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
|
||||
SHGEMM_BETA = ../generic/gemm_beta.c
|
||||
SHGEMMKERNEL = shgemm_kernel_power10.c
|
||||
SHGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
SHGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||
SHGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
SHGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMKERNEL = sgemm_kernel_power10.c
|
||||
DTRMMKERNEL = dgemm_kernel_power10.c
|
||||
CTRMMKERNEL = cgemm_kernel_power10.S
|
||||
ZTRMMKERNEL = zgemm_kernel_power10.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_power10.c
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
SGEMMITCOPY = sgemm_tcopy_16_power8.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_power10.c
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
DGEMMITCOPY = dgemm_tcopy_16_power8.S
|
||||
DGEMMONCOPY = dgemm_ncopy_4_power8.S
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_power10.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_power10.S
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_8_power8.S
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
#Todo: CGEMM3MKERNEL should be 4x4 blocksizes.
|
||||
#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
|
||||
#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S
|
||||
|
||||
#Pure C for other kernels
|
||||
#SAMAXKERNEL = ../arm/amax.c
|
||||
#DAMAXKERNEL = ../arm/amax.c
|
||||
#CAMAXKERNEL = ../arm/zamax.c
|
||||
#ZAMAXKERNEL = ../arm/zamax.c
|
||||
#
|
||||
#SAMINKERNEL = ../arm/amin.c
|
||||
#DAMINKERNEL = ../arm/amin.c
|
||||
#CAMINKERNEL = ../arm/zamin.c
|
||||
#ZAMINKERNEL = ../arm/zamin.c
|
||||
#
|
||||
#SMAXKERNEL = ../arm/max.c
|
||||
#DMAXKERNEL = ../arm/max.c
|
||||
#
|
||||
#SMINKERNEL = ../arm/min.c
|
||||
#DMINKERNEL = ../arm/min.c
|
||||
#
|
||||
# Pick the ISAMAX kernel source: the assembly file isamax_power9.S is used
# unless GCCVERSIONGTEQ9 equals 1, in which case the C file isamax.c is used.
# The same selection pattern is repeated below for ICAMAX, ISAMIN, ICAMIN,
# CAXPY and CDOT.
# NOTE(review): GCCVERSIONGTEQ9 is presumably set by the top-level Makefiles
# when the compiler is GCC 9 or newer -- confirm where it is defined.
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
ISAMAXKERNEL = isamax_power9.S
|
||||
else
|
||||
ISAMAXKERNEL = isamax.c
|
||||
endif
|
||||
IDAMAXKERNEL = idamax.c
|
||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
ICAMAXKERNEL = icamax_power9.S
|
||||
else
|
||||
ICAMAXKERNEL = icamax.c
|
||||
endif
|
||||
IZAMAXKERNEL = izamax.c
|
||||
#
|
||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
ISAMINKERNEL = isamin_power9.S
|
||||
else
|
||||
ISAMINKERNEL = isamin.c
|
||||
endif
|
||||
IDAMINKERNEL = idamin.c
|
||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
ICAMINKERNEL = icamin_power9.S
|
||||
else
|
||||
ICAMINKERNEL = icamin.c
|
||||
endif
|
||||
IZAMINKERNEL = izamin.c
|
||||
#
|
||||
#ISMAXKERNEL = ../arm/imax.c
|
||||
#IDMAXKERNEL = ../arm/imax.c
|
||||
#
|
||||
#ISMINKERNEL = ../arm/imin.c
|
||||
#IDMINKERNEL = ../arm/imin.c
|
||||
#
|
||||
SASUMKERNEL = sasum.c
|
||||
DASUMKERNEL = dasum.c
|
||||
CASUMKERNEL = casum.c
|
||||
ZASUMKERNEL = zasum.c
|
||||
#
|
||||
SAXPYKERNEL = saxpy.c
|
||||
DAXPYKERNEL = daxpy.c
|
||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
CAXPYKERNEL = caxpy_power9.S
|
||||
else
|
||||
CAXPYKERNEL = caxpy.c
|
||||
endif
|
||||
ZAXPYKERNEL = zaxpy.c
|
||||
#
|
||||
SCOPYKERNEL = scopy.c
|
||||
DCOPYKERNEL = dcopy.c
|
||||
CCOPYKERNEL = ccopy.c
|
||||
ZCOPYKERNEL = zcopy.c
|
||||
#
|
||||
SDOTKERNEL = sdot.c
|
||||
DDOTKERNEL = ddot.c
|
||||
DSDOTKERNEL = sdot.c
|
||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
CDOTKERNEL = cdot_power9.S
|
||||
else
|
||||
CDOTKERNEL = cdot.c
|
||||
endif
|
||||
ZDOTKERNEL = zdot.c
|
||||
#
|
||||
SNRM2KERNEL = ../arm/nrm2.c
|
||||
DNRM2KERNEL = ../arm/nrm2.c
|
||||
CNRM2KERNEL = ../arm/znrm2.c
|
||||
ZNRM2KERNEL = ../arm/znrm2.c
|
||||
#
|
||||
SROTKERNEL = srot.c
|
||||
DROTKERNEL = drot.c
|
||||
CROTKERNEL = crot.c
|
||||
ZROTKERNEL = zrot.c
|
||||
#
|
||||
SSCALKERNEL = sscal.c
|
||||
DSCALKERNEL = dscal.c
|
||||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
#
|
||||
SSWAPKERNEL = sswap.c
|
||||
DSWAPKERNEL = dswap.c
|
||||
CSWAPKERNEL = cswap.c
|
||||
ZSWAPKERNEL = zswap.c
|
||||
#
|
||||
|
||||
SGEMVNKERNEL = sgemv_n.c
|
||||
DGEMVNKERNEL = dgemv_n.c
|
||||
CGEMVNKERNEL = cgemv_n.c
|
||||
ZGEMVNKERNEL = zgemv_n_4.c
|
||||
#
|
||||
SGEMVTKERNEL = sgemv_t.c
|
||||
DGEMVTKERNEL = dgemv_t.c
|
||||
CGEMVTKERNEL = cgemv_t.c
|
||||
ZGEMVTKERNEL = zgemv_t_4.c
|
||||
|
||||
|
||||
#SSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
#SSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
#DSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
#DSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
#QSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
#QSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
#CSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
#CSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
#ZSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
#ZSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
#XSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
#XSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
|
||||
#ZHEMV_U_KERNEL = ../generic/zhemv_k.c
|
||||
#ZHEMV_L_KERNEL = ../generic/zhemv_k.c
|
||||
|
||||
LSAME_KERNEL = ../generic/lsame.c
|
||||
SCABS_KERNEL = ../generic/cabs.c
|
||||
DCABS_KERNEL = ../generic/cabs.c
|
||||
QCABS_KERNEL = ../generic/cabs.c
|
||||
|
||||
#Dump kernel
|
||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||
|
||||
endif
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "casum_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "ccopy_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -424,7 +424,7 @@ L999:
|
|||
lwz r16, 204(SP)
|
||||
lwz r15, 208(SP)
|
||||
lwz r14, 212(SP)
|
||||
addi r11, 224
|
||||
addi r11, SP, 224
|
||||
#endif
|
||||
lvx v20, r11, r0
|
||||
addi r11, r11, 16
|
||||
|
@ -459,4 +459,4 @@ L999:
|
|||
blr
|
||||
|
||||
EPILOGUE
|
||||
#endif^
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,286 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "def_vsx.h"
|
||||
|
||||
|
||||
#define LOAD ld
|
||||
#define STACKSIZE (512 )
|
||||
#define FLINK_SAVE (STACKSIZE+16) /* 16($r12) */
|
||||
#define M r3
|
||||
#define N r4
|
||||
#define K r5
|
||||
|
||||
|
||||
#define A r8
|
||||
#define B r9
|
||||
#define C r10
|
||||
#define LDC r6
|
||||
#define OFFSET r7
|
||||
|
||||
|
||||
#define alpha_r vs51
|
||||
#define alpha_i vs55
|
||||
#define save_permute_1 vs59
|
||||
#define permute_mask vs63
|
||||
#define o0 0
|
||||
|
||||
|
||||
#define T1 r11
|
||||
#define T2 r12
|
||||
#define T3 r14
|
||||
#define T4 r15
|
||||
#define T5 r16
|
||||
#define T6 r17
|
||||
#define L r18
|
||||
#define T7 r19
|
||||
#define T8 r20
|
||||
#define TEMP_REG r21
|
||||
#define I r22
|
||||
#define J r23
|
||||
#define AO r24
|
||||
#define BO r25
|
||||
#define CO r26
|
||||
#define T9 r27
|
||||
#define T10 r28
|
||||
#define PRE r29
|
||||
|
||||
#define T12 r30
|
||||
#define T13 r31
|
||||
|
||||
#include "cgemm_macros_power10.S"
|
||||
|
||||
.equ perm_const1, 0x0405060700010203
|
||||
.equ perm_const2, 0x0c0d0e0f08090a0b
|
||||
.equ save_permute_12, 0x0c0d0e0f1c1d1e1f
|
||||
.equ save_permute_11, 0x0405060714151617
|
||||
|
||||
|
||||
|
||||
#ifndef NEEDPARAM
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
|
||||
addi SP, SP, -STACKSIZE
|
||||
mflr r0
|
||||
|
||||
|
||||
stfd f14, 0(SP)
|
||||
stfd f15, 8(SP)
|
||||
stfd f16, 16(SP)
|
||||
stfd f17, 24(SP)
|
||||
|
||||
stfd f18, 32(SP)
|
||||
stfd f19, 40(SP)
|
||||
stfd f20, 48(SP)
|
||||
stfd f21, 56(SP)
|
||||
|
||||
stfd f22, 64(SP)
|
||||
stfd f23, 72(SP)
|
||||
stfd f24, 80(SP)
|
||||
stfd f25, 88(SP)
|
||||
|
||||
stfd f26, 96(SP)
|
||||
stfd f27, 104(SP)
|
||||
stfd f28, 112(SP)
|
||||
stfd f29, 120(SP)
|
||||
|
||||
stfd f30, 128(SP)
|
||||
stfd f31, 136(SP)
|
||||
|
||||
|
||||
std r31, 144(SP)
|
||||
std r30, 152(SP)
|
||||
std r29, 160(SP)
|
||||
std r28, 168(SP)
|
||||
std r27, 176(SP)
|
||||
std r26, 184(SP)
|
||||
std r25, 192(SP)
|
||||
std r24, 200(SP)
|
||||
std r23, 208(SP)
|
||||
std r22, 216(SP)
|
||||
std r21, 224(SP)
|
||||
std r20, 232(SP)
|
||||
std r19, 240(SP)
|
||||
std r18, 248(SP)
|
||||
std r17, 256(SP)
|
||||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
|
||||
|
||||
stxv vs52, 288(SP)
|
||||
stxv vs53, 304(SP)
|
||||
stxv vs54, 320(SP)
|
||||
stxv vs55, 336(SP)
|
||||
stxv vs56, 352(SP)
|
||||
stxv vs57, 368(SP)
|
||||
stxv vs58, 384(SP)
|
||||
stxv vs59, 400(SP)
|
||||
stxv vs60, 416(SP)
|
||||
stxv vs61, 432(SP)
|
||||
stxv vs62, 448(SP)
|
||||
stxv vs63, 464(SP)
|
||||
std r0, FLINK_SAVE(SP)
|
||||
|
||||
|
||||
|
||||
ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
|
||||
|
||||
|
||||
|
||||
#ifdef TRMMKERNEL
|
||||
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
|
||||
#endif
|
||||
slwi LDC, LDC, ZBASE_SHIFT
|
||||
|
||||
|
||||
|
||||
/* alpha_r arrives in f1 (vs1) and alpha_i in f2 (vs2); convert each to single precision and splat it across all four words */
|
||||
xscvdpspn alpha_r,vs1
|
||||
xscvdpspn alpha_i,vs2
|
||||
xxspltw alpha_r,alpha_r,0
|
||||
xxspltw alpha_i,alpha_i,0
|
||||
/*load reverse permute mask for big endian
|
||||
uint128 = 0x0c0d0e0f08090a0b0405060700010203
|
||||
*/
|
||||
|
||||
lis T2, perm_const2@highest
|
||||
lis T1, perm_const1@highest
|
||||
lis T3, save_permute_12@highest
|
||||
lis T4, save_permute_11@highest
|
||||
|
||||
|
||||
ori T2, T2, perm_const2@higher
|
||||
ori T1, T1, perm_const1@higher
|
||||
ori T3, T3, save_permute_12@higher
|
||||
ori T4, T4, save_permute_11@higher
|
||||
|
||||
|
||||
rldicr T2, T2, 32, 31
|
||||
rldicr T1, T1, 32, 31
|
||||
rldicr T3, T3, 32, 31
|
||||
rldicr T4, T4, 32, 31
|
||||
|
||||
oris T2, T2, perm_const2@h
|
||||
oris T1, T1, perm_const1@h
|
||||
oris T3, T3, save_permute_12@h
|
||||
oris T4, T4, save_permute_11@h
|
||||
|
||||
|
||||
ori T2, T2, perm_const2@l
|
||||
ori T1, T1, perm_const1@l
|
||||
ori T3, T3, save_permute_12@l
|
||||
ori T4, T4, save_permute_11@l
|
||||
|
||||
|
||||
li r0,0
|
||||
li PRE,512
|
||||
|
||||
#if defined(CC) || defined(CR) || defined(RC) || defined(RR)
|
||||
/*negate for this case as we will use addition -1*(a+b) */
|
||||
xvnegsp alpha_r,alpha_r
|
||||
xvnegsp alpha_i,alpha_i
|
||||
#endif
|
||||
|
||||
mtvsrdd permute_mask,T2,T1
|
||||
mtvsrdd save_permute_1,T3,T4
|
||||
|
||||
/*mask is reverse permute so we have to make it inner permute */
|
||||
xxpermdi permute_mask, permute_mask, permute_mask,2
|
||||
|
||||
#include "cgemm_logic_power10.S"
|
||||
|
||||
.L999:
|
||||
lfd f14, 0(SP)
|
||||
lfd f15, 8(SP)
|
||||
lfd f16, 16(SP)
|
||||
lfd f17, 24(SP)
|
||||
|
||||
lfd f18, 32(SP)
|
||||
lfd f19, 40(SP)
|
||||
lfd f20, 48(SP)
|
||||
lfd f21, 56(SP)
|
||||
|
||||
lfd f22, 64(SP)
|
||||
lfd f23, 72(SP)
|
||||
lfd f24, 80(SP)
|
||||
lfd f25, 88(SP)
|
||||
|
||||
lfd f26, 96(SP)
|
||||
lfd f27, 104(SP)
|
||||
lfd f28, 112(SP)
|
||||
lfd f29, 120(SP)
|
||||
|
||||
lfd f30, 128(SP)
|
||||
lfd f31, 136(SP)
|
||||
|
||||
ld r31, 144(SP)
|
||||
ld r30, 152(SP)
|
||||
ld r29, 160(SP)
|
||||
ld r28, 168(SP)
|
||||
ld r27, 176(SP)
|
||||
ld r26, 184(SP)
|
||||
ld r25, 192(SP)
|
||||
ld r24, 200(SP)
|
||||
ld r23, 208(SP)
|
||||
ld r22, 216(SP)
|
||||
ld r21, 224(SP)
|
||||
ld r20, 232(SP)
|
||||
ld r19, 240(SP)
|
||||
ld r18, 248(SP)
|
||||
ld r17, 256(SP)
|
||||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
|
||||
ld r0, FLINK_SAVE(SP)
|
||||
|
||||
lxv vs52, 288(SP)
|
||||
lxv vs53, 304(SP)
|
||||
lxv vs54, 320(SP)
|
||||
lxv vs55, 336(SP)
|
||||
lxv vs56, 352(SP)
|
||||
lxv vs57, 368(SP)
|
||||
lxv vs58, 384(SP)
|
||||
lxv vs59, 400(SP)
|
||||
mtlr r0
|
||||
lxv vs60, 416(SP)
|
||||
lxv vs61, 432(SP)
|
||||
lxv vs62, 448(SP)
|
||||
lxv vs63, 464(SP)
|
||||
|
||||
addi SP, SP, STACKSIZE
|
||||
blr
|
||||
|
||||
|
||||
EPILOGUE
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
|
||||
static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
|
||||
{
|
||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "cswap_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "dasum_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "daxpy_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "dcopy_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "ddot_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,864 @@
|
|||
/*********************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
#include "common.h"
|
||||
#include <altivec.h>
|
||||
|
||||
/* GCC vector types used throughout this kernel.
 *   vec_t  - 16 raw bytes; the type A and B panel pointers are cast to
 *            before being handed to the MMA builtins.
 *   v4sf_t - 16-byte vector of FLOAT; used for the disassembled accumulator
 *            rows and for the vector view of C. This file initialises it
 *            with two lanes ({ alpha, alpha }), so FLOAT is presumably
 *            double here -- TODO confirm against common.h.
 *   v2sf_t - 8-byte vector of FLOAT; not referenced in the part of the file
 *            visible to this review.
 */
typedef unsigned char vec_t __attribute__ ((vector_size (16)));
|
||||
typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
|
||||
typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
|
||||
|
||||
/* Store macros: write one MMA accumulator back to the C tile.
 *
 * Each macro disassembles accumulator ACC into result[0..3] and then writes
 * the rows, scaled by alpha, to the v4sf_t located at CO[n*ldc + J]. The
 * rows are consumed in reverse order: result[3] goes to the lowest column
 * index and result[0] to the highest.
 *   SAVE_ACC    - columns 0..3 of the tile (CO[0*ldc+J] .. CO[3*ldc+J]).
 *   SAVE_ACC1   - columns 4..7 of the tile (CO[4*ldc+J] .. CO[7*ldc+J]).
 *   SAVE2x4_ACC - only columns 0..1, using result[3] and result[2].
 *
 * With TRMMKERNEL defined the destination is overwritten (=); otherwise the
 * scaled result is accumulated into C (+=).
 *
 * The macros are not hygienic: they expect `result`, `rowC`, `CO`, `ldc`
 * and `alpha` to be in scope at the point of use, and every expansion
 * already ends with a semicolon.
 */
#ifdef TRMMKERNEL
|
||||
#define SAVE_ACC(ACC, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[0* ldc+J]; \
|
||||
rowC[0] = result[3] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[1*ldc+J]; \
|
||||
rowC[0] = result[2] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[2*ldc+J]; \
|
||||
rowC[0] = result[1] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[3*ldc+J]; \
|
||||
rowC[0] = result[0] * alpha;
|
||||
#define SAVE_ACC1(ACC, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[4* ldc+J]; \
|
||||
rowC[0] = result[3] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[5*ldc+J]; \
|
||||
rowC[0] = result[2] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[6*ldc+J]; \
|
||||
rowC[0] = result[1] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[7*ldc+J]; \
|
||||
rowC[0] = result[0] * alpha;
|
||||
#define SAVE2x4_ACC(ACC, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[0* ldc+J]; \
|
||||
rowC[0] = result[3] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[1* ldc+J]; \
|
||||
rowC[0] = result[2] * alpha;
|
||||
/* Non-TRMM build: same layout as above, but accumulate into C. */
#else
|
||||
#define SAVE_ACC(ACC, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[0* ldc+J]; \
|
||||
rowC[0] += result[3] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[1*ldc+J]; \
|
||||
rowC[0] += result[2] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[2*ldc+J]; \
|
||||
rowC[0] += result[1] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[3*ldc+J]; \
|
||||
rowC[0] += result[0] * alpha;
|
||||
#define SAVE_ACC1(ACC, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[4* ldc+J]; \
|
||||
rowC[0] += result[3] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[5*ldc+J]; \
|
||||
rowC[0] += result[2] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[6*ldc+J]; \
|
||||
rowC[0] += result[1] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[7*ldc+J]; \
|
||||
rowC[0] += result[0] * alpha;
|
||||
#define SAVE2x4_ACC(ACC, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[0* ldc+J]; \
|
||||
rowC[0] += result[3] * alpha; \
|
||||
rowC = (v4sf_t *) &CO[1* ldc+J]; \
|
||||
rowC[0] += result[2] * alpha;
|
||||
#endif
|
||||
|
||||
/* Clear the MMA accumulators before a k-loop.
 * SET_ACC_ZERO4 resets acc0..acc3 and SET_ACC_ZERO8 resets acc0..acc7 by
 * calling __builtin_mma_xxsetaccz on each one. The accumulators are not
 * parameters: variables with exactly these names must be declared in the
 * scope where the macro is expanded. Each expansion already ends with a
 * semicolon.
 */
#define SET_ACC_ZERO4() \
|
||||
__builtin_mma_xxsetaccz (&acc0); \
|
||||
__builtin_mma_xxsetaccz (&acc1); \
|
||||
__builtin_mma_xxsetaccz (&acc2); \
|
||||
__builtin_mma_xxsetaccz (&acc3);
|
||||
|
||||
#define SET_ACC_ZERO8() \
|
||||
__builtin_mma_xxsetaccz (&acc0); \
|
||||
__builtin_mma_xxsetaccz (&acc1); \
|
||||
__builtin_mma_xxsetaccz (&acc2); \
|
||||
__builtin_mma_xxsetaccz (&acc3); \
|
||||
__builtin_mma_xxsetaccz (&acc4); \
|
||||
__builtin_mma_xxsetaccz (&acc5); \
|
||||
__builtin_mma_xxsetaccz (&acc6); \
|
||||
__builtin_mma_xxsetaccz (&acc7);
|
||||
|
||||
/* Issue a dcbt (data cache block touch) prefetch hint for address x + y.
 * x occupies the RA slot of the instruction, where register r0 is read as
 * the literal value 0 rather than its contents, so x must use the "b"
 * (base register, never r0) constraint. y is the RB index operand and may
 * live in any general-purpose register. The expansion already ends with a
 * semicolon. */
#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "b" (x), "r" (y) : "memory");
|
||||
|
||||
/* REFRESH_TEMP_BK(x, y): set `temp`, which the kernel then uses as the trip
 * count of its inner loop (`for (l = 0; l < temp; l++)`).
 *   LEFT && !TRANSA, or !LEFT && TRANSA : temp = k - off
 *   otherwise, LEFT                     : temp = off + x
 *   otherwise                           : temp = off + y
 * Callers pass the A-panel width as x and the B-panel width as y, e.g.
 * (16, 4). Relies on `temp`, `k` and `off` being in scope; the expansion
 * already ends with a semicolon.
 */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
|
||||
#define REFRESH_TEMP_BK(x, y) \
|
||||
temp = k - off;
|
||||
#elif defined(LEFT)
|
||||
#define REFRESH_TEMP_BK(x, y) \
|
||||
temp = off + x;
|
||||
#else
|
||||
#define REFRESH_TEMP_BK(x, y) \
|
||||
temp = off + y;
|
||||
#endif
|
||||
/* REFRESH_POINTERS(x, y): position the panel pointers at the start of a tile
 * and then recompute `temp` through REFRESH_TEMP_BK(x, y).
 *   LEFT && TRANSA, or !LEFT && !TRANSA : BO restarts at B; AO is untouched.
 *   otherwise                           : AO advances by off * x and BO is
 *                                         set to B + off * y.
 * Relies on `AO`, `BO`, `B` and `off` being in scope at the point of use.
 */
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
#define REFRESH_POINTERS(x, y) \
|
||||
BO = B; \
|
||||
REFRESH_TEMP_BK(x, y)
|
||||
#else
|
||||
#define REFRESH_POINTERS(x, y) \
|
||||
AO += off * x; \
|
||||
BO = B + off * y; \
|
||||
REFRESH_TEMP_BK(x, y)
|
||||
#endif
|
||||
|
||||
/* REFRESH_OFF(x): when LEFT is defined, advance `off` by x after a tile has
 * been processed; when LEFT is not defined the macro expands to nothing.
 * Relies on `off` being in scope.
 */
#ifdef LEFT
|
||||
#define REFRESH_OFF(x) \
|
||||
off += x;
|
||||
#else
|
||||
#define REFRESH_OFF(x)
|
||||
#endif
|
||||
|
||||
/* UPDATE_TEMP(x, y): reduce `temp` by one tile dimension -- x when LEFT is
 * defined, y otherwise. Used by REFRESH_TMP_AFTER_SAVE. Relies on `temp`
 * being in scope; the expansion already ends with a semicolon.
 */
#ifdef LEFT
|
||||
#define UPDATE_TEMP(x, y) \
|
||||
temp -= x;
|
||||
#else
|
||||
#define UPDATE_TEMP(x, y) \
|
||||
temp -= y;
|
||||
#endif
|
||||
|
||||
/* REFRESH_TMP_AFTER_SAVE(x, y): after a tile has been stored, move AO and BO
 * forward by an additional `temp` steps.
 * For LEFT && TRANSA, or !LEFT && !TRANSA: temp is recomputed as k - off,
 * reduced by the tile dimension through UPDATE_TEMP(x, y), and then AO is
 * advanced by temp * x and BO by temp * y.
 * In every other configuration the macro expands to nothing.
 * Relies on `temp`, `k`, `off`, `AO` and `BO` being in scope.
 */
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
#define REFRESH_TMP_AFTER_SAVE(x, y) \
|
||||
temp = k - off; \
|
||||
UPDATE_TEMP(x, y) \
|
||||
AO += temp * x; \
|
||||
BO += temp * y;
|
||||
#else
|
||||
#define REFRESH_TMP_AFTER_SAVE(x, y)
|
||||
#endif
|
||||
|
||||
/* REFRESH_AFTER_SAVE(x, y): bookkeeping run after each tile is stored in the
 * TRMM build -- first REFRESH_TMP_AFTER_SAVE(x, y) to move the panel
 * pointers, then REFRESH_OFF(x) to advance `off`. Either step may expand to
 * nothing depending on LEFT / TRANSA.
 */
#define REFRESH_AFTER_SAVE(x,y) \
|
||||
REFRESH_TMP_AFTER_SAVE(x, y) \
|
||||
REFRESH_OFF(x)
|
||||
/*************************************************************************************
|
||||
* GEMM Kernel
|
||||
*************************************************************************************/
|
||||
int
|
||||
CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||
FLOAT * C, BLASLONG ldc
|
||||
#ifdef TRMMKERNEL
|
||||
, BLASLONG offset
|
||||
#endif
|
||||
)
|
||||
{
|
||||
BLASLONG N = n;
|
||||
BLASLONG i1;
|
||||
#if defined(TRMMKERNEL)
|
||||
BLASLONG off;
|
||||
#endif
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
off = -offset;
|
||||
#endif
|
||||
v4sf_t valpha = { alpha, alpha };
|
||||
N = n >> 2;
|
||||
for (i1 = 0; i1 < N; i1++)
|
||||
{
|
||||
BLASLONG i, j, temp;
|
||||
FLOAT *CO;
|
||||
FLOAT *AO;
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
off = offset;
|
||||
#endif
|
||||
CO = C;
|
||||
C += ldc << 2;
|
||||
AO = A;
|
||||
PREFETCH1 (A, 128);
|
||||
PREFETCH1 (A, 256);
|
||||
i = m >> 4;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (16, 4);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
BLASLONG l = 0;
|
||||
PREFETCH1 (CO, 0);
|
||||
PREFETCH1 (CO + ldc, 0);
|
||||
PREFETCH1 (CO + ldc + ldc, 0);
|
||||
PREFETCH1 (CO + ldc + ldc + ldc, 0);
|
||||
PREFETCH1 (CO, 128);
|
||||
PREFETCH1 (CO + ldc, 128);
|
||||
PREFETCH1 (CO + ldc + ldc, 128);
|
||||
PREFETCH1 (CO + ldc + ldc + ldc, 128);
|
||||
__vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
|
||||
SET_ACC_ZERO8 ();
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
vec_t *rowA = (vec_t *) & AO[l << 4];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
|
||||
__builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
|
||||
__builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
|
||||
__builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
|
||||
__builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
|
||||
}
|
||||
SAVE_ACC (&acc0, 0);
|
||||
SAVE_ACC (&acc2, 4);
|
||||
SAVE_ACC (&acc1, 2);
|
||||
SAVE_ACC (&acc3, 6);
|
||||
SAVE_ACC (&acc4, 8);
|
||||
SAVE_ACC (&acc6, 12);
|
||||
SAVE_ACC (&acc5, 10);
|
||||
SAVE_ACC (&acc7, 14);
|
||||
AO += temp << 4;
|
||||
BO += temp << 2;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (16, 4)
|
||||
#endif
|
||||
CO += 16;
|
||||
}
|
||||
i = (m & 15) >> 3;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (8, 4);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
__vector_quad acc0, acc1, acc2, acc3;
|
||||
SET_ACC_ZERO4 ();
|
||||
BLASLONG l = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
vec_t *rowA = (vec_t *) & AO[l << 3];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
|
||||
}
|
||||
SAVE_ACC (&acc0, 0);
|
||||
SAVE_ACC (&acc2, 4);
|
||||
SAVE_ACC (&acc1, 2);
|
||||
SAVE_ACC (&acc3, 6);
|
||||
CO += 8;
|
||||
AO += temp << 3;
|
||||
BO += temp << 2;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (8, 4)
|
||||
#endif
|
||||
}
|
||||
i = (m & 7) >> 2;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (4, 4);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
__vector_quad acc0, acc1;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
__builtin_mma_xxsetaccz (&acc1);
|
||||
BLASLONG l = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
vec_t *rowA = (vec_t *) & AO[l << 2];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
}
|
||||
SAVE_ACC (&acc0, 0);
|
||||
SAVE_ACC (&acc1, 2);
|
||||
CO += 4;
|
||||
AO += temp << 2;
|
||||
BO += temp << 2;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (4, 4)
|
||||
#endif
|
||||
}
|
||||
i = (m & 3) >> 1;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (2, 4);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
__vector_quad acc0;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
BLASLONG l = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
vec_t *rowA = (vec_t *) & AO[l << 1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
}
|
||||
SAVE_ACC (&acc0, 0);
|
||||
CO += 2;
|
||||
AO += temp << 1;
|
||||
BO += temp << 2;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (2, 4)
|
||||
#endif
|
||||
}
|
||||
i = (m & 1) >> 0;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (1, 4);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
BLASLONG l = 0;
|
||||
v4sf_t t = { 0, 0 };
|
||||
v4sf_t t1 = { 0, 0 };
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
v4sf_t rowA = { AO[l], AO[l] };
|
||||
v4sf_t rowB = { BO[l << 2], BO[(l << 2) + 1] };
|
||||
v4sf_t rowB1 = { BO[(l << 2) + 2], BO[(l << 2) + 3] };
|
||||
t += rowA * rowB;
|
||||
t1 += rowA * rowB1;
|
||||
}
|
||||
t = t * valpha;
|
||||
t1 = t1 * valpha;
|
||||
#if defined(TRMMKERNEL)
|
||||
CO[0 * ldc] = t[0];
|
||||
CO[1 * ldc] = t[1];
|
||||
CO[2 * ldc] = t1[0];
|
||||
CO[3 * ldc] = t1[1];
|
||||
#else
|
||||
CO[0 * ldc] += t[0];
|
||||
CO[1 * ldc] += t[1];
|
||||
CO[2 * ldc] += t1[0];
|
||||
CO[3 * ldc] += t1[1];
|
||||
#endif
|
||||
CO += 1;
|
||||
AO += temp;
|
||||
BO += temp << 2;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (1, 4)
|
||||
#endif
|
||||
}
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
off += 4; // number of values in A
|
||||
#endif
|
||||
B += k << 2;
|
||||
}
|
||||
N = (n & 3) >> 1;
|
||||
for (i1 = 0; i1 < N; i1++)
|
||||
{
|
||||
BLASLONG i, j, temp;
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
off = offset;
|
||||
#endif
|
||||
FLOAT *CO;
|
||||
FLOAT *AO;
|
||||
CO = C;
|
||||
C += ldc << 1;
|
||||
AO = A;
|
||||
i = m >> 4;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (16, 2);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
__vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
|
||||
SET_ACC_ZERO8 ();
|
||||
BLASLONG l = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
vec_t *rowA = (vec_t *) & AO[l << 4];
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
|
||||
__builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
|
||||
__builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
|
||||
__builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
|
||||
__builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
|
||||
}
|
||||
SAVE2x4_ACC (&acc0, 0);
|
||||
SAVE2x4_ACC (&acc1, 2);
|
||||
SAVE2x4_ACC (&acc2, 4);
|
||||
SAVE2x4_ACC (&acc3, 6);
|
||||
SAVE2x4_ACC (&acc4, 8);
|
||||
SAVE2x4_ACC (&acc5, 10);
|
||||
SAVE2x4_ACC (&acc6, 12);
|
||||
SAVE2x4_ACC (&acc7, 14);
|
||||
CO += 16;
|
||||
AO += temp << 4;
|
||||
BO += temp << 1;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (16, 2)
|
||||
#endif
|
||||
}
|
||||
i = (m & 15) >> 3;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (8, 2);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
__vector_quad acc0, acc1, acc2, acc3;
|
||||
SET_ACC_ZERO4 ();
|
||||
BLASLONG l = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
vec_t *rowA = (vec_t *) & AO[l << 3];
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
|
||||
}
|
||||
SAVE2x4_ACC (&acc0, 0);
|
||||
SAVE2x4_ACC (&acc1, 2);
|
||||
SAVE2x4_ACC (&acc2, 4);
|
||||
SAVE2x4_ACC (&acc3, 6);
|
||||
CO += 8;
|
||||
AO += temp << 3;
|
||||
BO += temp << 1;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (8, 2)
|
||||
#endif
|
||||
}
|
||||
i = (m & 7) >> 2;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (4, 2);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
__vector_quad acc0, acc1;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
__builtin_mma_xxsetaccz (&acc1);
|
||||
BLASLONG l = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
vec_t *rowA = (vec_t *) & AO[l << 2];
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
}
|
||||
SAVE2x4_ACC (&acc0, 0);
|
||||
SAVE2x4_ACC (&acc1, 2);
|
||||
CO += 4;
|
||||
AO += temp << 2;
|
||||
BO += temp << 1;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (4, 2)
|
||||
#endif
|
||||
}
|
||||
i = (m & 3) >> 1;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (2, 2);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
v4sf_t *rowC;
|
||||
v4sf_t result[4];
|
||||
__vector_quad acc0;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
BLASLONG l = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
vec_t *rowA = (vec_t *) & AO[l << 1];
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
}
|
||||
SAVE2x4_ACC (&acc0, 0);
|
||||
CO += 2;
|
||||
AO += temp << 1;
|
||||
BO += temp << 1;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (2, 2)
|
||||
#endif
|
||||
}
|
||||
i = (m & 1) >> 0;
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (1, 2);
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
BLASLONG l = 0;
|
||||
v4sf_t t = { 0, 0 };
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
v4sf_t rowA = { AO[l], AO[l] };
|
||||
v4sf_t rowB = { BO[l << 1], BO[(l << 1) + 1] };
|
||||
t += rowA * rowB;
|
||||
}
|
||||
t = t * valpha;
|
||||
#if defined(TRMMKERNEL)
|
||||
CO[0 * ldc] = t[0];
|
||||
CO[1 * ldc] = t[1];
|
||||
#else
|
||||
CO[0 * ldc] += t[0];
|
||||
CO[1 * ldc] += t[1];
|
||||
#endif
|
||||
CO += 1;
|
||||
AO += temp;
|
||||
BO += temp << 1;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (1, 2)
|
||||
#endif
|
||||
}
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
off += 2; // number of values in A
|
||||
#endif
|
||||
B += k << 1;
|
||||
}
|
||||
N = (n & 1) >> 0;
|
||||
for (i1 = 0; i1 < N; i1++)
|
||||
{
|
||||
BLASLONG i, temp;
|
||||
#if defined(TRMMKERNEL) && defined(LEFT)
|
||||
off = offset;
|
||||
#endif
|
||||
FLOAT *CO;
|
||||
FLOAT *AO;
|
||||
CO = C;
|
||||
C += ldc;
|
||||
AO = A;
|
||||
i = m;
|
||||
while (i >= 16)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (16, 1)
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
BLASLONG l = 0;
|
||||
v4sf_t t = { 0, 0 };
|
||||
v4sf_t t1 = { 0, 0 };
|
||||
v4sf_t t2 = { 0, 0 };
|
||||
v4sf_t t3 = { 0, 0 };
|
||||
v4sf_t t4 = { 0, 0 };
|
||||
v4sf_t t5 = { 0, 0 };
|
||||
v4sf_t t6 = { 0, 0 };
|
||||
v4sf_t t7 = { 0, 0 };
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
v4sf_t rowB = { BO[l], BO[l] };
|
||||
v4sf_t rowA = { AO[l << 4], AO[(l << 4) + 1] };
|
||||
v4sf_t rowA1 = { AO[(l << 4) + 2], AO[(l << 4) + 3] };
|
||||
v4sf_t rowA2 = { AO[(l << 4) + 4], AO[(l << 4) + 5] };
|
||||
v4sf_t rowA3 = { AO[(l << 4) + 6], AO[(l << 4) + 7] };
|
||||
v4sf_t rowA4 = { AO[(l << 4) + 8], AO[(l << 4) + 9] };
|
||||
v4sf_t rowA5 = { AO[(l << 4) + 10], AO[(l << 4) + 11] };
|
||||
v4sf_t rowA6 = { AO[(l << 4) + 12], AO[(l << 4) + 13] };
|
||||
v4sf_t rowA7 = { AO[(l << 4) + 14], AO[(l << 4) + 15] };
|
||||
t += rowA * rowB;
|
||||
t1 += rowA1 * rowB;
|
||||
t2 += rowA2 * rowB;
|
||||
t3 += rowA3 * rowB;
|
||||
t4 += rowA4 * rowB;
|
||||
t5 += rowA5 * rowB;
|
||||
t6 += rowA6 * rowB;
|
||||
t7 += rowA7 * rowB;
|
||||
}
|
||||
t = t * valpha;
|
||||
t1 = t1 * valpha;
|
||||
t2 = t2 * valpha;
|
||||
t3 = t3 * valpha;
|
||||
t4 = t4 * valpha;
|
||||
t5 = t5 * valpha;
|
||||
t6 = t6 * valpha;
|
||||
t7 = t7 * valpha;
|
||||
#if defined(TRMMKERNEL)
|
||||
CO[0] = t[0];
|
||||
CO[1] = t[1];
|
||||
CO[2] = t1[0];
|
||||
CO[3] = t1[1];
|
||||
CO[4] = t2[0];
|
||||
CO[5] = t2[1];
|
||||
CO[6] = t3[0];
|
||||
CO[7] = t3[1];
|
||||
CO[8] = t4[0];
|
||||
CO[9] = t4[1];
|
||||
CO[10] = t5[0];
|
||||
CO[11] = t5[1];
|
||||
CO[12] = t6[0];
|
||||
CO[13] = t6[1];
|
||||
CO[14] = t7[0];
|
||||
CO[15] = t7[1];
|
||||
#else
|
||||
CO[0] += t[0];
|
||||
CO[1] += t[1];
|
||||
CO[2] += t1[0];
|
||||
CO[3] += t1[1];
|
||||
CO[4] += t2[0];
|
||||
CO[5] += t2[1];
|
||||
CO[6] += t3[0];
|
||||
CO[7] += t3[1];
|
||||
CO[8] += t4[0];
|
||||
CO[9] += t4[1];
|
||||
CO[10] += t5[0];
|
||||
CO[11] += t5[1];
|
||||
CO[12] += t6[0];
|
||||
CO[13] += t6[1];
|
||||
CO[14] += t7[0];
|
||||
CO[15] += t7[1];
|
||||
#endif
|
||||
AO += temp << 4;
|
||||
BO += temp;
|
||||
CO += 16;
|
||||
i -= 16;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (16, 1)
|
||||
#endif
|
||||
}
|
||||
while (i >= 8)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (8, 1)
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
BLASLONG l = 0;
|
||||
v4sf_t t = { 0, 0 };
|
||||
v4sf_t t1 = { 0, 0 };
|
||||
v4sf_t t2 = { 0, 0 };
|
||||
v4sf_t t3 = { 0, 0 };
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
v4sf_t rowB = { BO[l], BO[l] };
|
||||
v4sf_t rowA = { AO[l << 3], AO[(l << 3) + 1] };
|
||||
v4sf_t rowA1 = { AO[(l << 3) + 2], AO[(l << 3) + 3] };
|
||||
v4sf_t rowA2 = { AO[(l << 3) + 4], AO[(l << 3) + 5] };
|
||||
v4sf_t rowA3 = { AO[(l << 3) + 6], AO[(l << 3) + 7] };
|
||||
t += rowA * rowB;
|
||||
t1 += rowA1 * rowB;
|
||||
t2 += rowA2 * rowB;
|
||||
t3 += rowA3 * rowB;
|
||||
}
|
||||
t = t * valpha;
|
||||
t1 = t1 * valpha;
|
||||
t2 = t2 * valpha;
|
||||
t3 = t3 * valpha;
|
||||
#if defined(TRMMKERNEL)
|
||||
CO[0] = t[0];
|
||||
CO[1] = t[1];
|
||||
CO[2] = t1[0];
|
||||
CO[3] = t1[1];
|
||||
CO[4] = t2[0];
|
||||
CO[5] = t2[1];
|
||||
CO[6] = t3[0];
|
||||
CO[7] = t3[1];
|
||||
#else
|
||||
CO[0] += t[0];
|
||||
CO[1] += t[1];
|
||||
CO[2] += t1[0];
|
||||
CO[3] += t1[1];
|
||||
CO[4] += t2[0];
|
||||
CO[5] += t2[1];
|
||||
CO[6] += t3[0];
|
||||
CO[7] += t3[1];
|
||||
#endif
|
||||
AO += temp << 3;
|
||||
BO += temp;
|
||||
CO += 8;
|
||||
i -= 8;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (8, 1)
|
||||
#endif
|
||||
}
|
||||
while (i >= 4)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (4, 1)
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
BLASLONG l = 0;
|
||||
v4sf_t t = { 0, 0 };
|
||||
v4sf_t t1 = { 0, 0 };
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
v4sf_t rowB = { BO[l], BO[l] };
|
||||
v4sf_t rowA = { AO[l << 2], AO[(l << 2) + 1] };
|
||||
v4sf_t rowA1 = { AO[(l << 2) + 2], AO[(l << 2) + 3] };
|
||||
t += rowA * rowB;
|
||||
t1 += rowA1 * rowB;
|
||||
}
|
||||
t = t * valpha;
|
||||
t1 = t1 * valpha;
|
||||
#if defined(TRMMKERNEL)
|
||||
CO[0] = t[0];
|
||||
CO[1] = t[1];
|
||||
CO[2] = t1[0];
|
||||
CO[3] = t1[1];
|
||||
#else
|
||||
CO[0] += t[0];
|
||||
CO[1] += t[1];
|
||||
CO[2] += t1[0];
|
||||
CO[3] += t1[1];
|
||||
#endif
|
||||
AO += temp << 2;
|
||||
BO += temp;
|
||||
CO += 4;
|
||||
i -= 4;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (4, 1)
|
||||
#endif
|
||||
}
|
||||
while (i >= 2)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (2, 1)
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
BLASLONG l = 0;
|
||||
v4sf_t t = { 0, 0 };
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
v4sf_t rowB = { BO[l], BO[l] };
|
||||
v4sf_t rowA = { AO[l << 1], AO[(l << 1) + 1] };
|
||||
t += rowA * rowB;
|
||||
}
|
||||
t = t * valpha;
|
||||
#if defined(TRMMKERNEL)
|
||||
CO[0] = t[0];
|
||||
CO[1] = t[1];
|
||||
#else
|
||||
CO[0] += t[0];
|
||||
CO[1] += t[1];
|
||||
#endif
|
||||
AO += temp << 1;
|
||||
BO += temp;
|
||||
CO += 2;
|
||||
i -= 2;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (2, 1)
|
||||
#endif
|
||||
}
|
||||
while (i >= 1)
|
||||
{
|
||||
FLOAT *BO;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_POINTERS (1, 1)
|
||||
#else
|
||||
BO = B;
|
||||
temp = k;
|
||||
#endif
|
||||
BLASLONG l = 0;
|
||||
FLOAT t = 0;
|
||||
for (l = 0; l < temp; l++)
|
||||
{
|
||||
t += AO[l] * BO[l];
|
||||
}
|
||||
AO += temp;
|
||||
BO += temp;
|
||||
#if defined(TRMMKERNEL)
|
||||
CO[0] = t * alpha;
|
||||
#else
|
||||
CO[0] += t * alpha;
|
||||
#endif
|
||||
CO += 1;
|
||||
i -= 1;
|
||||
#if defined(TRMMKERNEL)
|
||||
REFRESH_AFTER_SAVE (1, 1)
|
||||
#endif
|
||||
}
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
off += 1; // number of values in A
|
||||
#endif
|
||||
B += k;
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "dgemv_n_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#pragma GCC optimize "O1"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "drot_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "dscal_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "dswap_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "sasum_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "scopy_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "sdot_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#pragma GCC optimize "O1"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "srot_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "sscal_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "sswap_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "zasum_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "zaxpy_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "zcopy_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "zdot_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,245 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013-2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "def_vsx.h"
|
||||
|
||||
#define LOAD ld
|
||||
|
||||
#define STACKSIZE 512
|
||||
|
||||
#define FZERO 312+192(SP)
|
||||
|
||||
#define FLINK_SAVE (STACKSIZE+16) /* 16($r12) */
|
||||
|
||||
#define M r3
|
||||
#define N r4
|
||||
#define K r5
|
||||
|
||||
|
||||
#define A r8
|
||||
#define B r9
|
||||
#define C r10
|
||||
#define LDC r6
|
||||
#define OFFSET r7
|
||||
|
||||
|
||||
|
||||
#define o0 0
|
||||
#define alpha_r vs62
|
||||
#define alpha_i vs63
|
||||
|
||||
#define VECSAVE r11
|
||||
|
||||
#define FRAMEPOINTER r12
|
||||
|
||||
#define T10 r14
|
||||
|
||||
#define L r15
|
||||
#define T8 r16
|
||||
#define T5 r17
|
||||
#define T2 r19
|
||||
#define TEMP_REG r20
|
||||
#define T6 r21
|
||||
#define I r22
|
||||
#define J r23
|
||||
#define AO r24
|
||||
#define BO r25
|
||||
#define CO r26
|
||||
#define T7 r27
|
||||
#define T3 r28
|
||||
#define T4 r29
|
||||
|
||||
#define PRE r30
|
||||
#define T1 r31
|
||||
|
||||
/*
 * Kernel entry point: sets up a STACKSIZE-byte frame, saves registers,
 * pulls in the macro and logic files that contain the actual computation,
 * then restores everything at L999 and returns.
 * The whole body is skipped when NEEDPARAM is defined.
 */
#ifndef NEEDPARAM
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
||||
/* Remember the incoming stack pointer in FRAMEPOINTER, then reserve STACKSIZE bytes. */
mr FRAMEPOINTER, SP
|
||||
addi SP, SP, -STACKSIZE
|
||||
/* r0 <- link register; it is stored at FLINK_SAVE(SP) further down. */
mflr r0
|
||||
/* Save f14-f31 at offsets 0..136 of the new frame. */
stfd f14, 0(SP)
|
||||
stfd f15, 8(SP)
|
||||
stfd f16, 16(SP)
|
||||
stfd f17, 24(SP)
|
||||
|
||||
stfd f18, 32(SP)
|
||||
stfd f19, 40(SP)
|
||||
stfd f20, 48(SP)
|
||||
stfd f21, 56(SP)
|
||||
|
||||
stfd f22, 64(SP)
|
||||
stfd f23, 72(SP)
|
||||
stfd f24, 80(SP)
|
||||
stfd f25, 88(SP)
|
||||
|
||||
stfd f26, 96(SP)
|
||||
stfd f27, 104(SP)
|
||||
stfd f28, 112(SP)
|
||||
stfd f29, 120(SP)
|
||||
|
||||
stfd f30, 128(SP)
|
||||
stfd f31, 136(SP)
|
||||
|
||||
/* Splat the two alpha components (arriving in f1 and f2) across alpha_r / alpha_i. */
xxspltd alpha_r,vs1,0 /*copy from register f1 */
|
||||
xxspltd alpha_i,vs2,0 /*copy from register f2 */
|
||||
|
||||
/* Save r14-r31 at offsets 144..280. */
std r31, 144(SP)
|
||||
std r30, 152(SP)
|
||||
std r29, 160(SP)
|
||||
std r28, 168(SP)
|
||||
std r27, 176(SP)
|
||||
std r26, 184(SP)
|
||||
std r25, 192(SP)
|
||||
std r24, 200(SP)
|
||||
std r23, 208(SP)
|
||||
std r22, 216(SP)
|
||||
std r21, 224(SP)
|
||||
std r20, 232(SP)
|
||||
std r19, 240(SP)
|
||||
std r18, 248(SP)
|
||||
std r17, 256(SP)
|
||||
std r16, 264(SP)
|
||||
std r15, 272(SP)
|
||||
std r14, 280(SP)
|
||||
|
||||
|
||||
/* Save the full 16-byte vs20-vs31 at offsets 288..464. */
stxv vs20, 288(SP)
|
||||
stxv vs21, 304(SP)
|
||||
stxv vs22, 320(SP)
|
||||
stxv vs23, 336(SP)
|
||||
stxv vs24, 352(SP)
|
||||
stxv vs25, 368(SP)
|
||||
stxv vs26, 384(SP)
|
||||
stxv vs27, 400(SP)
|
||||
stxv vs28, 416(SP)
|
||||
stxv vs29, 432(SP)
|
||||
stxv vs30, 448(SP)
|
||||
stxv vs31, 464(SP)
|
||||
|
||||
/* Store the saved link register; FLINK_SAVE is STACKSIZE+16, i.e. 16 bytes above the incoming SP. */
std r0, FLINK_SAVE(SP)
|
||||
|
||||
|
||||
/* LDC (and OFFSET for TRMM) are loaded from memory relative to the incoming stack pointer. */
#if defined(linux) || defined(__FreeBSD__)
|
||||
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef TRMMKERNEL
|
||||
#if (defined(linux) || defined(__FreeBSD__)) && defined(__64BIT__)
|
||||
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#include "zgemm_macros_power10.S"
|
||||
|
||||
|
||||
|
||||
/* Scale LDC by 2^ZBASE_SHIFT (presumably element count -> byte stride; confirm against ZBASE_SHIFT's definition). */
slwi LDC, LDC, ZBASE_SHIFT
|
||||
/* PRE = 512 and r0 = 0 are set up for the included logic; NOTE(review): PRE looks like a prefetch distance - confirm in zgemm_logic_power10.S. */
li PRE, 512
|
||||
li r0, 0
|
||||
|
||||
|
||||
#if defined(CC) || defined(CR) || defined(RC) || defined(RR)
|
||||
/*negate for this case as we will use addition -1*(a+b) */
|
||||
xvnegdp alpha_r,alpha_r
|
||||
xvnegdp alpha_i,alpha_i
|
||||
#endif
|
||||
.align 4
|
||||
|
||||
#include "zgemm_logic_power10.S"
|
||||
|
||||
/* Common exit: restore everything saved in the prologue (presumably the target of the exit branches in the included logic). */
L999:
|
||||
|
||||
lfd f14, 0(SP)
|
||||
lfd f15, 8(SP)
|
||||
lfd f16, 16(SP)
|
||||
lfd f17, 24(SP)
|
||||
|
||||
lfd f18, 32(SP)
|
||||
lfd f19, 40(SP)
|
||||
lfd f20, 48(SP)
|
||||
lfd f21, 56(SP)
|
||||
|
||||
lfd f22, 64(SP)
|
||||
lfd f23, 72(SP)
|
||||
lfd f24, 80(SP)
|
||||
lfd f25, 88(SP)
|
||||
|
||||
lfd f26, 96(SP)
|
||||
lfd f27, 104(SP)
|
||||
lfd f28, 112(SP)
|
||||
lfd f29, 120(SP)
|
||||
|
||||
lfd f30, 128(SP)
|
||||
lfd f31, 136(SP)
|
||||
|
||||
|
||||
ld r31, 144(SP)
|
||||
ld r30, 152(SP)
|
||||
ld r29, 160(SP)
|
||||
ld r28, 168(SP)
|
||||
ld r27, 176(SP)
|
||||
ld r26, 184(SP)
|
||||
ld r25, 192(SP)
|
||||
ld r24, 200(SP)
|
||||
ld r23, 208(SP)
|
||||
ld r22, 216(SP)
|
||||
ld r21, 224(SP)
|
||||
ld r20, 232(SP)
|
||||
ld r19, 240(SP)
|
||||
ld r18, 248(SP)
|
||||
ld r17, 256(SP)
|
||||
ld r16, 264(SP)
|
||||
ld r15, 272(SP)
|
||||
ld r14, 280(SP)
|
||||
|
||||
/* Reload the saved link register value into r0; mtlr below installs it. */
ld r0, FLINK_SAVE(SP)
|
||||
|
||||
lxv vs20, 288(SP)
|
||||
lxv vs21, 304(SP)
|
||||
lxv vs22, 320(SP)
|
||||
lxv vs23, 336(SP)
|
||||
lxv vs24, 352(SP)
|
||||
lxv vs25, 368(SP)
|
||||
lxv vs26, 384(SP)
|
||||
lxv vs27, 400(SP)
|
||||
mtlr r0
|
||||
lxv vs28, 416(SP)
|
||||
lxv vs29, 432(SP)
|
||||
lxv vs30, 448(SP)
|
||||
lxv vs31, 464(SP)
|
||||
|
||||
/* Release the frame and return. */
addi SP, SP, STACKSIZE
|
||||
blr
|
||||
|
||||
EPILOGUE
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#pragma GCC optimize "O1"
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#if defined(DOUBLE)
|
||||
#include "zscal_microk_power8.c"
|
||||
#endif
|
||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#include "zswap_microk_power8.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|||
DGEMMKERNEL = dgemm_kernel_16x2_skylakex.c
|
||||
DTRMMKERNEL = dgemm_kernel_16x2_skylakex.c
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||
DGEMMITCOPY = dgemm_tcopy_16_skylakex.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
/*
 * Copy a block of doubles from src into the packed buffer dst.
 *
 * src is read as dim_second rows of dim_first consecutive doubles, with
 * consecutive rows lead_dim doubles apart.  Rows are processed four at a
 * time, then two at a time, then singly.
 *
 * dst is written as a sequence of panels.  Every full group of 16 columns
 * forms a panel of 16 * dim_second doubles in which row r occupies the 16
 * doubles at offset 16 * r.  The leftover columns (dim_first % 16) are
 * written as at most one panel each of width 8, 4, 2 and 1, laid out the
 * same way (row r at offset width * r).  That is why dst1 is re-based by
 * "width * (dim_second - dim2_count)" before each narrower step.
 *
 * All vector accesses use unaligned loads/stores, so neither buffer needs
 * any particular alignment.
 *
 * Returns 0.  (The function is declared int; previously control fell off
 * the end without returning a value.)
 */
int CNAME(BLASLONG dim_second, BLASLONG dim_first, double *src, BLASLONG lead_dim, double *dst){
    double *src1, *src2, *src3, *src4, *dst1;
    __m512d z1,z2,z3,z4,z5,z6,z7,z8; __m256d y1,y2,y3,y4; __m128d x1,x2,x3,x4; double s1,s2,s3,s4;
    BLASLONG dim1_count, dim2_count, src_inc;

    /* After a row pointer has consumed dim_first elements, this moves it four rows down. */
    src_inc = 4 * lead_dim - dim_first;
    src1 = src; src2 = src + lead_dim; src3 = src2 + lead_dim; src4 = src3 + lead_dim;

    /* ---- four source rows per iteration ---- */
    for(dim2_count=dim_second; dim2_count>3; dim2_count-=4){
        /* (dim_second - dim2_count) is the index of the first row of this group. */
        dst1 = dst + 16 * (dim_second - dim2_count);
        for(dim1_count=dim_first; dim1_count>15; dim1_count-=16){
            z1 = _mm512_loadu_pd(src1); z2 = _mm512_loadu_pd(src1+8); src1 += 16;
            z3 = _mm512_loadu_pd(src2); z4 = _mm512_loadu_pd(src2+8); src2 += 16;
            z5 = _mm512_loadu_pd(src3); z6 = _mm512_loadu_pd(src3+8); src3 += 16;
            z7 = _mm512_loadu_pd(src4); z8 = _mm512_loadu_pd(src4+8); src4 += 16;
            _mm512_storeu_pd(dst1+ 0,z1); _mm512_storeu_pd(dst1+ 8,z2);
            _mm512_storeu_pd(dst1+16,z3); _mm512_storeu_pd(dst1+24,z4);
            _mm512_storeu_pd(dst1+32,z5); _mm512_storeu_pd(dst1+40,z6);
            _mm512_storeu_pd(dst1+48,z7); _mm512_storeu_pd(dst1+56,z8); dst1 += 16 * dim_second;
        }
        /* Row offset shrinks from 16*row to 8*row for the 8-wide panel. */
        dst1 -= 8 * (dim_second - dim2_count);
        if(dim1_count>7){
            z1 = _mm512_loadu_pd(src1); src1 += 8;
            z2 = _mm512_loadu_pd(src2); src2 += 8;
            z3 = _mm512_loadu_pd(src3); src3 += 8;
            z4 = _mm512_loadu_pd(src4); src4 += 8;
            _mm512_storeu_pd(dst1+ 0,z1); _mm512_storeu_pd(dst1+ 8,z2);
            _mm512_storeu_pd(dst1+16,z3); _mm512_storeu_pd(dst1+24,z4); dst1 += 8 * dim_second;
            dim1_count -= 8;
        }
        /* 8*row -> 4*row for the 4-wide panel. */
        dst1 -= 4 * (dim_second - dim2_count);
        if(dim1_count>3){
            y1 = _mm256_loadu_pd(src1); src1 += 4;
            y2 = _mm256_loadu_pd(src2); src2 += 4;
            y3 = _mm256_loadu_pd(src3); src3 += 4;
            y4 = _mm256_loadu_pd(src4); src4 += 4;
            _mm256_storeu_pd(dst1+ 0,y1); _mm256_storeu_pd(dst1+ 4,y2);
            _mm256_storeu_pd(dst1+ 8,y3); _mm256_storeu_pd(dst1+12,y4); dst1 += 4 * dim_second;
            dim1_count -= 4;
        }
        /* 4*row -> 2*row for the 2-wide panel. */
        dst1 -= 2 * (dim_second - dim2_count);
        if(dim1_count>1){
            x1 = _mm_loadu_pd(src1); src1 += 2;
            x2 = _mm_loadu_pd(src2); src2 += 2;
            x3 = _mm_loadu_pd(src3); src3 += 2;
            x4 = _mm_loadu_pd(src4); src4 += 2;
            _mm_storeu_pd(dst1+0,x1); _mm_storeu_pd(dst1+2,x2);
            _mm_storeu_pd(dst1+4,x3); _mm_storeu_pd(dst1+6,x4); dst1 += 2 * dim_second;
            dim1_count -= 2;
        }
        /* 2*row -> 1*row for the final single column. */
        dst1 -= dim_second - dim2_count;
        if(dim1_count>0){
            s1 = *src1; src1++; s2 = *src2; src2++; s3 = *src3; src3++; s4 = *src4; src4++;
            dst1[0] = s1; dst1[1] = s2; dst1[2] = s3; dst1[3] = s4;
        }
        src1 += src_inc; src2 += src_inc; src3 += src_inc; src4 += src_inc;
    }

    /* ---- two source rows per iteration: advance by 2 rows instead of 4 ---- */
    src_inc -= 2 * lead_dim;
    for(; dim2_count>1; dim2_count-=2){
        dst1 = dst + 16 * (dim_second - dim2_count);
        for(dim1_count=dim_first; dim1_count>15; dim1_count-=16){
            z1 = _mm512_loadu_pd(src1); z2 = _mm512_loadu_pd(src1+8); src1 += 16;
            z3 = _mm512_loadu_pd(src2); z4 = _mm512_loadu_pd(src2+8); src2 += 16;
            _mm512_storeu_pd(dst1+ 0,z1); _mm512_storeu_pd(dst1+ 8,z2);
            _mm512_storeu_pd(dst1+16,z3); _mm512_storeu_pd(dst1+24,z4); dst1 += 16 * dim_second;
        }
        dst1 -= 8 * (dim_second - dim2_count);
        if(dim1_count>7){
            z1 = _mm512_loadu_pd(src1); src1 += 8;
            z2 = _mm512_loadu_pd(src2); src2 += 8;
            _mm512_storeu_pd(dst1+ 0,z1); _mm512_storeu_pd(dst1+ 8,z2); dst1 += 8 * dim_second;
            dim1_count -= 8;
        }
        dst1 -= 4 * (dim_second - dim2_count);
        if(dim1_count>3){
            y1 = _mm256_loadu_pd(src1); src1 += 4;
            y2 = _mm256_loadu_pd(src2); src2 += 4;
            _mm256_storeu_pd(dst1+ 0,y1); _mm256_storeu_pd(dst1+ 4,y2); dst1 += 4 * dim_second;
            dim1_count -= 4;
        }
        dst1 -= 2 * (dim_second - dim2_count);
        if(dim1_count>1){
            x1 = _mm_loadu_pd(src1); src1 += 2;
            x2 = _mm_loadu_pd(src2); src2 += 2;
            _mm_storeu_pd(dst1+0,x1); _mm_storeu_pd(dst1+2,x2); dst1 += 2 * dim_second;
            dim1_count -= 2;
        }
        dst1 -= dim_second - dim2_count;
        if(dim1_count>0){
            s1 = *src1; src1++; s2 = *src2; src2++;
            dst1[0] = s1; dst1[1] = s2;
        }
        src1 += src_inc; src2 += src_inc;
    }

    /* ---- last remaining row, if any: advance by a single row ---- */
    src_inc -= lead_dim;
    for(; dim2_count>0; dim2_count--){
        dst1 = dst + 16 * (dim_second - dim2_count);
        for(dim1_count=dim_first; dim1_count>15; dim1_count-=16){
            z1 = _mm512_loadu_pd(src1); z2 = _mm512_loadu_pd(src1+8); src1 += 16;
            _mm512_storeu_pd(dst1+ 0,z1); _mm512_storeu_pd(dst1+ 8,z2); dst1 += 16 * dim_second;
        }
        dst1 -= 8 * (dim_second - dim2_count);
        if(dim1_count>7){
            z1 = _mm512_loadu_pd(src1); src1 += 8;
            _mm512_storeu_pd(dst1+ 0,z1); dst1 += 8 * dim_second;
            dim1_count -= 8;
        }
        dst1 -= 4 * (dim_second - dim2_count);
        if(dim1_count>3){
            y1 = _mm256_loadu_pd(src1); src1 += 4;
            _mm256_storeu_pd(dst1+ 0,y1); dst1 += 4 * dim_second;
            dim1_count -= 4;
        }
        dst1 -= 2 * (dim_second - dim2_count);
        if(dim1_count>1){
            x1 = _mm_loadu_pd(src1); src1 += 2;
            _mm_storeu_pd(dst1+0,x1); dst1 += 2 * dim_second;
            dim1_count -= 2;
        }
        dst1 -= dim_second - dim2_count;
        if(dim1_count>0){
            s1 = *src1; src1++;
            dst1[0] = s1;
        }
        src1 += src_inc;
    }

    return 0;
}
|
|
@ -1,4 +1,4 @@
|
|||
/* %0 = "+r"(a_pointer), %1 = "+r"(b_pointer), %2 = "+r"(c_pointer), %3 = "+r"(ldc_in_bytes), %4 for k_count, %5 for c_store, %6 = b_pref */
|
||||
/* %0 = "+r"(a_pointer), %1 = "+r"(b_pointer), %2 = "+r"(c_pointer), %3 = "+r"(ldc_in_bytes), %4 for k_count, %5 for c_store, %6 = b_pref */
|
||||
/* r10 = tmp, r11 = m_counter, r12 = k << 2(const), r13 = tmp, r14 = b_head_pos(const), r15 = tmp */
|
||||
|
||||
/* m = 8 *//* ymm0 for alpha, ymm1-ymm3 for temporary use, ymm4-ymm15 for accumulators */
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include "common.h"
|
||||
#include "common.h"
|
||||
#include <stdint.h>
|
||||
#include "strsm_kernel_8x4_haswell_L_common.h"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* r11 = m_counter, r12 = size_of_k_elements, r13 = kk, r14 = b_head, r15 = a_head */
|
||||
/* r11 = m_counter, r12 = size_of_k_elements, r13 = kk, r14 = b_head, r15 = a_head */
|
||||
/* register i/o: %0 = a_ptr, %1 = b_ptr, %2 = c_ptr, %3 = c_tmp, %4 = ldc, %5 = k_counter */
|
||||
/* memory input: %6 = K, %7 = offset, %8 = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}, %9 = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}, %10 = M */
|
||||
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
enable_language(Fortran)
|
||||
|
||||
enable_testing()
|
||||
|
||||
if(MSVC_VERSION)
|
||||
# string(REPLACE "/STACK:10000000" "/STACK:900000000000000000"
|
||||
# CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
|
||||
|
@ -168,3 +172,394 @@ if(PYTHONINTERP_FOUND)
|
|||
COMMAND ${PYTHON_EXECUTABLE} "lapack_testing.py"
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
# Generate the shell wrapper that the add_test() entries in this file run.
# Positional arguments of the generated script:
#   $1  tester executable
#   $2  input file, fed to the tester on standard input
#   $3  file that receives the tester's output
# The tester's stdout and stderr are captured into $3 (tee keeps them visible
# in the CTest log as well). The run is reported as failed when the captured
# output contains the string FATAL.
# Previously nothing wrote $3, so grep failed on the missing file and the
# script always exited 0.
# NOTE(review): $3 is deliberately left unquoted because several callers pass
# it with a leading blank, which only shell word splitting removes.
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh"
"rm -f $3\n"
"$1 < $2 2>&1 | tee $3\n"
"grep -q FATAL $3\n"
"if [ $? -eq 0 ]; then\n"
"echo Error\n"
"exit 1\n"
"else\n"
"exit 0\n"
"fi\n"
)
|
||||
|
||||
|
||||
# Register one run of a LAPACK tester program as a CTest test.
#   test_name    name shown by CTest
#   tester       tester executable, relative to this directory's build tree
#                (for example LIN/xlintsts)
#   input_file   input file, relative to lapack-netlib/TESTING in the source tree
#   output_file  name of the result file in this directory's build tree
# The run goes through test_helper.sh, which receives the three resulting
# paths as $1 (executable), $2 (input) and $3 (output).
# Building the paths here removes the stray leading blank that most output
# arguments carried and the doubled slash in two tester paths.
function(openblas_add_lapack_test test_name tester input_file output_file)
  add_test(NAME "${test_name}"
    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh"
      "${CMAKE_CURRENT_BINARY_DIR}/${tester}"
      "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/${input_file}"
      "${CMAKE_CURRENT_BINARY_DIR}/${output_file}"
  )
endfunction()

# ======== LIN TESTS (one tester per precision) ========================

openblas_add_lapack_test("REAL_LAPACK_linear_equation_routines" LIN/xlintsts stest.in stest.out)
openblas_add_lapack_test("COMPLEX_LAPACK_linear_equation_routines" LIN/xlintstc ctest.in ctest.out)
openblas_add_lapack_test("DOUBLE_PRECISION_LAPACK_linear_equation_routines" LIN/xlintstd dtest.in dtest.out)
openblas_add_lapack_test("COMPLEX16_LAPACK_linear_equation_routines" LIN/xlintstz ztest.in ztest.out)

# ======== SINGLE-DOUBLE LIN TESTS ========================

openblas_add_lapack_test("SINGLE-DOUBLE_PRECISION_LAPACK_prototype_linear_equation_routines" LIN/xlintstds dstest.in dstest.out)

# ======== COMPLEX-COMPLEX16 LIN TESTS ========================

openblas_add_lapack_test("Testing_COMPLEX-COMPLEX16_LAPACK_prototype_linear_equation_routines" LIN/xlintstzc zctest.in zctest.out)

# ======== SINGLE RFP LIN TESTS ========================

openblas_add_lapack_test("Testing_REAL_LAPACK_RFP_prototype_linear_equation_routines" LIN/xlintstrfs stest_rfp.in stest_rfp.out)

# ======== DOUBLE RFP LIN TESTS ========================

openblas_add_lapack_test("Testing_DOUBLE_PRECISION_LAPACK_RFP_prototype_linear_equation_routines" LIN/xlintstrfd dtest_rfp.in dtest_rfp.out)

# ======== COMPLEX RFP LIN TESTS ========================

openblas_add_lapack_test("Testing_COMPLEX_LAPACK_RFP_prototype_linear_equation_routines" LIN/xlintstrfc ctest_rfp.in ctest_rfp.out)

# ======== COMPLEX16 RFP LIN TESTS ========================

openblas_add_lapack_test("Testing_COMPLEX16_LAPACK_RFP_prototype_linear_equation_routines" LIN/xlintstrfz ztest_rfp.in ztest_rfp.out)

# ======== SINGLE EIG TESTS ===========================

openblas_add_lapack_test("SNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" EIG/xeigtsts nep.in snep.out)
openblas_add_lapack_test("SSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtsts sep.in ssep.out)
openblas_add_lapack_test("SSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtsts se2.in sse2.out)
openblas_add_lapack_test("SSVD:_Testing_Singular_Value_Decomposition_routines" EIG/xeigtsts svd.in ssvd.out)
openblas_add_lapack_test("SSEC:_Testing_REAL_Eigen_Condition_Routines" EIG/xeigtsts sec.in sec.out)
openblas_add_lapack_test("SSEV:_Testing_REAL_Nonsymmetric_Eigenvalue_Driver" EIG/xeigtsts sed.in sed.out)
openblas_add_lapack_test("SGG:_Testing_REAL_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtsts sgg.in sgg.out)
openblas_add_lapack_test("SGD:_Testing_REAL_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" EIG/xeigtsts sgd.in sgd.out)
openblas_add_lapack_test("SSB:_Testing_REAL_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtsts ssb.in ssb.out)
openblas_add_lapack_test("SSG:_Testing_REAL_Symmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtsts ssg.in ssg.out)
openblas_add_lapack_test("SGEBAL:_Testing_the_balancing_of_a_REAL_general_matrix" EIG/xeigtsts sbal.in sbal.out)
openblas_add_lapack_test("SGEBAK:_Testing_the_back_transformation_of_a_REAL_balanced_matrix" EIG/xeigtsts sbak.in sbak.out)
openblas_add_lapack_test("SGGBAL:_Testing_the_balancing_of_a_pair_of_REAL_general_matrices" EIG/xeigtsts sgbal.in sgbal.out)
openblas_add_lapack_test("SGGBAK:_Testing_the_back_transformation_of_a_pair_of_REAL_balanced_matrices" EIG/xeigtsts sgbak.in sgbak.out)
openblas_add_lapack_test("SBB:_Testing_banded_Singular_Value_Decomposition_routines" EIG/xeigtsts sbb.in sbb.out)
openblas_add_lapack_test("SGLM:_Testing_Generalized_Linear_Regression_Model_routines" EIG/xeigtsts glm.in sglm.out)
openblas_add_lapack_test("SGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" EIG/xeigtsts gqr.in sgqr.out)
openblas_add_lapack_test("SGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" EIG/xeigtsts gsv.in sgsv.out)
openblas_add_lapack_test("SCSD:_Testing_CS_Decomposition_routines" EIG/xeigtsts csd.in scsd.out)
openblas_add_lapack_test("SLSE:_Testing_Constrained_Linear_Least_Squares_routines" EIG/xeigtsts lse.in slse.out)

# ======== COMPLEX EIG TESTS ===========================

openblas_add_lapack_test("CNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" EIG/xeigtstc nep.in cnep.out)
openblas_add_lapack_test("CSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtstc sep.in csep.out)
openblas_add_lapack_test("CSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtstc se2.in cse2.out)
openblas_add_lapack_test("CSVD:_Testing_Singular_Value_Decomposition_routines" EIG/xeigtstc svd.in csvd.out)
openblas_add_lapack_test("CEC:_Testing_COMPLEX_Eigen_Condition_Routines" EIG/xeigtstc cec.in cec.out)
openblas_add_lapack_test("CES:_Testing_COMPLEX_Nonsymmetric_Schur_Form_Driver" EIG/xeigtstc ced.in ced.out)
openblas_add_lapack_test("CGG:_Testing_COMPLEX_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtstc cgg.in cgg.out)
openblas_add_lapack_test("CGD:_Testing_COMPLEX_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" EIG/xeigtstc cgd.in cgd.out)
openblas_add_lapack_test("CHB:_Testing_Hermitian_Eigenvalue_Problem_routines" EIG/xeigtstc csb.in csb.out)
openblas_add_lapack_test("CSG:_Testing_Symmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtstc csg.in csg.out)
openblas_add_lapack_test("CGEBAL:_Testing_the_balancing_of_a_COMPLEX_general_matrix" EIG/xeigtstc cbal.in cbal.out)
openblas_add_lapack_test("CGEBAK:_Testing_the_back_transformation_of_a_COMPLEX_balanced_matrix" EIG/xeigtstc cbak.in cbak.out)
openblas_add_lapack_test("CGGBAL:_Testing_the_balancing_of_a_pair_of_COMPLEX_general_matrices" EIG/xeigtstc cgbal.in cgbal.out)
openblas_add_lapack_test("CGGBAK:_Testing_the_back_transformation_of_a_pair_of_COMPLEX_balanced_matrices" EIG/xeigtstc cgbak.in cgbak.out)
openblas_add_lapack_test("CBB:_Testing_banded_Singular_Value_Decomposition_routines" EIG/xeigtstc cbb.in cbb.out)
openblas_add_lapack_test("CGLM:_Testing_Generalized_Linear_Regression_Model_routines" EIG/xeigtstc glm.in cglm.out)
openblas_add_lapack_test("CGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" EIG/xeigtstc gqr.in cgqr.out)
openblas_add_lapack_test("CGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" EIG/xeigtstc gsv.in cgsv.out)
openblas_add_lapack_test("CCSD:_Testing_CS_Decomposition_routines" EIG/xeigtstc csd.in ccsd.out)
openblas_add_lapack_test("CLSE:_Testing_Constrained_Linear_Least_Squares_routines" EIG/xeigtstc lse.in clse.out)

# ======== DOUBLE EIG TESTS ===========================

openblas_add_lapack_test("DNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" EIG/xeigtstd nep.in dnep.out)
openblas_add_lapack_test("DSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtstd sep.in dsep.out)
openblas_add_lapack_test("DSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtstd se2.in dse2.out)
openblas_add_lapack_test("DSVD:_Testing_Singular_Value_Decomposition_routines" EIG/xeigtstd svd.in dsvd.out)
openblas_add_lapack_test("DEC:_Testing_DOUBLE_PRECISION_Eigen_Condition_Routines" EIG/xeigtstd dec.in dec.out)
openblas_add_lapack_test("DEV:_Testing_DOUBLE_PRECISION_Nonsymmetric_Eigenvalue_Driver" EIG/xeigtstd ded.in ded.out)
openblas_add_lapack_test("DGG:_Testing_DOUBLE_PRECISION_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtstd dgg.in dgg.out)
openblas_add_lapack_test("DGD:_Testing_DOUBLE_PRECISION_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" EIG/xeigtstd dgd.in dgd.out)
openblas_add_lapack_test("DSB:_Testing_DOUBLE_PRECISION_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtstd dsb.in dsb.out)
openblas_add_lapack_test("DSG:_Testing_DOUBLE_PRECISION_Symmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtstd dsg.in dsg.out)
openblas_add_lapack_test("DGEBAL:_Testing_the_balancing_of_a_DOUBLE_PRECISION_general_matrix" EIG/xeigtstd dbal.in dbal.out)
openblas_add_lapack_test("DGEBAK:_Testing_the_back_transformation_of_a_DOUBLE_PRECISION_balanced_matrix" EIG/xeigtstd dbak.in dbak.out)
openblas_add_lapack_test("DGGBAL:_Testing_the_balancing_of_a_pair_of_DOUBLE_PRECISION_general_matrices" EIG/xeigtstd dgbal.in dgbal.out)
openblas_add_lapack_test("DGGBAK:_Testing_the_back_transformation_of_a_pair_of_DOUBLE_PRECISION_balanced_matrices" EIG/xeigtstd dgbak.in dgbak.out)
openblas_add_lapack_test("DBB:_Testing_banded_Singular_Value_Decomposition_routines" EIG/xeigtstd dbb.in dbb.out)
openblas_add_lapack_test("DGLM:_Testing_Generalized_Linear_Regression_Model_routines" EIG/xeigtstd glm.in dglm.out)
openblas_add_lapack_test("DGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" EIG/xeigtstd gqr.in dgqr.out)
openblas_add_lapack_test("DGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" EIG/xeigtstd gsv.in dgsv.out)
openblas_add_lapack_test("DCSD:_Testing_CS_Decomposition_routines" EIG/xeigtstd csd.in dcsd.out)
openblas_add_lapack_test("DLSE:_Testing_Constrained_Linear_Least_Squares_routines" EIG/xeigtstd lse.in dlse.out)

# ======== COMPLEX16 EIG TESTS ===========================

openblas_add_lapack_test("ZNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" EIG/xeigtstz nep.in znep.out)
openblas_add_lapack_test("ZSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtstz sep.in zsep.out)
openblas_add_lapack_test("ZSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" EIG/xeigtstz se2.in zse2.out)
openblas_add_lapack_test("ZSVD:_Testing_Singular_Value_Decomposition_routines" EIG/xeigtstz svd.in zsvd.out)
openblas_add_lapack_test("ZEC:_Testing_COMPLEX16_Eigen_Condition_Routines" EIG/xeigtstz zec.in zec.out)
openblas_add_lapack_test("ZES:_Testing_COMPLEX16_Nonsymmetric_Schur_Form_Driver" EIG/xeigtstz zed.in zed.out)
openblas_add_lapack_test("ZGG:_Testing_COMPLEX16_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtstz zgg.in zgg.out)
openblas_add_lapack_test("ZGD:_Testing_COMPLEX16_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" EIG/xeigtstz zgd.in zgd.out)
openblas_add_lapack_test("ZHB:_Testing_Hermitian_Eigenvalue_Problem_routines" EIG/xeigtstz zsb.in zsb.out)
openblas_add_lapack_test("ZSG:_Testing_Symmetric_Generalized_Eigenvalue_Problem_routines" EIG/xeigtstz zsg.in zsg.out)
openblas_add_lapack_test("ZGEBAL:_Testing_the_balancing_of_a_COMPLEX16_general_matrix" EIG/xeigtstz zbal.in zbal.out)
openblas_add_lapack_test("ZGEBAK:_Testing_the_back_transformation_of_a_COMPLEX16_balanced_matrix" EIG/xeigtstz zbak.in zbak.out)
openblas_add_lapack_test("ZGGBAL:_Testing_the_balancing_of_a_pair_of_COMPLEX_general_matrices" EIG/xeigtstz zgbal.in zgbal.out)
openblas_add_lapack_test("ZGGBAK:_Testing_the_back_transformation_of_a_pair_of_COMPLEX16_balanced_matrices" EIG/xeigtstz zgbak.in zgbak.out)
openblas_add_lapack_test("ZBB:_Testing_banded_Singular_Value_Decomposition_routines" EIG/xeigtstz zbb.in zbb.out)
openblas_add_lapack_test("ZGLM:_Testing_Generalized_Linear_Regression_Model_routines" EIG/xeigtstz glm.in zglm.out)
openblas_add_lapack_test("ZGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" EIG/xeigtstz gqr.in zgqr.out)
openblas_add_lapack_test("ZGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" EIG/xeigtstz gsv.in zgsv.out)
openblas_add_lapack_test("ZCSD:_Testing_CS_Decomposition_routines" EIG/xeigtstz csd.in zcsd.out)
# NOTE(review): the other precisions name this test "<X>LSE:_Testing_...".
# The existing name is kept so that CTest name filters keep matching.
openblas_add_lapack_test("Constrained_Linear_Least_Squares_routines" EIG/xeigtstz lse.in zlse.out)
|
||||
|
|
|
@ -98,7 +98,7 @@ set(ZEIGTST zchkee.f
|
|||
|
||||
# add_eig_executable(<name> <source>...)
# Creates the executable target <name> from the sources passed after the
# name (ARGN) and links it against the libraries listed below.
# NOTE(review): this span looks like a rendered diff hunk without +/- markers,
# so the two target_link_libraries() lines may be the old and the new version
# of a single line rather than two live statements - confirm against the file.
macro(add_eig_executable name)
|
||||
add_executable(${name} ${ARGN})
|
||||
target_link_libraries(${name} tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
|
||||
target_link_libraries(${name} openblas)
|
||||
endmacro()
|
||||
|
||||
if(BUILD_SINGLE)
|
||||
|
|
|
@ -239,7 +239,8 @@ set(ZLINTSTRFP zchkrfp.f zdrvrfp.f zdrvrf1.f zdrvrf2.f zdrvrf3.f zdrvrf4.f zerrr
|
|||
|
||||
# add_lin_executable(<name> <source>...)
# Creates the executable target <name> from the sources passed after the
# name (ARGN) and links it against the libraries listed below.
# NOTE(review): this span looks like a rendered diff hunk without +/- markers,
# so the two target_link_libraries() lines may be the old and the new version
# of a single line rather than two live statements - confirm against the file.
macro(add_lin_executable name)
|
||||
add_executable(${name} ${ARGN})
|
||||
target_link_libraries(${name} tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
|
||||
target_link_libraries(${name} openblas)
|
||||
# Leftover of an earlier argument list, kept commented out:
#${TMGLIB} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
|
||||
endmacro()
|
||||
|
||||
if(BUILD_SINGLE)
|
||||
|
|
15
param.h
15
param.h
|
@ -2260,7 +2260,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(POWER9)
|
||||
#if defined(POWER9) || defined(POWER10)
|
||||
|
||||
#define SNUMOPT 16
|
||||
#define DNUMOPT 8
|
||||
|
@ -2297,6 +2297,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
/* POWER10 only: drop any SHGEMM_DEFAULT_* values defined earlier and replace
 * them with the POWER10-specific settings below.
 * NOTE(review): presumably UNROLL_M/UNROLL_N are the micro-kernel tile sizes
 * and P/Q/R the blocking sizes used by the SHGEMM driver - confirm against
 * the code that consumes these macros. */
#if defined(POWER10)
|
||||
#undef SHGEMM_DEFAULT_UNROLL_N
|
||||
#undef SHGEMM_DEFAULT_UNROLL_M
|
||||
#undef SHGEMM_DEFAULT_P
|
||||
#undef SHGEMM_DEFAULT_R
|
||||
#undef SHGEMM_DEFAULT_Q
|
||||
#define SHGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SHGEMM_DEFAULT_UNROLL_N 8
|
||||
#define SHGEMM_DEFAULT_P 832
|
||||
#define SHGEMM_DEFAULT_Q 1026
|
||||
#define SHGEMM_DEFAULT_R 4096
|
||||
#endif
|
||||
|
||||
#if defined(SPARC) && defined(V7)
|
||||
|
||||
#define SNUMOPT 4
|
||||
|
|
|
@ -1,7 +1,49 @@
|
|||
*> \brief \b CBLAT1
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM CBLAT1
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the COMPLEX Level 1 BLAS.
|
||||
*> Based upon the original BLAS test routine together with:
|
||||
*>
|
||||
*> F06GAF Example Program Text
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup complex_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM CBLAT1
|
||||
* Test program for the COMPLEX Level 1 BLAS.
|
||||
* Based upon the original BLAS test routine together with:
|
||||
* F06GAF Example Program Text
|
||||
*
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NOUT
|
||||
PARAMETER (NOUT=6)
|
||||
|
@ -114,8 +156,8 @@
|
|||
+ (5.0E0,6.0E0), (5.0E0,6.0E0), (0.1E0,0.1E0),
|
||||
+ (-0.6E0,0.1E0), (0.1E0,-0.3E0), (7.0E0,8.0E0),
|
||||
+ (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0),
|
||||
+ (7.0E0,8.0E0), (0.3E0,0.1E0), (0.1E0,0.4E0),
|
||||
+ (0.4E0,0.1E0), (0.1E0,0.2E0), (2.0E0,3.0E0),
|
||||
+ (7.0E0,8.0E0), (0.3E0,0.1E0), (0.5E0,0.0E0),
|
||||
+ (0.0E0,0.5E0), (0.0E0,0.2E0), (2.0E0,3.0E0),
|
||||
+ (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/
|
||||
DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0),
|
||||
+ (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0),
|
||||
|
@ -129,10 +171,10 @@
|
|||
+ (3.0E0,6.0E0), (-0.6E0,0.1E0), (4.0E0,7.0E0),
|
||||
+ (0.1E0,-0.3E0), (7.0E0,2.0E0), (7.0E0,2.0E0),
|
||||
+ (7.0E0,2.0E0), (0.3E0,0.1E0), (5.0E0,8.0E0),
|
||||
+ (0.1E0,0.4E0), (6.0E0,9.0E0), (0.4E0,0.1E0),
|
||||
+ (8.0E0,3.0E0), (0.1E0,0.2E0), (9.0E0,4.0E0)/
|
||||
DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.7E0/
|
||||
DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.7E0/
|
||||
+ (0.5E0,0.0E0), (6.0E0,9.0E0), (0.0E0,0.5E0),
|
||||
+ (8.0E0,3.0E0), (0.0E0,0.2E0), (9.0E0,4.0E0)/
|
||||
DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.8E0/
|
||||
DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.6E0/
|
||||
DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0),
|
||||
+ (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0),
|
||||
+ (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0),
|
||||
|
@ -145,8 +187,8 @@
|
|||
+ (0.11E0,-0.03E0), (-0.17E0,0.46E0),
|
||||
+ (-0.17E0,-0.19E0), (7.0E0,8.0E0), (7.0E0,8.0E0),
|
||||
+ (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0),
|
||||
+ (0.19E0,-0.17E0), (0.32E0,0.09E0),
|
||||
+ (0.23E0,-0.24E0), (0.18E0,0.01E0),
|
||||
+ (0.19E0,-0.17E0), (0.20E0,-0.35E0),
|
||||
+ (0.35E0,0.20E0), (0.14E0,0.08E0),
|
||||
+ (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0),
|
||||
+ (2.0E0,3.0E0)/
|
||||
DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0),
|
||||
|
@ -162,9 +204,9 @@
|
|||
+ (-0.17E0,0.46E0), (4.0E0,7.0E0),
|
||||
+ (-0.17E0,-0.19E0), (7.0E0,2.0E0), (7.0E0,2.0E0),
|
||||
+ (7.0E0,2.0E0), (0.19E0,-0.17E0), (5.0E0,8.0E0),
|
||||
+ (0.32E0,0.09E0), (6.0E0,9.0E0),
|
||||
+ (0.23E0,-0.24E0), (8.0E0,3.0E0),
|
||||
+ (0.18E0,0.01E0), (9.0E0,4.0E0)/
|
||||
+ (0.20E0,-0.35E0), (6.0E0,9.0E0),
|
||||
+ (0.35E0,0.20E0), (8.0E0,3.0E0),
|
||||
+ (0.14E0,0.08E0), (9.0E0,4.0E0)/
|
||||
DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0),
|
||||
+ (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0),
|
||||
+ (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0),
|
||||
|
@ -177,8 +219,8 @@
|
|||
+ (0.03E0,0.03E0), (-0.18E0,0.03E0),
|
||||
+ (0.03E0,-0.09E0), (7.0E0,8.0E0), (7.0E0,8.0E0),
|
||||
+ (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0),
|
||||
+ (0.09E0,0.03E0), (0.03E0,0.12E0),
|
||||
+ (0.12E0,0.03E0), (0.03E0,0.06E0), (2.0E0,3.0E0),
|
||||
+ (0.09E0,0.03E0), (0.15E0,0.00E0),
|
||||
+ (0.00E0,0.15E0), (0.00E0,0.06E0), (2.0E0,3.0E0),
|
||||
+ (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/
|
||||
DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0),
|
||||
+ (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0),
|
||||
|
@ -193,8 +235,8 @@
|
|||
+ (-0.18E0,0.03E0), (4.0E0,7.0E0),
|
||||
+ (0.03E0,-0.09E0), (7.0E0,2.0E0), (7.0E0,2.0E0),
|
||||
+ (7.0E0,2.0E0), (0.09E0,0.03E0), (5.0E0,8.0E0),
|
||||
+ (0.03E0,0.12E0), (6.0E0,9.0E0), (0.12E0,0.03E0),
|
||||
+ (8.0E0,3.0E0), (0.03E0,0.06E0), (9.0E0,4.0E0)/
|
||||
+ (0.15E0,0.00E0), (6.0E0,9.0E0), (0.00E0,0.15E0),
|
||||
+ (8.0E0,3.0E0), (0.00E0,0.06E0), (9.0E0,4.0E0)/
|
||||
DATA ITRUE3/0, 1, 2, 2, 2/
|
||||
* .. Executable Statements ..
|
||||
DO 60 INCX = 1, 2
|
||||
|
@ -529,7 +571,8 @@
|
|||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NOUT
|
||||
PARAMETER (NOUT=6)
|
||||
REAL ZERO
|
||||
PARAMETER (NOUT=6, ZERO=0.0E0)
|
||||
* .. Scalar Arguments ..
|
||||
REAL SFAC
|
||||
INTEGER LEN
|
||||
|
@ -552,7 +595,7 @@
|
|||
*
|
||||
DO 40 I = 1, LEN
|
||||
SD = SCOMP(I) - STRUE(I)
|
||||
IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0E0)
|
||||
IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO))
|
||||
+ GO TO 40
|
||||
*
|
||||
* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I).
|
||||
|
|
188
test/cblat2.f
188
test/cblat2.f
|
@ -1,68 +1,114 @@
|
|||
*> \brief \b CBLAT2
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM CBLAT2
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the COMPLEX Level 2 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 18 records
|
||||
*> of the file are read using list-directed input, the last 17 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 35 lines:
|
||||
*> 'cblat2.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*> 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 4 NUMBER OF VALUES OF K
|
||||
*> 0 1 2 4 VALUES OF K
|
||||
*> 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
*> 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
*> CGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CGERC T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CGERU T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHER2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
*> An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
*> and Computer Science Division, Argonne National Laboratory,
|
||||
*> 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> Or
|
||||
*>
|
||||
*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*>
|
||||
*>
|
||||
*> -- Written on 10-August-1987.
|
||||
*> Richard Hanson, Sandia National Labs.
|
||||
*> Jeremy Du Croz, NAG Central Office.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup complex_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM CBLAT2
|
||||
*
|
||||
* Test program for the COMPLEX Level 2 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 18 records
|
||||
* of the file are read using list-directed input, the last 17 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 35 lines:
|
||||
* 'CBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
* 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 4 NUMBER OF VALUES OF K
|
||||
* 0 1 2 4 VALUES OF K
|
||||
* 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
* 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
* CGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CGERC T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CGERU T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHER2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
* An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
* and Computer Science Division, Argonne National Laboratory,
|
||||
* 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* Or
|
||||
*
|
||||
* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*
|
||||
*
|
||||
* -- Written on 10-August-1987.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
* Jeremy Du Croz, NAG Central Office.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
|
@ -71,8 +117,8 @@
|
|||
PARAMETER ( NSUBS = 17 )
|
||||
COMPLEX ZERO, ONE
|
||||
PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) )
|
||||
REAL RZERO, RHALF, RONE
|
||||
PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 )
|
||||
REAL RZERO
|
||||
PARAMETER ( RZERO = 0.0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
|
@ -126,7 +172,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -135,7 +181,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -240,14 +286,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = RONE
|
||||
90 CONTINUE
|
||||
IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO )
|
||||
$ GO TO 100
|
||||
EPS = RHALF*EPS
|
||||
GO TO 90
|
||||
100 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(RZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of CMVCH using exact data.
|
||||
|
@ -3079,7 +3118,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LCERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
187
test/cblat3.f
187
test/cblat3.f
|
@ -1,50 +1,96 @@
|
|||
*> \brief \b CBLAT3
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM CBLAT3
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the COMPLEX Level 3 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 14 records
|
||||
*> of the file are read using list-directed input, the last 9 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 23 lines:
|
||||
*> 'cblat3.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*> 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
*> CGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CTRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHERK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CHER2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> CSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
*> A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
*> Computer Science Division, Argonne National Laboratory, 9700
|
||||
*> South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> -- Written on 8-February-1989.
|
||||
*> Jack Dongarra, Argonne National Laboratory.
|
||||
*> Iain Duff, AERE Harwell.
|
||||
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
*> Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup complex_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM CBLAT3
|
||||
*
|
||||
* Test program for the COMPLEX Level 3 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 14 records
|
||||
* of the file are read using list-directed input, the last 9 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 23 lines:
|
||||
* 'CBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
* 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
* CGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CTRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHERK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CHER2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* CSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
* A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
* Computer Science Division, Argonne National Laboratory, 9700
|
||||
* South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* -- Written on 8-February-1989.
|
||||
* Jack Dongarra, Argonne National Laboratory.
|
||||
* Iain Duff, AERE Harwell.
|
||||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
|
@ -53,8 +99,8 @@
|
|||
PARAMETER ( NSUBS = 9 )
|
||||
COMPLEX ZERO, ONE
|
||||
PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) )
|
||||
REAL RZERO, RHALF, RONE
|
||||
PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 )
|
||||
REAL RZERO
|
||||
PARAMETER ( RZERO = 0.0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
|
@ -103,7 +149,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -112,7 +158,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -189,14 +235,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = RONE
|
||||
70 CONTINUE
|
||||
IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO )
|
||||
$ GO TO 80
|
||||
EPS = RHALF*EPS
|
||||
GO TO 70
|
||||
80 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(RZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of CMMCH using exact data.
|
||||
|
@ -1301,8 +1340,6 @@
|
|||
NC = 0
|
||||
RESET = .TRUE.
|
||||
ERRMAX = RZERO
|
||||
RALS = RONE
|
||||
RBETS = RONE
|
||||
*
|
||||
DO 100 IN = 1, NIDIM
|
||||
N = IDIM( IN )
|
||||
|
@ -1948,7 +1985,7 @@
|
|||
*
|
||||
* Tests the error exits from the Level 3 Blas.
|
||||
* Requires a special version of the error-handling routine XERBLA.
|
||||
* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined.
|
||||
* A, B and C should not need to be defined.
|
||||
*
|
||||
* Auxiliary routine for test program for Level 3 Blas.
|
||||
*
|
||||
|
@ -1958,12 +1995,19 @@
|
|||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*
|
||||
* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca)
|
||||
* 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM
|
||||
* with INFOT = 9 (eca)
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER ISNUM, NOUT
|
||||
CHARACTER*6 SRNAMT
|
||||
* .. Scalars in Common ..
|
||||
INTEGER INFOT, NOUTC
|
||||
LOGICAL LERR, OK
|
||||
* .. Parameters ..
|
||||
REAL ONE, TWO
|
||||
PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 )
|
||||
* .. Local Scalars ..
|
||||
COMPLEX ALPHA, BETA
|
||||
REAL RALPHA, RBETA
|
||||
|
@ -1981,6 +2025,14 @@
|
|||
* LERR is set to .TRUE. by the special version of XERBLA each time
|
||||
* it is called, and is then tested and re-set by CHKXER.
|
||||
LERR = .FALSE.
|
||||
*
|
||||
* Initialize ALPHA, BETA, RALPHA, and RBETA.
|
||||
*
|
||||
ALPHA = CMPLX( ONE, -ONE )
|
||||
BETA = CMPLX( TWO, -TWO )
|
||||
RALPHA = ONE
|
||||
RBETA = TWO
|
||||
*
|
||||
GO TO ( 10, 20, 30, 40, 50, 60, 70, 80,
|
||||
$ 90 )ISNUM
|
||||
10 INFOT = 1
|
||||
|
@ -2207,16 +2259,16 @@
|
|||
CALL CHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 12
|
||||
CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 )
|
||||
|
@ -2274,16 +2326,16 @@
|
|||
CALL CSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 12
|
||||
CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 )
|
||||
|
@ -3270,7 +3322,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LCERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
186
test/dblat2.f
186
test/dblat2.f
|
@ -1,75 +1,121 @@
|
|||
*> \brief \b DBLAT2
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM DBLAT2
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the DOUBLE PRECISION Level 2 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 18 records
|
||||
*> of the file are read using list-directed input, the last 16 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 34 lines:
|
||||
*> 'dblat2.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*> 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 4 NUMBER OF VALUES OF K
|
||||
*> 0 1 2 4 VALUES OF K
|
||||
*> 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
*> 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> 0.0 1.0 0.9 VALUES OF BETA
|
||||
*> DGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSYMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DGER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSYR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSYR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
*> An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
*> and Computer Science Division, Argonne National Laboratory,
|
||||
*> 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> Or
|
||||
*>
|
||||
*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*>
|
||||
*>
|
||||
*> -- Written on 10-August-1987.
|
||||
*> Richard Hanson, Sandia National Labs.
|
||||
*> Jeremy Du Croz, NAG Central Office.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup double_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM DBLAT2
|
||||
*
|
||||
* Test program for the DOUBLE PRECISION Level 2 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 18 records
|
||||
* of the file are read using list-directed input, the last 16 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 34 lines:
|
||||
* 'DBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
* 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 4 NUMBER OF VALUES OF K
|
||||
* 0 1 2 4 VALUES OF K
|
||||
* 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
* 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* 0.0 1.0 0.9 VALUES OF BETA
|
||||
* DGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSYMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DGER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSYR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSYR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
* An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
* and Computer Science Division, Argonne National Laboratory,
|
||||
* 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* Or
|
||||
*
|
||||
* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*
|
||||
*
|
||||
* -- Written on 10-August-1987.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
* Jeremy Du Croz, NAG Central Office.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
PARAMETER ( NIN = 5 )
|
||||
INTEGER NSUBS
|
||||
PARAMETER ( NSUBS = 16 )
|
||||
DOUBLE PRECISION ZERO, HALF, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 )
|
||||
DOUBLE PRECISION ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
|
@ -121,7 +167,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -130,7 +176,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -235,14 +281,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = ONE
|
||||
90 CONTINUE
|
||||
IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO )
|
||||
$ GO TO 100
|
||||
EPS = HALF*EPS
|
||||
GO TO 90
|
||||
100 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(ZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of DMVCH using exact data.
|
||||
|
@ -2982,7 +3021,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LDERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
168
test/dblat3.f
168
test/dblat3.f
|
@ -1,55 +1,101 @@
|
|||
*> \brief \b DBLAT3
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM DBLAT3
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the DOUBLE PRECISION Level 3 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 14 records
|
||||
*> of the file are read using list-directed input, the last 6 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 20 lines:
|
||||
*> 'dblat3.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*> 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> 0.0 1.0 1.3 VALUES OF BETA
|
||||
*> DGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DTRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> DSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
*> A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
*> Computer Science Division, Argonne National Laboratory, 9700
|
||||
*> South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> -- Written on 8-February-1989.
|
||||
*> Jack Dongarra, Argonne National Laboratory.
|
||||
*> Iain Duff, AERE Harwell.
|
||||
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
*> Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup double_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM DBLAT3
|
||||
*
|
||||
* Test program for the DOUBLE PRECISION Level 3 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 14 records
|
||||
* of the file are read using list-directed input, the last 6 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 20 lines:
|
||||
* 'DBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
* 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* 0.0 1.0 1.3 VALUES OF BETA
|
||||
* DGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DTRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* DSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
* A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
* Computer Science Division, Argonne National Laboratory, 9700
|
||||
* South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* -- Written on 8-February-1989.
|
||||
* Jack Dongarra, Argonne National Laboratory.
|
||||
* Iain Duff, AERE Harwell.
|
||||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
PARAMETER ( NIN = 5 )
|
||||
INTEGER NSUBS
|
||||
PARAMETER ( NSUBS = 6 )
|
||||
DOUBLE PRECISION ZERO, HALF, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 )
|
||||
DOUBLE PRECISION ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
|
@ -96,7 +142,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -105,7 +151,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -182,14 +228,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = ONE
|
||||
70 CONTINUE
|
||||
IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO )
|
||||
$ GO TO 80
|
||||
EPS = HALF*EPS
|
||||
GO TO 70
|
||||
80 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(ZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of DMMCH using exact data.
|
||||
|
@ -1802,7 +1841,7 @@
|
|||
*
|
||||
* Tests the error exits from the Level 3 Blas.
|
||||
* Requires a special version of the error-handling routine XERBLA.
|
||||
* ALPHA, BETA, A, B and C should not need to be defined.
|
||||
* A, B and C should not need to be defined.
|
||||
*
|
||||
* Auxiliary routine for test program for Level 3 Blas.
|
||||
*
|
||||
|
@ -1812,12 +1851,18 @@
|
|||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*
|
||||
* 3-19-92: Initialize ALPHA and BETA (eca)
|
||||
* 3-19-92: Fix argument 12 in calls to DSYMM with INFOT = 9 (eca)
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER ISNUM, NOUT
|
||||
CHARACTER*6 SRNAMT
|
||||
* .. Scalars in Common ..
|
||||
INTEGER INFOT, NOUTC
|
||||
LOGICAL LERR, OK
|
||||
* .. Parameters ..
|
||||
DOUBLE PRECISION ONE, TWO
|
||||
PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 )
|
||||
* .. Local Scalars ..
|
||||
DOUBLE PRECISION ALPHA, BETA
|
||||
* .. Local Arrays ..
|
||||
|
@ -1834,6 +1879,12 @@
|
|||
* LERR is set to .TRUE. by the special version of XERBLA each time
|
||||
* it is called, and is then tested and re-set by CHKXER.
|
||||
LERR = .FALSE.
|
||||
*
|
||||
* Initialize ALPHA and BETA.
|
||||
*
|
||||
ALPHA = ONE
|
||||
BETA = TWO
|
||||
*
|
||||
GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM
|
||||
10 INFOT = 1
|
||||
CALL DGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
|
@ -1963,16 +2014,16 @@
|
|||
CALL DSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 12
|
||||
CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 )
|
||||
|
@ -2660,7 +2711,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LDERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
186
test/sblat2.f
186
test/sblat2.f
|
@ -1,75 +1,121 @@
|
|||
*> \brief \b SBLAT2
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM SBLAT2
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the REAL Level 2 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 18 records
|
||||
*> of the file are read using list-directed input, the last 16 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 34 lines:
|
||||
*> 'sblat2.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*> 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 4 NUMBER OF VALUES OF K
|
||||
*> 0 1 2 4 VALUES OF K
|
||||
*> 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
*> 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> 0.0 1.0 0.9 VALUES OF BETA
|
||||
*> SGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSYMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SGER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSYR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSYR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
*> An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
*> and Computer Science Division, Argonne National Laboratory,
|
||||
*> 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> Or
|
||||
*>
|
||||
*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*>
|
||||
*>
|
||||
*> -- Written on 10-August-1987.
|
||||
*> Richard Hanson, Sandia National Labs.
|
||||
*> Jeremy Du Croz, NAG Central Office.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup single_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM SBLAT2
|
||||
*
|
||||
* Test program for the REAL Level 2 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 18 records
|
||||
* of the file are read using list-directed input, the last 16 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 34 lines:
|
||||
* 'SBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
* 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 4 NUMBER OF VALUES OF K
|
||||
* 0 1 2 4 VALUES OF K
|
||||
* 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
* 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* 0.0 1.0 0.9 VALUES OF BETA
|
||||
* SGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSYMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SGER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSYR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSYR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
* An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
* and Computer Science Division, Argonne National Laboratory,
|
||||
* 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* Or
|
||||
*
|
||||
* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*
|
||||
*
|
||||
* -- Written on 10-August-1987.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
* Jeremy Du Croz, NAG Central Office.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
PARAMETER ( NIN = 5 )
|
||||
INTEGER NSUBS
|
||||
PARAMETER ( NSUBS = 16 )
|
||||
REAL ZERO, HALF, ONE
|
||||
PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 )
|
||||
REAL ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
|
@ -121,7 +167,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -130,7 +176,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -235,14 +281,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = ONE
|
||||
90 CONTINUE
|
||||
IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO )
|
||||
$ GO TO 100
|
||||
EPS = HALF*EPS
|
||||
GO TO 90
|
||||
100 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(ZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of SMVCH using exact data.
|
||||
|
@ -2982,7 +3021,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LSERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
168
test/sblat3.f
168
test/sblat3.f
|
@ -1,55 +1,101 @@
|
|||
*> \brief \b SBLAT3
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM SBLAT3
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the REAL Level 3 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 14 records
|
||||
*> of the file are read using list-directed input, the last 6 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 20 lines:
|
||||
*> 'sblat3.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*> 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> 0.0 1.0 1.3 VALUES OF BETA
|
||||
*> SGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> STRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> SSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
*> A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
*> Computer Science Division, Argonne National Laboratory, 9700
|
||||
*> South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> -- Written on 8-February-1989.
|
||||
*> Jack Dongarra, Argonne National Laboratory.
|
||||
*> Iain Duff, AERE Harwell.
|
||||
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
*> Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup single_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM SBLAT3
|
||||
*
|
||||
* Test program for the REAL Level 3 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 14 records
|
||||
* of the file are read using list-directed input, the last 6 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 20 lines:
|
||||
* 'SBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
* 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* 0.0 1.0 0.7 VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* 0.0 1.0 1.3 VALUES OF BETA
|
||||
* SGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* STRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* SSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
* A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
* Computer Science Division, Argonne National Laboratory, 9700
|
||||
* South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* -- Written on 8-February-1989.
|
||||
* Jack Dongarra, Argonne National Laboratory.
|
||||
* Iain Duff, AERE Harwell.
|
||||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
PARAMETER ( NIN = 5 )
|
||||
INTEGER NSUBS
|
||||
PARAMETER ( NSUBS = 6 )
|
||||
REAL ZERO, HALF, ONE
|
||||
PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 )
|
||||
REAL ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
|
@ -96,7 +142,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -105,7 +151,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -182,14 +228,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = ONE
|
||||
70 CONTINUE
|
||||
IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO )
|
||||
$ GO TO 80
|
||||
EPS = HALF*EPS
|
||||
GO TO 70
|
||||
80 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(ZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of SMMCH using exact data.
|
||||
|
@ -1802,7 +1841,7 @@
|
|||
*
|
||||
* Tests the error exits from the Level 3 Blas.
|
||||
* Requires a special version of the error-handling routine XERBLA.
|
||||
* ALPHA, BETA, A, B and C should not need to be defined.
|
||||
* A, B and C should not need to be defined.
|
||||
*
|
||||
* Auxiliary routine for test program for Level 3 Blas.
|
||||
*
|
||||
|
@ -1812,12 +1851,18 @@
|
|||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*
|
||||
* 3-19-92: Initialize ALPHA and BETA (eca)
|
||||
* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca)
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER ISNUM, NOUT
|
||||
CHARACTER*6 SRNAMT
|
||||
* .. Scalars in Common ..
|
||||
INTEGER INFOT, NOUTC
|
||||
LOGICAL LERR, OK
|
||||
* .. Parameters ..
|
||||
REAL ONE, TWO
|
||||
PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 )
|
||||
* .. Local Scalars ..
|
||||
REAL ALPHA, BETA
|
||||
* .. Local Arrays ..
|
||||
|
@ -1834,6 +1879,12 @@
|
|||
* LERR is set to .TRUE. by the special version of XERBLA each time
|
||||
* it is called, and is then tested and re-set by CHKXER.
|
||||
LERR = .FALSE.
|
||||
*
|
||||
* Initialize ALPHA and BETA.
|
||||
*
|
||||
ALPHA = ONE
|
||||
BETA = TWO
|
||||
*
|
||||
GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM
|
||||
10 INFOT = 1
|
||||
CALL SGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
|
@ -1963,16 +2014,16 @@
|
|||
CALL SSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 12
|
||||
CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 )
|
||||
|
@ -2660,7 +2711,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LSERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
|
@ -1,7 +1,49 @@
|
|||
*> \brief \b ZBLAT1
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM ZBLAT1
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the COMPLEX*16 Level 1 BLAS.
|
||||
*>
|
||||
*> Based upon the original BLAS test routine together with:
|
||||
*> F06GAF Example Program Text
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup complex16_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM ZBLAT1
|
||||
* Test program for the COMPLEX*16 Level 1 BLAS.
|
||||
* Based upon the original BLAS test routine together with:
|
||||
* F06GAF Example Program Text
|
||||
*
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NOUT
|
||||
PARAMETER (NOUT=6)
|
||||
|
@ -114,8 +156,8 @@
|
|||
+ (5.0D0,6.0D0), (5.0D0,6.0D0), (0.1D0,0.1D0),
|
||||
+ (-0.6D0,0.1D0), (0.1D0,-0.3D0), (7.0D0,8.0D0),
|
||||
+ (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0),
|
||||
+ (7.0D0,8.0D0), (0.3D0,0.1D0), (0.1D0,0.4D0),
|
||||
+ (0.4D0,0.1D0), (0.1D0,0.2D0), (2.0D0,3.0D0),
|
||||
+ (7.0D0,8.0D0), (0.3D0,0.1D0), (0.5D0,0.0D0),
|
||||
+ (0.0D0,0.5D0), (0.0D0,0.2D0), (2.0D0,3.0D0),
|
||||
+ (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/
|
||||
DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0),
|
||||
+ (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0),
|
||||
|
@ -129,10 +171,10 @@
|
|||
+ (3.0D0,6.0D0), (-0.6D0,0.1D0), (4.0D0,7.0D0),
|
||||
+ (0.1D0,-0.3D0), (7.0D0,2.0D0), (7.0D0,2.0D0),
|
||||
+ (7.0D0,2.0D0), (0.3D0,0.1D0), (5.0D0,8.0D0),
|
||||
+ (0.1D0,0.4D0), (6.0D0,9.0D0), (0.4D0,0.1D0),
|
||||
+ (8.0D0,3.0D0), (0.1D0,0.2D0), (9.0D0,4.0D0)/
|
||||
DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.7D0/
|
||||
DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.7D0/
|
||||
+ (0.5D0,0.0D0), (6.0D0,9.0D0), (0.0D0,0.5D0),
|
||||
+ (8.0D0,3.0D0), (0.0D0,0.2D0), (9.0D0,4.0D0)/
|
||||
DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.8D0/
|
||||
DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.6D0/
|
||||
DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0),
|
||||
+ (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0),
|
||||
+ (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0),
|
||||
|
@ -145,8 +187,8 @@
|
|||
+ (0.11D0,-0.03D0), (-0.17D0,0.46D0),
|
||||
+ (-0.17D0,-0.19D0), (7.0D0,8.0D0), (7.0D0,8.0D0),
|
||||
+ (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0),
|
||||
+ (0.19D0,-0.17D0), (0.32D0,0.09D0),
|
||||
+ (0.23D0,-0.24D0), (0.18D0,0.01D0),
|
||||
+ (0.19D0,-0.17D0), (0.20D0,-0.35D0),
|
||||
+ (0.35D0,0.20D0), (0.14D0,0.08D0),
|
||||
+ (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0),
|
||||
+ (2.0D0,3.0D0)/
|
||||
DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0),
|
||||
|
@ -162,9 +204,9 @@
|
|||
+ (-0.17D0,0.46D0), (4.0D0,7.0D0),
|
||||
+ (-0.17D0,-0.19D0), (7.0D0,2.0D0), (7.0D0,2.0D0),
|
||||
+ (7.0D0,2.0D0), (0.19D0,-0.17D0), (5.0D0,8.0D0),
|
||||
+ (0.32D0,0.09D0), (6.0D0,9.0D0),
|
||||
+ (0.23D0,-0.24D0), (8.0D0,3.0D0),
|
||||
+ (0.18D0,0.01D0), (9.0D0,4.0D0)/
|
||||
+ (0.20D0,-0.35D0), (6.0D0,9.0D0),
|
||||
+ (0.35D0,0.20D0), (8.0D0,3.0D0),
|
||||
+ (0.14D0,0.08D0), (9.0D0,4.0D0)/
|
||||
DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0),
|
||||
+ (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0),
|
||||
+ (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0),
|
||||
|
@ -177,8 +219,8 @@
|
|||
+ (0.03D0,0.03D0), (-0.18D0,0.03D0),
|
||||
+ (0.03D0,-0.09D0), (7.0D0,8.0D0), (7.0D0,8.0D0),
|
||||
+ (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0),
|
||||
+ (0.09D0,0.03D0), (0.03D0,0.12D0),
|
||||
+ (0.12D0,0.03D0), (0.03D0,0.06D0), (2.0D0,3.0D0),
|
||||
+ (0.09D0,0.03D0), (0.15D0,0.00D0),
|
||||
+ (0.00D0,0.15D0), (0.00D0,0.06D0), (2.0D0,3.0D0),
|
||||
+ (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/
|
||||
DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0),
|
||||
+ (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0),
|
||||
|
@ -193,8 +235,8 @@
|
|||
+ (-0.18D0,0.03D0), (4.0D0,7.0D0),
|
||||
+ (0.03D0,-0.09D0), (7.0D0,2.0D0), (7.0D0,2.0D0),
|
||||
+ (7.0D0,2.0D0), (0.09D0,0.03D0), (5.0D0,8.0D0),
|
||||
+ (0.03D0,0.12D0), (6.0D0,9.0D0), (0.12D0,0.03D0),
|
||||
+ (8.0D0,3.0D0), (0.03D0,0.06D0), (9.0D0,4.0D0)/
|
||||
+ (0.15D0,0.00D0), (6.0D0,9.0D0), (0.00D0,0.15D0),
|
||||
+ (8.0D0,3.0D0), (0.00D0,0.06D0), (9.0D0,4.0D0)/
|
||||
DATA ITRUE3/0, 1, 2, 2, 2/
|
||||
* .. Executable Statements ..
|
||||
DO 60 INCX = 1, 2
|
||||
|
@ -529,7 +571,8 @@
|
|||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NOUT
|
||||
PARAMETER (NOUT=6)
|
||||
DOUBLE PRECISION ZERO
|
||||
PARAMETER (NOUT=6, ZERO=0.0D0)
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION SFAC
|
||||
INTEGER LEN
|
||||
|
@ -552,7 +595,7 @@
|
|||
*
|
||||
DO 40 I = 1, LEN
|
||||
SD = SCOMP(I) - STRUE(I)
|
||||
IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0D0)
|
||||
IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO))
|
||||
+ GO TO 40
|
||||
*
|
||||
* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I).
|
||||
|
|
188
test/zblat2.f
188
test/zblat2.f
|
@ -1,68 +1,114 @@
|
|||
*> \brief \b ZBLAT2
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM ZBLAT2
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the COMPLEX*16 Level 2 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 18 records
|
||||
*> of the file are read using list-directed input, the last 17 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 35 lines:
|
||||
*> 'zblat2.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*>  'ZBLAT2.SNAP'     NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 4 NUMBER OF VALUES OF K
|
||||
*> 0 1 2 4 VALUES OF K
|
||||
*> 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
*> 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
*> ZGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZGERC T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZGERU T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHER2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
*> An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
*> and Computer Science Division, Argonne National Laboratory,
|
||||
*> 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> Or
|
||||
*>
|
||||
*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*>
|
||||
*>
|
||||
*> -- Written on 10-August-1987.
|
||||
*> Richard Hanson, Sandia National Labs.
|
||||
*> Jeremy Du Croz, NAG Central Office.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup complex16_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM ZBLAT2
|
||||
*
|
||||
* Test program for the COMPLEX*16 Level 2 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 18 records
|
||||
* of the file are read using list-directed input, the last 17 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 35 lines:
|
||||
* 'ZBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*  'ZBLAT2.SNAP'     NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 4 NUMBER OF VALUES OF K
|
||||
* 0 1 2 4 VALUES OF K
|
||||
* 4 NUMBER OF VALUES OF INCX AND INCY
|
||||
* 1 2 -1 -2 VALUES OF INCX AND INCY
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
* ZGEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZGBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHEMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTRMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTBMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTPMV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTRSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTBSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTPSV T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZGERC T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZGERU T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHER T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHPR T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHER2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J..
|
||||
* An extended set of Fortran Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics
|
||||
* and Computer Science Division, Argonne National Laboratory,
|
||||
* 9700 South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* Or
|
||||
*
|
||||
* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms
|
||||
* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford
|
||||
* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st
|
||||
* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA.
|
||||
*
|
||||
*
|
||||
* -- Written on 10-August-1987.
|
||||
* Richard Hanson, Sandia National Labs.
|
||||
* Jeremy Du Croz, NAG Central Office.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
|
@ -72,8 +118,8 @@
|
|||
COMPLEX*16 ZERO, ONE
|
||||
PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ),
|
||||
$ ONE = ( 1.0D0, 0.0D0 ) )
|
||||
DOUBLE PRECISION RZERO, RHALF, RONE
|
||||
PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 )
|
||||
DOUBLE PRECISION RZERO
|
||||
PARAMETER ( RZERO = 0.0D0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
|
@ -127,7 +173,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -136,7 +182,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -241,14 +287,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = RONE
|
||||
90 CONTINUE
|
||||
IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO )
|
||||
$ GO TO 100
|
||||
EPS = RHALF*EPS
|
||||
GO TO 90
|
||||
100 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(RZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of ZMVCH using exact data.
|
||||
|
@ -3087,7 +3126,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LZERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
191
test/zblat3.f
191
test/zblat3.f
|
@ -1,50 +1,97 @@
|
|||
*> \brief \b ZBLAT3
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* PROGRAM ZBLAT3
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> Test program for the COMPLEX*16 Level 3 Blas.
|
||||
*>
|
||||
*> The program must be driven by a short data file. The first 14 records
|
||||
*> of the file are read using list-directed input, the last 9 records
|
||||
*> are read using the format ( A6, L2 ). An annotated example of a data
|
||||
*> file can be obtained by deleting the first 3 characters from the
|
||||
*> following 23 lines:
|
||||
*> 'zblat3.out' NAME OF SUMMARY OUTPUT FILE
|
||||
*> 6 UNIT NUMBER OF SUMMARY FILE
|
||||
*> 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
*> F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
*> T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
*> 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
*> 6 NUMBER OF VALUES OF N
|
||||
*> 0 1 2 3 5 9 VALUES OF N
|
||||
*> 3 NUMBER OF VALUES OF ALPHA
|
||||
*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
*> 3 NUMBER OF VALUES OF BETA
|
||||
*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
*> ZGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZTRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHERK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZHER2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*> ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*>
|
||||
*>
|
||||
*> Further Details
|
||||
*> ===============
|
||||
*>
|
||||
*> See:
|
||||
*>
|
||||
*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
*> A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*>
|
||||
*> Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
*> Computer Science Division, Argonne National Laboratory, 9700
|
||||
*> South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*>
|
||||
*> -- Written on 8-February-1989.
|
||||
*> Jack Dongarra, Argonne National Laboratory.
|
||||
*> Iain Duff, AERE Harwell.
|
||||
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
*> Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*>
|
||||
*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers
|
||||
*> can be run multiple times without deleting generated
|
||||
*> output files (susan)
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date April 2012
|
||||
*
|
||||
*> \ingroup complex16_blas_testing
|
||||
*
|
||||
* =====================================================================
|
||||
PROGRAM ZBLAT3
|
||||
*
|
||||
* Test program for the COMPLEX*16 Level 3 Blas.
|
||||
* -- Reference BLAS test routine (version 3.7.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* April 2012
|
||||
*
|
||||
* The program must be driven by a short data file. The first 14 records
|
||||
* of the file are read using list-directed input, the last 9 records
|
||||
* are read using the format ( A6, L2 ). An annotated example of a data
|
||||
* file can be obtained by deleting the first 3 characters from the
|
||||
* following 23 lines:
|
||||
* 'ZBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE
|
||||
* 6 UNIT NUMBER OF SUMMARY FILE
|
||||
* 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||
* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||
* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||
* F LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||
* T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||
* 16.0 THRESHOLD VALUE OF TEST RATIO
|
||||
* 6 NUMBER OF VALUES OF N
|
||||
* 0 1 2 3 5 9 VALUES OF N
|
||||
* 3 NUMBER OF VALUES OF ALPHA
|
||||
* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA
|
||||
* 3 NUMBER OF VALUES OF BETA
|
||||
* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA
|
||||
* ZGEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHEMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZSYMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTRMM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZTRSM T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHERK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZSYRK T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZHER2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
* ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S.
|
||||
* A Set of Level 3 Basic Linear Algebra Subprograms.
|
||||
*
|
||||
* Technical Memorandum No.88 (Revision 1), Mathematics and
|
||||
* Computer Science Division, Argonne National Laboratory, 9700
|
||||
* South Cass Avenue, Argonne, Illinois 60439, US.
|
||||
*
|
||||
* -- Written on 8-February-1989.
|
||||
* Jack Dongarra, Argonne National Laboratory.
|
||||
* Iain Duff, AERE Harwell.
|
||||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
INTEGER NIN
|
||||
|
@ -54,8 +101,8 @@
|
|||
COMPLEX*16 ZERO, ONE
|
||||
PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ),
|
||||
$ ONE = ( 1.0D0, 0.0D0 ) )
|
||||
DOUBLE PRECISION RZERO, RHALF, RONE
|
||||
PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 )
|
||||
DOUBLE PRECISION RZERO
|
||||
PARAMETER ( RZERO = 0.0D0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
|
@ -104,7 +151,7 @@
|
|||
*
|
||||
READ( NIN, FMT = * )SUMMRY
|
||||
READ( NIN, FMT = * )NOUT
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' )
|
||||
OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
|
||||
NOUTC = NOUT
|
||||
*
|
||||
* Read name and unit number for snapshot output file and open file.
|
||||
|
@ -113,7 +160,7 @@
|
|||
READ( NIN, FMT = * )NTRA
|
||||
TRACE = NTRA.GE.0
|
||||
IF( TRACE )THEN
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' )
|
||||
OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' )
|
||||
END IF
|
||||
* Read the flag that directs rewinding of the snapshot file.
|
||||
READ( NIN, FMT = * )REWI
|
||||
|
@ -190,14 +237,7 @@
|
|||
*
|
||||
* Compute EPS (the machine precision).
|
||||
*
|
||||
EPS = RONE
|
||||
70 CONTINUE
|
||||
IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO )
|
||||
$ GO TO 80
|
||||
EPS = RHALF*EPS
|
||||
GO TO 70
|
||||
80 CONTINUE
|
||||
EPS = EPS + EPS
|
||||
EPS = EPSILON(RZERO)
|
||||
WRITE( NOUT, FMT = 9998 )EPS
|
||||
*
|
||||
* Check the reliability of ZMMCH using exact data.
|
||||
|
@ -1303,8 +1343,6 @@
|
|||
NC = 0
|
||||
RESET = .TRUE.
|
||||
ERRMAX = RZERO
|
||||
RALS = RONE
|
||||
RBETS = RONE
|
||||
*
|
||||
DO 100 IN = 1, NIDIM
|
||||
N = IDIM( IN )
|
||||
|
@ -1951,7 +1989,7 @@
|
|||
*
|
||||
* Tests the error exits from the Level 3 Blas.
|
||||
* Requires a special version of the error-handling routine XERBLA.
|
||||
* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined.
|
||||
* A, B and C should not need to be defined.
|
||||
*
|
||||
* Auxiliary routine for test program for Level 3 Blas.
|
||||
*
|
||||
|
@ -1961,12 +1999,20 @@
|
|||
* Jeremy Du Croz, Numerical Algorithms Group Ltd.
|
||||
* Sven Hammarling, Numerical Algorithms Group Ltd.
|
||||
*
|
||||
* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca)
|
||||
* 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM
|
||||
* with INFOT = 9 (eca)
|
||||
* 10-9-00: Declared INTRINSIC DCMPLX (susan)
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER ISNUM, NOUT
|
||||
CHARACTER*6 SRNAMT
|
||||
* .. Scalars in Common ..
|
||||
INTEGER INFOT, NOUTC
|
||||
LOGICAL LERR, OK
|
||||
* .. Parameters ..
|
||||
REAL ONE, TWO
|
||||
PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 )
|
||||
* .. Local Scalars ..
|
||||
COMPLEX*16 ALPHA, BETA
|
||||
DOUBLE PRECISION RALPHA, RBETA
|
||||
|
@ -1975,6 +2021,8 @@
|
|||
* .. External Subroutines ..
|
||||
EXTERNAL ZGEMM, ZHEMM, ZHER2K, ZHERK, CHKXER, ZSYMM,
|
||||
$ ZSYR2K, ZSYRK, ZTRMM, ZTRSM
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC DCMPLX
|
||||
* .. Common blocks ..
|
||||
COMMON /INFOC/INFOT, NOUTC, OK, LERR
|
||||
* .. Executable Statements ..
|
||||
|
@ -1984,6 +2032,14 @@
|
|||
* LERR is set to .TRUE. by the special version of XERBLA each time
|
||||
* it is called, and is then tested and re-set by CHKXER.
|
||||
LERR = .FALSE.
|
||||
*
|
||||
* Initialize ALPHA, BETA, RALPHA, and RBETA.
|
||||
*
|
||||
ALPHA = DCMPLX( ONE, -ONE )
|
||||
BETA = DCMPLX( TWO, -TWO )
|
||||
RALPHA = ONE
|
||||
RBETA = TWO
|
||||
*
|
||||
GO TO ( 10, 20, 30, 40, 50, 60, 70, 80,
|
||||
$ 90 )ISNUM
|
||||
10 INFOT = 1
|
||||
|
@ -2210,16 +2266,16 @@
|
|||
CALL ZHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 12
|
||||
CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 )
|
||||
|
@ -2277,16 +2333,16 @@
|
|||
CALL ZSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 )
|
||||
CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 9
|
||||
CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
|
||||
CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 )
|
||||
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
|
||||
INFOT = 12
|
||||
CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 )
|
||||
|
@ -3276,7 +3332,6 @@
|
|||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
60 CONTINUE
|
||||
LZERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
|
Loading…
Reference in New Issue